diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/space_to_depth_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/space_to_depth_base.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/exp_fp32.c old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/space_to_batch_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/space_to_batch_fp32.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/gather_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/gather_parameter.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/gatherNd_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/gatherNd_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/gather_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/gather_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/leaky_relu_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/leaky_relu_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sigmoid_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sigmoid_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/squeeze_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/squeeze_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.h old mode 100644 new mode 100755 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/space_to_depth_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/space_to_depth_parameter.h old mode 100644 new mode 100755 diff --git a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/operator_library/kernels/nnacl/fp32/exp_fp32.c b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/operator_library/kernels/nnacl/fp32/exp_fp32.c old mode 100755 new mode 100644 diff --git a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/operator_library/kernels/nnacl/int8/quant_dtype_cast_int8.c b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/operator_library/kernels/nnacl/int8/quant_dtype_cast_int8.c old mode 100755 new mode 100644 diff --git a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/src/net.h b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/src/net.h old mode 100644 new mode 100755 diff --git a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/src/tensor.cc b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/src/tensor.cc old mode 100644 new mode 100755 diff --git a/mindspore/lite/micro/example/mnist_x86/benchmark/load_input.c b/mindspore/lite/micro/example/mnist_x86/benchmark/load_input.c old mode 100644 new mode 100755 diff --git a/mindspore/lite/micro/example/mnist_x86/benchmark/load_input.h b/mindspore/lite/micro/example/mnist_x86/benchmark/load_input.h old mode 100644 new mode 100755 diff --git a/model_zoo/official/cv/mobilenetv1/README.md b/model_zoo/official/cv/mobilenetv1/README.md index d3f4f16d51..c800518f7e 100644 --- a/model_zoo/official/cv/mobilenetv1/README.md +++ b/model_zoo/official/cv/mobilenetv1/README.md @@ -99,6 +99,7 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil You can start training using python or shell scripts. 
The usage of shell scripts as follows: - Ascend: sh run_distribute_train.sh [cifar10|imagenet2012] [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] (optional) +- CPU: bash run_train_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH] (optional) For distributed training, a hccl configuration file with JSON format needs to be created in advance. @@ -109,10 +110,12 @@ Please follow the instructions in the link [hccn_tools](https://gitee.com/mindsp ```shell # training example python: - Ascend: python train.py --platform Ascend --dataset_path [TRAIN_DATASET_PATH] + Ascend: python train.py --device_target Ascend --dataset_path [TRAIN_DATASET_PATH] + CPU: python train.py --device_target CPU --dataset_path [TRAIN_DATASET_PATH] shell: Ascend: sh run_distribute_train.sh [cifar10|imagenet2012] [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional) + CPU: bash run_train_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional) ``` ### Result @@ -133,6 +136,7 @@ Epoch time: 150950.623, per step time: 120.664 You can start training using python or shell scripts.If the train method is train or fine tune, should not input the `[CHECKPOINT_PATH]` The usage of shell scripts as follows: - Ascend: sh run_eval.sh [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH] +- CPU: bash run_eval_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH] ### Launch @@ -140,9 +144,11 @@ You can start training using python or shell scripts.If the train method is trai # eval example python: Ascend: python eval.py --dataset [cifar10|imagenet2012] --dataset_path [VAL_DATASET_PATH] --pretrain_ckpt [CHECKPOINT_PATH] + CPU: python eval.py --dataset [cifar10|imagenet2012] --dataset_path [VAL_DATASET_PATH] --checkpoint_path [CHECKPOINT_PATH] --device_target CPU shell: Ascend: sh run_eval.sh [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH] + CPU: bash run_eval_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH] ``` > checkpoint can be
produced in training process.
diff --git a/model_zoo/official/cv/mobilenetv1/eval.py b/model_zoo/official/cv/mobilenetv1/eval.py
index 914ac1205e..aa3348b54c 100755
--- a/model_zoo/official/cv/mobilenetv1/eval.py
+++ b/model_zoo/official/cv/mobilenetv1/eval.py
@@ -45,7 +45,7 @@ if __name__ == '__main__':
 
     # init context
     context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False)
-    if target != "GPU":
+    if target == "Ascend":
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(device_id=device_id)
 
diff --git a/model_zoo/official/cv/mobilenetv1/scripts/run_eval_cpu.sh b/model_zoo/official/cv/mobilenetv1/scripts/run_eval_cpu.sh
new file mode 100755
index 0000000000..75e1ed72a6
--- /dev/null
+++ b/model_zoo/official/cv/mobilenetv1/scripts/run_eval_cpu.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Evaluate MobileNetV1 on CPU.
+# Run from the scripts directory: ../*.py and ../src are copied into a fresh
+# ./eval working directory and eval.py is started there in the background,
+# with stdout/stderr redirected to ./eval/log.
+
+if [ $# != 3 ]
+then
+    echo "Usage: bash run_eval_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+if [ "$1" != "cifar10" ] && [ "$1" != "imagenet2012" ]
+then
+    echo "error: the selected dataset is neither cifar10 nor imagenet2012"
+    exit 1
+fi
+
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m "$PWD/$1")"
+  fi
+}
+
+PATH1=$(get_real_path "$2")
+PATH2=$(get_real_path "$3")
+
+
+if [ ! -d "$PATH1" ]
+then
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
+    exit 1
+fi
+
+if [ ! -f "$PATH2" ]
+then
+    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+    exit 1
+fi
+
+if [ -d "eval" ];
+then
+    rm -rf ./eval
+fi
+mkdir ./eval
+cp ../*.py ./eval
+cp *.sh ./eval
+cp -r ../src ./eval
+cd ./eval || exit
+env > env.log
+python eval.py --dataset="$1" --dataset_path="$PATH1" --checkpoint_path="$PATH2" --device_target=CPU &> log &
+cd ..
diff --git a/model_zoo/official/cv/mobilenetv1/scripts/run_train_cpu.sh b/model_zoo/official/cv/mobilenetv1/scripts/run_train_cpu.sh
new file mode 100755
index 0000000000..ec010a47e6
--- /dev/null
+++ b/model_zoo/official/cv/mobilenetv1/scripts/run_train_cpu.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Train MobileNetV1 on CPU, optionally starting from a pre-trained checkpoint.
+# Run from the scripts directory: ../*.py and ../src are copied into a fresh
+# ./train working directory and train.py is started there in the background,
+# with stdout/stderr redirected to ./train/log.
+
+if [ $# != 2 ] && [ $# != 3 ]
+then
+    echo "Usage: bash run_train_cpu.sh [cifar10|imagenet2012] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
+    exit 1
+fi
+
+if [ "$1" != "cifar10" ] && [ "$1" != "imagenet2012" ]
+then
+    echo "error: the selected dataset is neither cifar10 nor imagenet2012"
+    exit 1
+fi
+
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m "$PWD/$1")"
+  fi
+}
+
+PATH1=$(get_real_path "$2")
+
+if [ $# == 3 ]
+then
+    PATH2=$(get_real_path "$3")
+fi
+
+if [ ! -d "$PATH1" ]
+then
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
+    exit 1
+fi
+
+if [ $# == 3 ] && [ ! -f "$PATH2" ]
+then
+    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
+    exit 1
+fi
+
+if [ -d "train" ];
+then
+    rm -rf ./train
+fi
+mkdir ./train
+cp ../*.py ./train
+cp *.sh ./train
+cp -r ../src ./train
+cd ./train || exit
+env > env.log
+if [ $# == 2 ]
+then
+    python train.py --dataset="$1" --dataset_path="$PATH1" --device_target=CPU &> log &
+fi
+
+if [ $# == 3 ]
+then
+    python train.py --dataset="$1" --dataset_path="$PATH1" --pre_trained="$PATH2" --device_target=CPU &> log &
+fi
+cd ..
diff --git a/model_zoo/official/cv/mobilenetv1/src/dataset.py b/model_zoo/official/cv/mobilenetv1/src/dataset.py
index be81f3253f..660f7b3a3f 100755
--- a/model_zoo/official/cv/mobilenetv1/src/dataset.py
+++ b/model_zoo/official/cv/mobilenetv1/src/dataset.py
@@ -16,12 +16,15 @@
 create train or eval dataset.
 """
 import os
+from multiprocessing import cpu_count
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
 from mindspore.communication.management import init, get_rank, get_group_size
 
+THREAD_NUM = 12 if cpu_count() >= 12 else min(8, cpu_count())  # num_parallel_workers must not exceed the core count
+
 
 def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
     """
@@ -38,15 +41,17 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=
     """
     if target == "Ascend":
         device_num, rank_id = _get_rank_info()
-    else:
+    elif target == "GPU":
         init()
         rank_id = get_rank()
         device_num = get_group_size()
+    else:
+        device_num = 1
 
     if device_num == 1:
-        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=12, shuffle=True)
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=THREAD_NUM, shuffle=True)
     else:
-        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=12, shuffle=True,
+        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=THREAD_NUM, shuffle=True,
                                      num_shards=device_num, shard_id=rank_id)
 
     # define map operations
@@ -66,8 +71,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target=
 
     type_cast_op = C2.TypeCast(mstype.int32)
 
-    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
-    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=12)
+    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=THREAD_NUM)
+    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=THREAD_NUM)
 
     # apply batch operations
     data_set = data_set.batch(batch_size, drop_remainder=True)
@@ -99,9 +104,9 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
         device_num = get_group_size()
 
     if device_num == 1:
-        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True)
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=THREAD_NUM, shuffle=True)
     else:
-        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
+        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=THREAD_NUM, shuffle=True,
                                          num_shards=device_num, shard_id=rank_id)
 
     image_size = 224
@@ -127,8 +132,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
 
     type_cast_op = C2.TypeCast(mstype.int32)
 
-    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=12)
-    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)
+    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=THREAD_NUM)
+    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=THREAD_NUM)
 
     # apply batch operations
     data_set = data_set.batch(batch_size, drop_remainder=True)
diff --git a/model_zoo/official/cv/mobilenetv1/train.py b/model_zoo/official/cv/mobilenetv1/train.py
index ba2f378912..aa29eedc46 100755
--- a/model_zoo/official/cv/mobilenetv1/train.py
+++ b/model_zoo/official/cv/mobilenetv1/train.py
@@ -116,38 +116,28 @@ if __name__ == '__main__':
         else:
             no_decayed_params.append(param)
 
-    group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
-                    {'params': no_decayed_params},
-                    {'order_params': net.trainable_params()}]
-    opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale)
-    # define loss, model
     if target == "Ascend":
-        if args_opt.dataset == "imagenet2012":
-            if not config.use_label_smooth:
-                config.label_smooth_factor = 0.0
-            loss = CrossEntropySmooth(sparse=True, reduction="mean",
-                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-        else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-        model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
-                      amp_level="O2", keep_batchnorm_fp32=False)
+        group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
+                        {'params': no_decayed_params},
+                        {'order_params': net.trainable_params()}]
+        opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale)
     else:
-        # GPU target
-        if args_opt.dataset == "imagenet2012":
-            if not config.use_label_smooth:
-                config.label_smooth_factor = 0.0
-            loss = CrossEntropySmooth(sparse=True, reduction="mean",
-                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
-        else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay,
                        config.loss_scale)
-        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-        # Mixed precision
+    # define loss, model
+    if args_opt.dataset == "imagenet2012":
+        if not config.use_label_smooth:
+            config.label_smooth_factor = 0.0
+        loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+    else:
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
+    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
+    if target != "CPU":
         model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
                       amp_level="O2", keep_batchnorm_fp32=False)
+    else:
+        model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
 
     # define callbacks
     time_cb = TimeMonitor(data_size=step_size)