Merge pull request !6503 from chenfei_mindspore/mastertags/v1.0.0
| @@ -8,6 +8,7 @@ | |||||
| - [Environment Requirements](#environment-requirements) | - [Environment Requirements](#environment-requirements) | ||||
| - [Script Description](#script-description) | - [Script Description](#script-description) | ||||
| - [Script and Sample Code](#script-and-sample-code) | - [Script and Sample Code](#script-and-sample-code) | ||||
| - [Script Parameters](#script-parameters) | |||||
| - [Training Process](#training-process) | - [Training Process](#training-process) | ||||
| - [Evaluation Process](#evaluation-process) | - [Evaluation Process](#evaluation-process) | ||||
| - [Model Description](#model-description) | - [Model Description](#model-description) | ||||
| @@ -83,6 +84,31 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil | |||||
| ├── export.py # export checkpoint files into air/onnx | ├── export.py # export checkpoint files into air/onnx | ||||
| ``` | ``` | ||||
| ## [Script Parameters](#contents) | |||||
| Parameters for both training and evaluation can be set in config.py | |||||
| - config for MobileNetV2-quant, ImageNet2012 dataset | |||||
| ```python | |||||
| 'num_classes': 1000 # the number of classes in the dataset | |||||
| 'batch_size': 134 # training batch size | |||||
| 'epoch_size': 60 # training epochs of mobilenetv2-quant | |||||
| 'start_epoch': 200 # number of pretraining epochs of the non-quantized network | |||||
| 'warmup_epochs': 0 # number of warmup epochs | |||||
| 'lr': 0.3 #learning rate | |||||
| 'momentum': 0.9 # momentum | |||||
| 'weight_decay': 4e-5 # weight decay value | |||||
| 'loss_scale': 1024 # the initial loss_scale value | |||||
| 'label_smooth': 0.1 #label smooth factor | |||||
| 'loss_scale': 1024 # the initial loss_scale value | |||||
| 'save_checkpoint': True # whether to save checkpoint files | |||||
| 'save_checkpoint_epochs': 1 # checkpoint saving interval, in epochs | |||||
| 'keep_checkpoint_max': 300 # keep only the most recent keep_checkpoint_max checkpoints | |||||
| 'save_checkpoint_path': './checkpoint' # directory in which the checkpoint files are saved | |||||
| ``` | |||||
| ## [Training process](#contents) | ## [Training process](#contents) | ||||
| ### Usage | ### Usage | ||||
| @@ -39,8 +39,6 @@ config_ascend_quant = ed({ | |||||
| config_gpu_quant = ed({ | config_gpu_quant = ed({ | ||||
| "num_classes": 1000, | "num_classes": 1000, | ||||
| "image_height": 224, | |||||
| "image_width": 224, | |||||
| "batch_size": 134, | "batch_size": 134, | ||||
| "epoch_size": 60, | "epoch_size": 60, | ||||
| "start_epoch": 200, | "start_epoch": 200, | ||||
| @@ -54,5 +52,4 @@ config_gpu_quant = ed({ | |||||
| "save_checkpoint_epochs": 1, | "save_checkpoint_epochs": 1, | ||||
| "keep_checkpoint_max": 300, | "keep_checkpoint_max": 300, | ||||
| "save_checkpoint_path": "./checkpoint", | "save_checkpoint_path": "./checkpoint", | ||||
| "quantization_aware": True, | |||||
| }) | }) | ||||
| @@ -64,7 +64,7 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, | |||||
| else: | else: | ||||
| raise ValueError("Unsupported device_target.") | raise ValueError("Unsupported device_target.") | ||||
| resize_height = config.image_height | |||||
| resize_height = 224 | |||||
| if do_train: | if do_train: | ||||
| buffer_size = 20480 | buffer_size = 20480 | ||||
| @@ -128,7 +128,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num= | |||||
| else: | else: | ||||
| raise ValueError("Unsupported device target.") | raise ValueError("Unsupported device target.") | ||||
| resize_height = config.image_height | |||||
| resize_height = 224 | |||||
| if do_train: | if do_train: | ||||
| buffer_size = 20480 | buffer_size = 20480 | ||||
| @@ -8,6 +8,7 @@ | |||||
| - [Environment Requirements](#environment-requirements) | - [Environment Requirements](#environment-requirements) | ||||
| - [Script Description](#script-description) | - [Script Description](#script-description) | ||||
| - [Script and Sample Code](#script-and-sample-code) | - [Script and Sample Code](#script-and-sample-code) | ||||
| - [Script Parameters](#script-parameters) | |||||
| - [Training Process](#training-process) | - [Training Process](#training-process) | ||||
| - [Evaluation Process](#evaluation-process) | - [Evaluation Process](#evaluation-process) | ||||
| - [Model Description](#model-description) | - [Model Description](#model-description) | ||||
| @@ -83,6 +84,33 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil | |||||
| ``` | ``` | ||||
| ## [Script Parameters](#contents) | |||||
| Parameters for both training and evaluation can be set in config.py | |||||
| - config for Resnet50-quant, ImageNet2012 dataset | |||||
| ```python | |||||
| 'class_num': 10 # the number of classes in the dataset | |||||
| 'batch_size': 32 # training batch size | |||||
| 'loss_scale': 1024 # the initial loss_scale value | |||||
| 'momentum': 0.9 # momentum | |||||
| 'weight_decay': 1e-4 # weight decay value | |||||
| 'epoch_size': 120 # total training epochs | |||||
| 'pretrained_epoch_size': 90 # number of pretraining epochs of resnet50, the non-quantized counterpart of resnet50_quant | |||||
| 'data_load_mode': 'mindata' # the style of loading data into device | |||||
| 'save_checkpoint': True # whether to save checkpoint files | |||||
| 'save_checkpoint_epochs': 1 # checkpoint saving interval, in epochs | |||||
| 'keep_checkpoint_max': 50 # keep only the most recent keep_checkpoint_max checkpoints | |||||
| 'save_checkpoint_path': './' # directory in which the checkpoint files are saved | |||||
| "warmup_epochs": 0 # number of warmup epochs | |||||
| 'lr_decay_mode': "cosine" # learning rate decay mode: steps, steps_decay, cosine or linear | |||||
| 'use_label_smooth': True # whether to use label smoothing | |||||
| 'label_smooth_factor': 0.1 # label smoothing factor | |||||
| 'lr_init': 0 # initial learning rate | |||||
| 'lr_max': 0.005 # the max learning rate | |||||
| ``` | |||||
| ## [Training process](#contents) | ## [Training process](#contents) | ||||
| ### Usage | ### Usage | ||||
| @@ -24,9 +24,6 @@ config_quant = ed({ | |||||
| "weight_decay": 1e-4, | "weight_decay": 1e-4, | ||||
| "epoch_size": 120, | "epoch_size": 120, | ||||
| "pretrained_epoch_size": 90, | "pretrained_epoch_size": 90, | ||||
| "buffer_size": 1000, | |||||
| "image_height": 224, | |||||
| "image_width": 224, | |||||
| "data_load_mode": "mindata", | "data_load_mode": "mindata", | ||||
| "save_checkpoint": True, | "save_checkpoint": True, | ||||
| "save_checkpoint_epochs": 1, | "save_checkpoint_epochs": 1, | ||||
| @@ -62,7 +62,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" | |||||
| ds = load_func(num_parallel_workers=8, shuffle=True, | ds = load_func(num_parallel_workers=8, shuffle=True, | ||||
| num_shards=device_num, shard_id=rank_id) | num_shards=device_num, shard_id=rank_id) | ||||
| image_size = config.image_height | |||||
| image_size = 224 | |||||
| mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] | mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] | ||||
| std = [0.229 * 255, 0.224 * 255, 0.225 * 255] | std = [0.229 * 255, 0.224 * 255, 0.225 * 255] | ||||
| @@ -47,7 +47,7 @@ def train_lenet(): | |||||
| time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) | time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) | ||||
| config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, | config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, | ||||
| keep_checkpoint_max=cfg.keep_checkpoint_max) | keep_checkpoint_max=cfg.keep_checkpoint_max) | ||||
| ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) | |||||
| ckpoint_cb = ModelCheckpoint(prefix="ckpt_lenet_noquant", config=config_ck) | |||||
| model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) | model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) | ||||
| print("============== Starting Training Lenet==============") | print("============== Starting Training Lenet==============") | ||||
| @@ -58,7 +58,7 @@ def train_lenet(): | |||||
| def train_lenet_quant(): | def train_lenet_quant(): | ||||
| context.set_context(mode=context.GRAPH_MODE, device_target=device_target) | context.set_context(mode=context.GRAPH_MODE, device_target=device_target) | ||||
| cfg = quant_cfg | cfg = quant_cfg | ||||
| ckpt_path = './checkpoint_lenet-10_1875.ckpt' | |||||
| ckpt_path = './ckpt_lenet_noquant-10_1875.ckpt' | |||||
| ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1) | ds_train = create_dataset(os.path.join(data_path, "train"), cfg.batch_size, 1) | ||||
| step_size = ds_train.get_dataset_size() | step_size = ds_train.get_dataset_size() | ||||
| @@ -81,7 +81,7 @@ def train_lenet_quant(): | |||||
| # call back and monitor | # call back and monitor | ||||
| config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, | config_ckpt = CheckpointConfig(save_checkpoint_steps=cfg.epoch_size * step_size, | ||||
| keep_checkpoint_max=cfg.keep_checkpoint_max) | keep_checkpoint_max=cfg.keep_checkpoint_max) | ||||
| ckpt_callback = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ckpt) | |||||
| ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant", config=config_ckpt) | |||||
| # define model | # define model | ||||
| model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) | model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) | ||||
| @@ -96,7 +96,7 @@ def eval_quant(): | |||||
| context.set_context(mode=context.GRAPH_MODE, device_target=device_target) | context.set_context(mode=context.GRAPH_MODE, device_target=device_target) | ||||
| cfg = quant_cfg | cfg = quant_cfg | ||||
| ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1) | ds_eval = create_dataset(os.path.join(data_path, "test"), cfg.batch_size, 1) | ||||
| ckpt_path = './checkpoint_lenet_1-10_937.ckpt' | |||||
| ckpt_path = './ckpt_lenet_quant-10_937.ckpt' | |||||
| # define fusion network | # define fusion network | ||||
| network = LeNet5Fusion(cfg.num_classes) | network = LeNet5Fusion(cfg.num_classes) | ||||
| # convert fusion network to quantization aware network | # convert fusion network to quantization aware network | ||||