| @@ -0,0 +1,69 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Evaluate the CNN direction model.""" | |||
| import argparse | |||
| import os | |||
| import random | |||
| import numpy as np | |||
| from src.cnn_direction_model import CNNDirectionModel | |||
| from src.config import config1 as config | |||
| from src.dataset import create_dataset_eval | |||
| from mindspore import context | |||
| from mindspore import dataset as de | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()

# Seed every random source used by the pipeline so evaluation runs are repeatable.
random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

if __name__ == '__main__':
    # Both options default to None; report that as a usage error instead of
    # failing later with a TypeError while the dataset file name is built.
    if args_opt.dataset_path is None or args_opt.checkpoint_path is None:
        parser.error('--dataset_path and --checkpoint_path are both required')

    # init context
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    # Fall back to device 0 when DEVICE_ID is not exported, rather than
    # crashing on int(None).
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(device_id=device_id)

    # create dataset
    eval_file = os.path.join(args_opt.dataset_path, "ocr_eval_pos.mindrecord")
    dataset = create_dataset_eval(eval_file, config=config)
    step_size = dataset.get_dataset_size()
    print("step_size ", step_size)

    # define net
    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])

    # load checkpoint and switch the network to inference mode
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")
    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy'})

    # eval model
    res = model.eval(dataset, dataset_sink_mode=False)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
| @@ -0,0 +1,5 @@ | |||
| mindspore | |||
| numpy | |||
| Pillow | |||
| opencv-python | |||
| scikit-image | |||
| @@ -0,0 +1,88 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
# Two mandatory arguments (rank table, dataset directory) plus one optional
# pretrained checkpoint; anything else prints the usage line and aborts.
if [ $# -lt 2 ] || [ $# -gt 3 ]; then
    echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
    exit 1
fi
# Print an absolute form of the path given as $1.
# Paths that already start with "/" are echoed unchanged; anything else is
# resolved against the current directory with `realpath -m`, which does not
# require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that relative paths containing spaces or glob characters are
    # passed to realpath as a single operand.
    realpath -m "$PWD/$1"
  fi
}
# Resolve the command-line paths. Every expansion below is quoted so that
# paths containing whitespace are not split into several words.
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")

if [ $# == 3 ]
then
    PATH3=$(get_real_path "$3")
fi

if [ ! -f "$PATH1" ]
then
    echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
    exit 1
fi

if [ ! -d "$PATH2" ]
then
    echo "error: DATASET_PATH=$PATH2 is not a directory"
    exit 1
fi

if [ $# == 3 ] && [ ! -f "$PATH3" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file"
    exit 1
fi

ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export RANK_TABLE_FILE=$PATH1

export SERVER_ID=0
rank_start=$((DEVICE_NUM * SERVER_ID))

# Launch one background training process per device, each inside its own
# freshly created working directory holding a copy of the sources.
for((i=0; i<${DEVICE_NUM}; i++))
do
    export DEVICE_ID=$i
    export RANK_ID=$((rank_start + i))
    rm -rf ./train_parallel$i
    mkdir ./train_parallel$i
    cp ../*.py ./train_parallel$i
    cp *.sh ./train_parallel$i
    cp -r ../src ./train_parallel$i
    cd ./train_parallel$i || exit
    echo "start training for rank $RANK_ID, device $DEVICE_ID"
    env > env.log
    if [ $# == 2 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$PATH2" &> log &
    fi

    if [ $# == 3 ]
    then
        python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$PATH2" \
            --pre_trained="$PATH3" &> log &
    fi

    cd ..
done
| @@ -0,0 +1,62 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
# Exactly two arguments are required: the dataset directory and the
# checkpoint to evaluate. The usage line prints the invoked script name ($0)
# so it cannot drift from the actual file name.
if [ $# != 2 ]
then
    echo "Usage: sh $0 [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

ulimit -u unlimited
# Single-device evaluation environment read by the Python side.
export DEVICE_NUM=1
export DEVICE_ID=4
export RANK_ID=0
export RANK_SIZE=1
# Print an absolute form of the path given as $1.
# Paths that already start with "/" are echoed unchanged; anything else is
# resolved against the current directory with `realpath -m`, which does not
# require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that relative paths containing spaces or glob characters are
    # passed to realpath as a single operand.
    realpath -m "$PWD/$1"
  fi
}
# Resolve the command-line paths; expansions are quoted so paths containing
# whitespace stay intact.
PATH1=$(get_real_path "$1")
PATH2=$(get_real_path "$2")

# eval.py appends a file name to the dataset path, so it has to be a directory.
if [ ! -d "$PATH1" ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

if [ ! -f "$PATH2" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
    exit 1
fi

# Start from a clean ./eval working directory holding a copy of the sources.
if [ -d "eval" ];
then
    rm -rf ./eval
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
echo "start evaluation for device $DEVICE_ID"
env > env.log
# Runs in the foreground with output left on the terminal.
python eval.py --dataset_path="$PATH1" --checkpoint_path="$PATH2"
cd ..
| @@ -0,0 +1,72 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
# One mandatory argument (dataset path) and one optional pretrained
# checkpoint; any other count prints the usage line and aborts.
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
    echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
    exit 1
fi

ulimit -u unlimited

# Single-device training environment read by the Python side.
export DEVICE_NUM=1
export DEVICE_ID=3
export RANK_ID=0
export RANK_SIZE=1
# Print an absolute form of the path given as $1.
# Paths that already start with "/" are echoed unchanged; anything else is
# resolved against the current directory with `realpath -m`, which does not
# require the path to exist.
get_real_path(){
  if [ "${1:0:1}" == "/" ]; then
    echo "$1"
  else
    # Quoted so that relative paths containing spaces or glob characters are
    # passed to realpath as a single operand.
    realpath -m "$PWD/$1"
  fi
}
# Resolve the command-line paths; expansions are quoted so paths containing
# whitespace stay intact.
PATH1=$(get_real_path "$1")

if [ $# == 2 ]
then
    PATH2=$(get_real_path "$2")
fi

# Same validation as the distributed launcher applies to its DATASET_PATH.
if [ ! -d "$PATH1" ]
then
    echo "error: DATASET_PATH=$PATH1 is not a directory"
    exit 1
fi

if [ $# == 2 ] && [ ! -f "$PATH2" ]
then
    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
    exit 1
fi

# Start from a clean ./train working directory holding a copy of the sources.
if [ -d "train" ];
then
    rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
# Training runs in the background; stdout and stderr both go to ./train/log.
if [ $# == 1 ]
then
    python train.py --dataset_path="$PATH1" &> log &
fi

if [ $# == 2 ]
then
    python train.py --dataset_path="$PATH1" --pre_trained="$PATH2" &> log &
fi
cd ..
| @@ -0,0 +1,264 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """CNN direction model.""" | |||
| import math | |||
| import mindspore.nn as nn | |||
| from mindspore.common.initializer import Uniform | |||
| from mindspore.ops import operations as P | |||
class NetAddN(nn.Cell):
    """
    Element-wise sum of all tensors passed to the cell.
    """

    def __init__(self):
        super().__init__()
        self.addN = P.AddN()

    def construct(self, *z):
        # The positional inputs arrive as a tuple, which is what AddN consumes.
        total = self.addN(z)
        return total
class Conv(nn.Cell):
    """
    Convolution followed by batch normalisation, with optional extras.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        kernel (tuple): Size of the kernel. Default: (3, 3).
        dilate (bool): If true, a ReLU, a second convolution with dilation (2, 2) and a second
            batch norm are applied and the result is added to the first batch norm output.
            Default: True.
        act (string): The activation function; only 'relu' adds an activation layer. Default: 'relu'.
        mp (int): Kernel size and stride of the trailing max pooling layer; None disables it.
            Default: None.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> Conv(3, 64)
    """

    def __init__(self, in_channel, out_channel, kernel=(3, 3), dilate=True, act='relu', mp=None):
        super().__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.kernel = kernel
        self.dilate = dilate
        self.act = act
        self.mp = mp

        # Both batch norm layers share the same hyper-parameters.
        bn_kwargs = dict(eps=1e-3, momentum=0.99, gamma_init=1, beta_init=0,
                         moving_mean_init=0, moving_var_init=1)

        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=kernel, pad_mode="same",
                               weight_init='he_normal')
        self.batch_norm1 = nn.BatchNorm2d(out_channel, **bn_kwargs)

        if dilate:
            self.dilate_relu = P.ReLU()
            self.dilate_conv = nn.Conv2d(out_channel, out_channel, kernel_size=kernel,
                                         dilation=(2, 2), pad_mode='same', weight_init='he_normal')
            self.dilate_batch_norm = nn.BatchNorm2d(out_channel, **bn_kwargs)
            self.dilate_add = NetAddN()

        if act == 'relu':
            self.act_layer = P.ReLU()

        if mp is not None:
            self.mp_layer = nn.MaxPool2d(kernel_size=mp, stride=mp, pad_mode='valid')

    def construct(self, x):
        feat = self.batch_norm1(self.conv1(x))

        if self.dilate:
            # Dilated branch, summed with the plain conv + batch norm output.
            branch = self.dilate_relu(feat)
            branch = self.dilate_conv(branch)
            branch = self.dilate_batch_norm(branch)
            feat = self.dilate_add(feat, branch)

        if self.act == 'relu':
            feat = self.act_layer(feat)

        if self.mp is not None:
            feat = self.mp_layer(feat)
        return feat
class Block(nn.Cell):
    """
    Two stacked Conv units whose output is added to the block input and passed through ReLU.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.

    Returns:
        Tensor, output tensor.

    Note:
        The input is summed with the convolution output, so the two presumably need the
        same shape (ResidualBlock always builds this cell with equal channel counts).

    Examples:
        >>> Block(64, 64)
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.conv1 = Conv(in_channel, out_channel, act='relu')
        self.conv2 = Conv(out_channel, out_channel, act=None)
        self.add = NetAddN()
        self.relu = P.ReLU()

    def construct(self, x):
        residual = self.conv2(self.conv1(x))
        return self.relu(self.add(x, residual))
class ResidualBlock(nn.Cell):
    """
    A non-dilated 3x3 Conv, a stack of building blocks and an optional max pooling layer.

    Args:
        block (Block): The building block class; instantiated as block(out_channel, out_channel).
        num_blocks (int): Number of blocks.
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        mp (int): Kernel size and stride of the max pooling layer; None disables it. Default: 2.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock(Block, 1, 3, 64)
    """

    def __init__(self, block, num_blocks, in_channel, out_channel, mp=2):
        super().__init__()
        self.num_blocks = num_blocks
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.mp = mp

        # The entry convolution changes the channel count; the blocks keep it.
        self.conv1 = Conv(in_channel, out_channel, kernel=(3, 3), dilate=False)
        self.layer = nn.SequentialCell([block(out_channel, out_channel) for _ in range(num_blocks)])

        if mp is not None:
            self.max_pool = nn.MaxPool2d(kernel_size=mp, stride=mp, pad_mode='valid')

    def construct(self, x):
        feat = self.layer(self.conv1(x))
        if self.mp is not None:
            feat = self.max_pool(feat)
        return feat
class CNNDirectionModel(nn.Cell):
    """
    CNN direction model.

    Args:
        in_channels (list): Input channel counts. The first element is the input dimension of the
            first Conv layer, and the remaining four are the input dimensions of the residual
            blocks, in order.
        out_channels (list): Output channel counts. The first element is the output dimension of
            the first Conv layer, and the remaining four are the output dimensions of the residual
            blocks, in order.
        dense_layers (list): Sizes of the two hidden dense layers, in order.
        image_size (list): Size of the input images as [height, width].
        num_classes (int): Number of classes. Default: 2 for binary classification.

    Returns:
        Tensor, output of the final dense layer (softmax activation), one row per input sample.

    Examples:
        >>> CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 dense_layers,
                 image_size,
                 num_classes=2
                 ):
        super(CNNDirectionModel, self).__init__()
        self.num_classes = num_classes
        self.image_h = image_size[0]
        self.image_w = image_size[1]

        self.conv1 = Conv(in_channels[0], out_channels[0], kernel=(7, 7), dilate=False, mp=2)
        self.residual_block1 = ResidualBlock(Block, 1, in_channels[1], out_channels[1])
        self.residual_block2 = ResidualBlock(Block, 1, in_channels[2], out_channels[2])
        self.residual_block3 = ResidualBlock(Block, 2, in_channels[3], out_channels[3])
        self.residual_block4 = ResidualBlock(Block, 1, in_channels[4], out_channels[4])

        # 5 previous layers have mp=2. Height and width of the image would become 1/32,
        # so this pooling window covers the whole remaining feature map.
        self.avg_pool = nn.AvgPool2d(kernel_size=(self.image_h // 32, self.image_w // 32))
        # Build the reshape primitive once here instead of on every construct() call.
        self.reshape = P.Reshape()

        # Dense weights use a uniform range of sqrt(6 / (fan_in + fan_out))
        # (the 'glorot_uniform' scheme).
        scale = math.sqrt(6 / (out_channels[-1] + dense_layers[0]))
        self.dense1 = nn.Dense(out_channels[-1], dense_layers[0], weight_init=Uniform(scale=scale),
                               activation='relu')
        scale = math.sqrt(6 / (dense_layers[0] + dense_layers[1]))
        self.dense2 = nn.Dense(dense_layers[0], dense_layers[1], weight_init=Uniform(scale=scale),
                               activation='relu')
        scale = math.sqrt(6 / (dense_layers[1] + num_classes))
        self.dense3 = nn.Dense(dense_layers[1], num_classes, weight_init=Uniform(scale=scale),
                               activation='softmax')

    def construct(self, x):
        out = self.conv1(x)
        out = self.residual_block1(out)
        out = self.residual_block2(out)
        out = self.residual_block3(out)
        out = self.residual_block4(out)
        out = self.avg_pool(out)
        # Keep only the first two dimensions before the dense layers.
        out = self.reshape(out, (out.shape[0], out.shape[1]))
        out = self.dense1(out)
        out = self.dense2(out)
        out = self.dense3(out)
        return out
| @@ -0,0 +1,37 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
| from easydict import EasyDict as ed | |||
config1 = ed(dict(
    # batching and schedule
    batch_size=8,
    epoch_size=5,
    pretrain_epoch_size=0,
    # checkpointing
    save_checkpoint=True,
    save_checkpoint_epochs=10,
    keep_checkpoint_max=20,
    save_checkpoint_path="./",
    # learning rate
    warmup_epochs=5,
    lr_decay_mode="poly",
    lr=1e-4,
    # data pipeline
    work_nums=4,
    im_size_w=512,
    im_size_h=64,
    pos_samples_size=100,
    # augmentation strength and per-operator probability
    augment_severity=0.1,
    augment_prob=0.3,
))
| @@ -0,0 +1,246 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Data operations, will be used in train.py and eval.py | |||
| """ | |||
| import os | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.vision.c_transforms as C | |||
| from src.dataset_utils import lucky, noise_blur, noise_speckle, noise_gamma, noise_gaussian, noise_salt_pepper, \ | |||
| shift_color, enhance_brightness, enhance_sharpness, enhance_contrast, enhance_color, gaussian_blur, \ | |||
| randcrop, resize, rdistort, rgeometry, rotate_about_center, whole_rdistort, warp_perspective, random_contrast, \ | |||
| unify_img_label | |||
| import cv2 | |||
| import numpy as np | |||
# Turn off OpenCV's own threading for this process.
cv2.setNumThreads(0)

# Target image size read by resize_image(); assigned in create_dataset_eval().
image_height, image_width = None, None
class Augmentor:
    """
    Randomly perturb an image with noise and geometric transformations.

    The strength of every operator is driven by a severity level in [0, 1].

    Usage:
        augmentor = Augmentor(severity=0.3,
                              prob=0.5,
                              enable_transform=True,
                              enable_crop=False)
        image_new = augmentor.process(image)
    """

    def __init__(self, severity, prob, enable_transform=True, enable_crop=False):
        """
        severity: clipped to [0, 1], from min to max level of noise/transformation
        prob: clipped to [0, 1], probability to apply each operator
        enable_transform: enable all transformation operators
        enable_crop: enable crop operator
        """
        self.severity = np.clip(severity, 0, 1)
        self.prob = np.clip(prob, 0, 1)
        self.enable_transform = enable_transform
        self.enable_crop = enable_crop

    def add_noise(self, im):
        """Apply each noise/enhancement operator independently with probability self.prob."""
        level = self.severity
        chance = self.prob
        # Operators taking a severity argument, in the order they are tried.
        graded_ops = (
            noise_gamma,
            noise_blur,
            noise_gaussian,
            noise_salt_pepper,
            shift_color,
            gaussian_blur,
            noise_speckle,
            enhance_sharpness,
            enhance_contrast,
            enhance_brightness,
            enhance_color,
        )
        for op in graded_ops:
            if lucky(chance):
                im = op(im, severity=level)
        # random_contrast is the only operator called without a severity.
        if lucky(chance):
            im = random_contrast(im)
        return im

    def convert_color(self, im, cval):
        """Turn a fill-colour spec ('median'/'md', 'mean', a number or an iterable) into int(s)."""
        if cval in ('median', 'md'):
            cval = np.median(im, axis=(0, 1)).astype(int)
        elif cval == 'mean':
            cval = np.mean(im, axis=(0, 1)).astype(int)
        return [int(c) for c in cval] if hasattr(cval, '__iter__') else int(cval)

    def transform(self, im, cval=255, **kw):
        """Apply each geometric operator independently with probability self.prob."""
        level = self.severity
        chance = self.prob
        fill = self.convert_color(im, cval)
        if lucky(chance):
            # affine transform
            im = rgeometry(im, severity=level, cval=fill)
        if lucky(chance):
            im = rdistort(im, severity=level, cval=fill)
        if lucky(chance):
            im = warp_perspective(im, severity=level, cval=fill)
        if lucky(chance):
            im = resize(im, fx=kw.get('fx'), fy=kw.get('fy'), severity=level)
        if lucky(chance):
            im = rotate_about_center(im, severity=level, cval=fill)
        if lucky(chance):
            # the overall distortion of the image.
            im = whole_rdistort(im, severity=level)
        # lucky() is evaluated first, so a random draw happens even when cropping is disabled.
        if lucky(chance) and self.enable_crop:
            # random crop
            im = randcrop(im, severity=level)
        return im

    def process(self, im, cval='median', **kw):
        """Run the optional geometric stage, then the noise stage; extra kwargs go to transform()."""
        if self.enable_transform:
            im = self.transform(im, cval=cval, **kw)
        return self.add_noise(im)
def rotate_and_set_neg(img, label):
    """Rotate the image by 180 degrees and return it with label - 1 as an int32 array."""
    flipped = np.rot90(img, 2)
    return flipped, np.array(label - 1).astype(np.int32)
def rotate(img, label):
    """Rotate the image by 180 degrees; the label is passed through untouched."""
    return np.rot90(img, 2), label
def random_neg_with_rotate(img, label):
    """With probability 0.5 turn the sample into a negative one.

    A negative sample is the image rotated by 180 degrees with its label
    decremented by one. The label is always returned as an int32 array.
    """
    if lucky(0.5):
        label = label - 1
        img = np.rot90(img, 2)
    return img, np.array(label).astype(np.int32)
def transform_image(img, label):
    """Scale pixel values with x / 127.5 - 1 as float32 and move the last axis to the front."""
    scaled = np.array(img, np.float32) / 127.5 - 1
    return scaled.transpose((2, 0, 1)), label
def create_dataset_train(mindrecord_file_pos, config):
    """
    Build the training dataset.

    Args:
        mindrecord_file_pos(string): mindrecord file for positive samples.
        config(dict): config of dataset; augment_severity, augment_prob and batch_size are read.

    Returns:
        dataset
    """
    # Sharding follows the RANK_SIZE / RANK_ID environment variables (single shard by default).
    shard_count = int(os.getenv("RANK_SIZE", '1'))
    shard_index = int(os.getenv("RANK_ID", '0'))

    ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=4,
                        num_shards=shard_count, shard_id=shard_index, shuffle=True)
    ds = ds.map(operations=C.Decode(), input_columns=["image"], num_parallel_workers=8)

    augmentor = Augmentor(config.augment_severity, config.augment_prob)
    ds = ds.map(operations=augmentor.process, input_columns=["image"],
                num_parallel_workers=1, python_multiprocessing=True)

    # Randomly turn about half of the samples into negative ones, then unify and normalise.
    label_ops = [random_neg_with_rotate, unify_img_label, transform_image]
    ds = ds.map(operations=label_ops, input_columns=["image", "label"],
                num_parallel_workers=8, python_multiprocessing=True)

    # The dataset is repeated twice to account for positive and negative samples.
    ds = ds.repeat(2)

    # apply batch operations
    return ds.batch(config.batch_size, drop_remainder=True)
def resize_image(img, label):
    """Fit an image to (image_height, image_width) and normalise it.

    The image is scaled uniformly so its height becomes the module-level
    image_height. Wider results are cropped on the right; narrower ones are
    pasted onto a white canvas. Pixels are then mapped with x / 127.5 - 1 as
    float32 and the channel axis is moved to the front.
    """
    factor = image_height / img.shape[0]
    img = cv2.resize(img, None, fx=factor, fy=factor)

    if img.shape[1] > image_width:
        img = img[:, 0:image_width]
    else:
        # White (255) canvas with the resized image in its left part.
        canvas = np.full((image_height, image_width, 3), 255, np.uint8)
        canvas[:image_height, :img.shape[1]] = img
        img = canvas

    scaled = np.array(img, np.float32) / 127.5 - 1
    return scaled.transpose((2, 0, 1)), label
def create_dataset_eval(mindrecord_file_pos, config):
    """
    Build the evaluation dataset.

    Args:
        mindrecord_file_pos(string): mindrecord file for positive samples.
        config(dict): config of dataset; im_size_h, im_size_w and work_nums are read.

    Returns:
        dataset
    """
    global image_height
    global image_width

    # Sharding follows the RANK_SIZE / RANK_ID environment variables (single shard by default).
    shard_count = int(os.getenv("RANK_SIZE", '1'))
    shard_index = int(os.getenv("RANK_ID", '0'))

    ds = de.MindDataset(mindrecord_file_pos, columns_list=["image", "label"], num_parallel_workers=1,
                        num_shards=shard_count, shard_id=shard_index, shuffle=False)
    ds = ds.map(operations=C.Decode(), input_columns=["image"], num_parallel_workers=8)

    # resize_image reads its target size from these module-level variables.
    image_height = config.im_size_h
    image_width = config.im_size_w
    ds = ds.map(operations=resize_image, input_columns=["image", "label"],
                num_parallel_workers=config.work_nums, python_multiprocessing=False)

    # apply batch operations
    return ds.batch(1, drop_remainder=True)
| @@ -0,0 +1,641 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| from __future__ import absolute_import, division, print_function, unicode_literals | |||
| from math import ceil, sin, pi | |||
| from random import choice, random | |||
| from random import randint, uniform | |||
| import cv2 | |||
| import numpy as np | |||
| from numpy.random import randn | |||
| from PIL import ImageEnhance, Image | |||
| from scipy.ndimage import filters, interpolation | |||
| from scipy.ndimage.interpolation import map_coordinates | |||
| from skimage.transform import PiecewiseAffineTransform, warp | |||
# NumPy's integer sampler under its own name, distinct from random.randint imported above.
from numpy.random import randint as nprandint
def lucky(p=0.3, rand_func=random):
    """Draw once from rand_func and report whether the draw is below p."""
    draw = rand_func()
    return draw < p
def rgeometry(im, eps=0.04, delta=0.8, cval=None, severity=1):
    """
    Random affine transform applied to every channel of the image.

    eps scales the random perturbation of the 2x2 matrix and delta the random
    offset jitter; both are multiplied by |severity|. cval is the fill value
    per channel (a scalar is repeated for every channel, None means 0).
    A severity of 0 returns the input unchanged.
    """
    if severity == 0:
        return im

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (float, int)):
        cval = [cval] * im.shape[2]

    strength = abs(severity)
    eps = strength * eps
    delta = strength * delta

    # Random draws happen in this exact order: three matrix entries, then the two jitter terms.
    m00 = 1 + eps * randn()
    m10 = eps * randn()
    m11 = 1.0 + eps * randn()
    matrix = np.array([[m00, 0.0], [m10, m11]])

    center = np.array(im.shape[:2]) * 0.5
    jitter = np.array([randn() * delta, randn() * delta])
    # Offset that keeps the image centre in place, plus the random jitter.
    offset = center - np.dot(matrix, center) + jitter

    planes = []
    for idx, plane in enumerate(cv2.split(im)):
        planes.append(interpolation.affine_transform(plane, matrix, offset=offset, order=1,
                                                     mode='constant', cval=cval[idx]))
    return np.array(cv2.merge(planes))
def rdistort(im, distort=4.0, dsigma=10.0, cval=None, severity=1):
    """
    Random smooth displacement of the pixel grid, applied to every channel.

    Two Gaussian-filtered noise fields (sigma = dsigma * (1 - |severity|)) are
    rescaled so their largest absolute value is distort * |severity| and used
    as row and column displacements. cval is the fill value per channel
    (a scalar is repeated for every channel, None means 0). A severity of 0
    returns the input unchanged.
    """
    if severity == 0:
        return im

    if cval is None:
        cval = [0] * im.shape[2]
    elif isinstance(cval, (float, int)):
        cval = [cval] * im.shape[2]

    strength = abs(severity)
    distort = strength * distort
    dsigma = dsigma * (1 - strength)

    h, w = im.shape[:2]
    row_shift = filters.gaussian_filter(randn(h, w), dsigma)
    col_shift = filters.gaussian_filter(randn(h, w), dsigma)
    row_shift = row_shift * (distort / np.abs(row_shift).max())
    col_shift = col_shift * (distort / np.abs(col_shift).max())

    # With indexing='ij' the first grid holds the row index of every pixel and
    # the second grid holds its column index.
    rows, cols = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
    coordinates = np.array([rows + row_shift, cols + col_shift])

    planes = [map_coordinates(plane, coordinates, order=1, cval=cval[idx])
              for idx, plane in enumerate(cv2.split(im))]
    return np.array(cv2.merge(planes))
def reverse_color(im):
    """Invert pixel values by subtracting them from 255."""
    inverted = 255 - im
    return inverted
def resize(im, fx=None, fy=None, delta=0.3, severity=1):
    """Rescale `im` by factor `fx` along the width and `fy` along the height.

    A factor left as `None` is drawn at random as
    `1 + delta * severity * u`, with `u` taken from `uniform(-1, 1)`; with the
    defaults that is a change of at most 30% around 1. The horizontal factor
    is drawn before the vertical one.
    """
    def _factor(given):
        # Keep an explicit factor, otherwise draw one around 1.
        if given is None:
            return 1 + delta * severity * uniform(-1, 1)
        return given

    fx = _factor(fx)
    fy = _factor(fy)
    scaled = cv2.resize(im, None, fx=fx, fy=fy)
    return np.array(scaled)
def warp_perspective(im, theta=20, delta=10, cval=0, severity=1):
    """Apply a random perspective warp by jittering the four image corners.

    Args:
        im: image array; `im.shape[2]` is read when `cval` is None or a scalar.
        theta: maximum corner displacement in pixels (times severity).
        delta: margin (times severity) kept between every warped corner and
            the image border, so the corners stay inside the frame.
        cval: border fill value; a scalar is repeated once per channel.
        severity: strength multiplier; 0 returns `im` untouched.

    Returns:
        The warped image with the same width and height as the input.
    """
    if severity == 0:
        return im
    if cval is None or isinstance(cval, (float, int)):
        fill = 0 if cval is None else cval
        cval = [fill] * im.shape[2]
    delta = delta * severity
    rows, cols = im.shape[:2]
    # Corner order: top-left, top-right, bottom-right, bottom-left (x, y).
    corners = np.float32([[0, 0], [cols, 0], [cols, rows], [0, rows]])
    right, bottom = [1, 2], [2, 3]
    # Jitter the corners randomly, then constrain how far they can move.
    moved = corners + np.random.uniform(-1, 1, corners.shape) * theta * severity
    moved = np.maximum(moved, delta)  # every coordinate is at least `delta`
    moved[right, 0] = np.minimum(moved[right, 0], corners[right, 0] - delta)
    moved[bottom, 1] = np.minimum(moved[bottom, 1], corners[bottom, 1] - delta)
    moved = np.float32(moved)
    transform = cv2.getPerspectiveTransform(corners, moved)
    return np.array(cv2.warpPerspective(im, transform, (cols, rows), borderValue=cval))
def noise_salt_pepper(image, percentage=0.001, severity=1):
    """Add salt-and-pepper noise to `image` IN PLACE and return it.

    `amount = int(percentage * severity * height * width)` positions are drawn
    (with replacement) for bright "salt" values in [200, 255) and then again
    for dark "pepper" values in [0, 50). All channels of a chosen pixel get
    the same value.

    Args:
        image: 3-D image array (H, W, C); it is modified in place.
        percentage: fraction of pixels to hit with each kind of noise.
        severity: multiplier applied to `percentage`.

    Returns:
        The same `image` object.
    """
    percentage *= severity
    amount = int(percentage * image.shape[0] * image.shape[1])
    # Nothing to draw (also covers a negative severity).
    if amount <= 0:
        return image
    _, _, deep = image.shape
    # (low, high) value ranges: salt first, then pepper.
    for low, high in ((200, 255), (0, 50)):
        # np.random.randint excludes the upper bound, so pass the full
        # dimension. The former `i - 1` never reached the last two rows/columns
        # and raised ValueError for a dimension of size 1.
        rows, cols = (np.random.randint(0, size, amount) for size in image.shape[:2])
        values = nprandint(low, high, amount)
        image[rows, cols, :] = values.repeat(deep, axis=0).reshape(amount, deep)
    return image
def noise_gaussian(im, sigma=20, severity=1):
    """Add zero-mean Gaussian noise with standard deviation `sigma * |severity|`.

    The sum is computed in floating point and converted back with `cvt_uint8`.
    """
    scaled_sigma = sigma * abs(severity)
    noise = scaled_sigma * np.random.randn(*im.shape)
    noisy = np.float32(im) + noise
    return cvt_uint8(noisy)
def noise_gamma(im, extend=30, severity=1):
    """Add mean-centred gamma-distributed noise to `im`.

    Noise is drawn from a gamma distribution with shape 2 and scale
    `int(extend * |severity|)`; its mean is subtracted before it is added, and
    the result is converted back with `cvt_uint8`.
    """
    scale = int(extend * abs(severity))
    noise = np.random.gamma(shape=2, scale=scale, size=im.shape)
    centred = noise - np.mean(noise)
    return cvt_uint8(np.float32(im) + centred)
def noise_speckle(img, extend=40, severity=1):
    """Add Gaussian-smoothed random noise to `img`.

    Smoothing the noise field (sigma 1) yields larger 'blotches' that look
    more realistic than independent per-pixel Gaussian noise. The noise
    amplitude is `|severity| * extend`.
    """
    amplitude = abs(severity) * extend
    raw_noise = np.random.randn(*img.shape) * amplitude
    blotches = filters.gaussian_filter(raw_noise, 1)
    return cvt_uint8(img + blotches)
def noise_blur(im, severity=1):
    """Blur `im` by shrinking it and then enlarging it back to its original size.

    The shrink factor is `1 - 0.2 * |severity|`, but never smaller than
    `19 / height`, so the intermediate image stays about 19 pixels tall or more.
    """
    h, w = im.shape[:2]
    min_height = 19.0
    factor = max(1 - 0.2 * abs(severity), min_height / h)
    shrunk = cv2.resize(im, None, fx=factor, fy=factor)
    restored = cv2.resize(shrunk, (w, h))
    return np.array(restored)
def add_noise(img):
    """Randomly combine several noise effects on an image array.

    Each effect is applied independently with its own probability
    (salt-and-pepper 0.1, Gaussian 0.2, blur 0.5, speckle 0.5) at a severity
    drawn from `uniform(0.3, 0.6)`. With probability 0.3 the result is finally
    averaged with the untouched input.

    Two defects of the previous version are fixed:
      * the random strength was passed positionally, so it landed on
        `percentage` / `sigma` / `extend` of the noise helpers instead of
        `severity` (only `noise_blur` takes severity as its second
        parameter); it is now passed by keyword;
      * `noise_salt_pepper` writes into its argument, so the array kept for
        the final blend was no longer the original; it now receives a copy,
        which also leaves the caller's array unmodified.

    Args:
        img: image array.

    Returns:
        The noisy image.
    """
    img0 = img
    if lucky(0.1):
        # Copy first: noise_salt_pepper modifies its input in place.
        img = noise_salt_pepper(img.copy(), severity=uniform(0.3, 0.6))
    if lucky(0.2):
        img = noise_gaussian(img, severity=uniform(0.3, 0.6))
    if lucky(0.5):
        img = noise_blur(img, severity=uniform(0.3, 0.6))
    if lucky(0.5):
        img = noise_speckle(img, severity=uniform(0.3, 0.6))
    if lucky(0.3):
        # Blend the noisy result half-and-half with the original.
        img = img // 2 + img0 // 2
    return img
def gaussian_blur(im, sigma=1, kernel_size=None, severity=1):
    """Gaussian-blur `im` with a square kernel.

    When `kernel_size` is given, `severity` is ignored; otherwise the size is
    `int(11 * severity)`. Sizes below 3 return `im` unchanged, and an even
    size is reduced by one to make it odd.
    """
    step = 11
    ksize = int(step * severity) if kernel_size is None else kernel_size
    if ksize < 3.0:
        return im
    if not ksize % 2:
        ksize -= 1
    blurred = cv2.GaussianBlur(im, (ksize, ksize), sigma)
    return np.array(blurred)
def rotate_shrink(im, max_angle=6, severity=0.5, cval=255):
    """Rotate `im` about its center by a random angle and shrink it to fit.

    The angle (degrees) is drawn with `randint` from
    `[-int(|severity| * max_angle), +int(|severity| * max_angle)]`. The scale
    is chosen so that the bounding box of the rotated image fits into the
    original width and height; the output keeps the input size and the
    uncovered border is filled with `cval`.
    """
    limit = int(abs(severity) * max_angle)
    angle = randint(-limit, limit)
    h, w = im.shape[:2]
    radians = np.deg2rad(angle)
    sin_a, cos_a = np.sin(radians), np.cos(radians)
    # Bounding box of the rotated image at scale 1.
    box_w = abs(sin_a * h) + abs(cos_a * w)
    box_h = abs(cos_a * h) + abs(sin_a * w)
    scale = min(w / box_w, h / box_h)
    matrix = cv2.getRotationMatrix2D((w // 2, h // 2), angle, scale)
    rotated = cv2.warpAffine(im, matrix, (w, h), borderValue=cval)
    return np.array(rotated)
def rotate_about_center(im, angle=4, scale=1, b_mode=None, cval=None, severity=1):
    """Rotate `im` by `severity * angle` degrees, enlarging the canvas to fit.

    For color images it is recommended to pass a non-None `b_mode`, so that
    the border is filled by replicating edge pixels instead of the constant
    `cval`.

    Args:
        im: image array; `im.shape[2]` is read when `cval` is None or a scalar.
        angle: rotation in degrees (times severity); 0 returns `im` untouched.
        scale: isotropic scale applied together with the rotation.
        b_mode: `None` for a constant border, anything else for edge replication.
        cval: constant border value; a scalar is repeated once per channel.
        severity: multiplier applied to `angle`.

    Returns:
        The rotated image on a canvas of size ceil(new width) x ceil(new height).
    """
    angle = severity * angle
    if angle == 0:
        return im
    h, w = im.shape[0], im.shape[1]
    radians = np.deg2rad(angle)
    # Size of the bounding box of the rotated, scaled image.
    nw = (abs(np.sin(radians) * h) + abs(np.cos(radians) * w)) * scale
    nh = (abs(np.cos(radians) * h) + abs(np.sin(radians) * w)) * scale
    # Rotation about the center of the new canvas.
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
    # Move from the old center to the new one, combined with the rotation;
    # only the translation column of the matrix is affected.
    shift = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    rot_mat[:, 2] += shift
    if cval is None or isinstance(cval, (int, float)):
        fill = 0 if cval is None else cval
        cval = [fill] * im.shape[2]
    warp_kwargs = {'flags': cv2.INTER_LANCZOS4}
    if b_mode is None:
        warp_kwargs['borderMode'] = cv2.BORDER_CONSTANT
        warp_kwargs['borderValue'] = cval
    else:
        warp_kwargs['borderMode'] = cv2.BORDER_REPLICATE
    out_size = (int(ceil(nw)), int(ceil(nh)))
    return np.array(cv2.warpAffine(im, rot_mat, out_size, **warp_kwargs))
def randcrop(img, max_per=0.15, severity=1):
    """Randomly trim a margin from each side of `img`.

    A bound `k = int(rows * cols * max_per * severity / (rows + cols))` is
    computed and four independent `randint(0, k)` margins are drawn in the
    order top, bottom, left, right.
    """
    perc = max_per * severity
    rows, cols = img.shape[:2]
    k = int(rows * cols * perc / (rows + cols))
    top = randint(0, k)
    bottom = rows - randint(0, k)
    left = randint(0, k)
    right = cols - randint(0, k)
    cropped = img[top:bottom, left:right]
    return np.array(cropped)
def enhance_sharpness(img, r=None, severity=1):
    """Adjust the sharpness of an image array.

    An enhancement factor of 0.0 gives a blurred image, 1.0 the original
    image and 2.0 a sharpened image. When `r` is None the factor is random:
    with probability 0.5 from `[1 - 0.5 * |severity|, 1]`, otherwise from
    `[1, 1 + |severity|]`.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Sharpness(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
def enhance_contrast(img, r=None, severity=1):
    """Adjust the contrast of an image array, like the contrast control on a TV.

    An enhancement factor of 0.0 gives a solid grey image and 1.0 the
    original image. When `r` is None the factor is random: with probability
    0.5 from `[1 - 0.5 * |severity|, 1]`, otherwise from `[1, 1 + |severity|]`.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Contrast(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
def enhance_brightness(img, r=None, severity=1):
    """Adjust the brightness of an image array.

    An enhancement factor of 0.0 gives a black image and 1.0 the original
    image. When `r` is None the factor is random: with probability 0.5 from
    `[1 - 0.2 * |severity|, 1]`, otherwise from `[1, 1 + 0.5 * |severity|]`.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.2 * strength, 1)
        else:
            r = uniform(1, 1 + strength * 0.5)
    enhancer = ImageEnhance.Brightness(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
def enhance_color(img, r=None, severity=1):
    """Adjust the colour balance of an image array, like the controls on a colour TV.

    An enhancement factor of 0.0 gives a black and white image and 1.0 the
    original image. When `r` is None the factor is random: with probability
    0.5 from `[1 - 0.5 * |severity|, 1]`, otherwise from `[1, 1 + |severity|]`.
    """
    if r is None:
        strength = abs(severity)
        if lucky(0.5):
            r = uniform(1 - 0.5 * strength, 1)
        else:
            r = uniform(1, 1 + strength)
    enhancer = ImageEnhance.Color(Image.fromarray(img))
    return np.array(enhancer.enhance(r))
def enhance(img):
    """Randomly chain sharpness, contrast and brightness enhancement.

    Each step is applied independently with probability 0.3, in that order,
    using the random default factor of the corresponding helper.
    """
    for step in (enhance_sharpness, enhance_contrast, enhance_brightness):
        if lucky(0.3):
            img = step(img)
    return np.array(img)
def draw_line(im):
    """Draw one random anti-aliased line across the image.

    The line starts in the left third and ends in the right third of the
    width, at random heights, with a random colour and a thickness of 1
    (weight 0.8) or 2 (weight 0.2). `cv2.line` is called on `im` itself, and
    an array built from `im` is returned.
    """
    h, w = im.shape[:2]
    # Start point: x in the left third.
    x_start = randint(0, w // 3)
    y_start = randint(0, h - 1)
    # End point: x in the right third.
    x_end = randint(w // 3 * 2, w - 1)
    y_end = randint(0, h - 1)
    color = [randint(0, 255) for _ in range(3)]
    thickness = lucky_choice((1, 2), (0.8, 0.2))
    cv2.line(im, (x_start, y_start), (x_end, y_end), color, thickness, cv2.LINE_AA)
    return np.array(im)
def center_im(im_outter, im_inner, shrink=True, vertical='center'):
    """Paste `im_inner` horizontally centered into a container image.

    Args:
        im_outter: the container image, or just its shape; for a shape a
            zero-filled uint8 container is created, extended with the
            trailing dimensions of `im_inner` when it has more axes.
        im_inner: the image to place.
        shrink: when `im_inner` is larger than the container, scale it down
            (keeping the aspect ratio) before any leftover overflow is cropped;
            when False the overflow is only cropped.
        vertical: vertical placement, one of 'center', 'top' or 'bottom'
            (case-insensitive).

    Returns:
        A new array; `im_outter` itself is not modified.

    Raises:
        ValueError: if `vertical` is not one of the supported values.
    """
    vertical = vertical.lower()
    if vertical not in ('center', 'top', 'bottom'):
        # Previously this fell through and failed later on an unbound `dh`.
        raise ValueError("vertical must be 'center', 'top' or 'bottom', got %r" % (vertical,))
    if not isinstance(im_outter, np.ndarray):
        shape = tuple(im_outter)
        if im_inner.ndim > len(shape):
            shape += im_inner.shape[len(shape):]
        im_outter = np.zeros(shape, np.uint8)
    H, W = im_outter.shape[:2]
    h, w = im_inner.shape[:2]
    if h > H or w > W:
        if shrink:
            rate = min(H / h, W / w)
            # The scale factor goes into fx/fy; the second positional argument
            # of cv2.resize is the target size, not a ratio.
            im_inner = cv2.resize(im_inner, None, fx=rate, fy=rate)
        # Crop whatever still overflows (always the case when shrink is False).
        im_inner = im_inner[:H, :W]
        h, w = im_inner.shape[:2]
    if vertical == 'center':
        dh = (H - h) // 2
    elif vertical == 'top':
        dh = 0
    else:
        dh = H - h
    dw = (W - w) // 2
    im = im_outter.copy()
    im[dh:dh + h, dw:dw + w] = im_inner
    return np.array(im)
def enhance_light(img):
    """Randomly chain mild sharpness, contrast and brightness enhancement.

    Each step is applied independently with probability 0.3, in that order,
    with an explicit factor drawn from a narrow range: sharpness [0.5, 1.5],
    contrast [0.7, 1.3], brightness [0.85, 1.15].
    """
    steps = (
        (enhance_sharpness, 0.5, 1.5),
        (enhance_contrast, 0.7, 1.3),
        (enhance_brightness, 0.85, 1.15),
    )
    for step, low, high in steps:
        if lucky(0.3):
            img = step(img, uniform(low, high))
    return np.array(img)
def gaussian2d(w, h):
    """Return a 2-D Gaussian bump (an ellipse-shaped blob) as an array.

    The grid spans `[-w // 2, w // 2)` horizontally and `[-h // 2, h // 2)`
    vertically, so the result has shape `(2 * (h // 2), 2 * (w // 2))`. The
    values are rescaled so that `max - min` equals 1.
    """
    half_h = h // 2
    half_w = w // 2
    grid_x, grid_y = np.meshgrid(np.arange(-half_w, half_w), np.arange(-half_h, half_h))
    mean_x, mean_y = np.mean(grid_x), np.mean(grid_y)
    std_x, std_y = np.std(grid_x), np.std(grid_y)
    term_y = (grid_y - mean_y) ** 2 / (std_y ** 2)
    term_x = (grid_x - mean_x) ** 2 / (std_x ** 2)
    z = np.exp(-(term_y + term_x) / 2)
    z /= (np.sqrt(2 * np.pi) * std_y)
    # Normalise the value range (max - min) to 1.
    z *= 1 / (np.max(z) - np.min(z))
    return z
def add_stain(img, theta=200, severity=0.5, bright_spot=False, iteration=1):
    """Add dark stains (or bright spots) to a 3-channel image.

    For every iteration a Gaussian blob is generated, distorted with
    `rdistort`, placed at a random position and either subtracted from the
    image (stain) or added to it (bright spot).

    Fixes compared with the previous version:
      * `theta` is scaled by |severity| once; it used to be rescaled inside
        the loop, so the factor compounded when `iteration` > 1;
      * the `randint` bounds of the bright-spot branch are converted to int
        (`temp // 1.5` is a float);
      * the blob is handed to `rdistort` as an (H, W, 1) array, because the
        scalar-`cval` path of `rdistort` reads `im.shape[2]`.

    Args:
        img: image array of shape (H, W, C).
        theta: stain intensity (times |severity|).
        severity: strength multiplier for `theta`.
        bright_spot: add a bright spot instead of a dark stain.
        iteration: number of stains to add.

    Returns:
        The image as a new array (uint8 after at least one iteration).
    """
    theta = theta * abs(severity)
    for _ in range(0, iteration):
        img = np.float32(img)
        big_h, big_w = img.shape[:2]
        # Reference size for the blob: shorter image side, clamped to [80, 300].
        temp = min(max(min(big_h, big_w), 80), 300)
        if not bright_spot:
            low, high = temp // 3, temp // 2
            gaussian_img = gaussian2d(randint(low, high), randint(low, high)) * theta
        else:
            low, high = int(temp // 1.5), int(temp / 0.8)
            # NOTE(review): unlike the stain branch the blob is not multiplied
            # by theta, so its values stay around 1 -- confirm this is intended.
            gaussian_img = gaussian2d(randint(low, high), randint(low, high))
        small_h, small_w = gaussian_img.shape[:2]
        tmp_min = int(min(small_h, small_w))
        # Heavily distort the ellipse; cval should preferably not be too large.
        gaussian_img = rdistort(gaussian_img[:, :, None],
                                randint(tmp_min // 10, tmp_min // 6), cval=0)
        # Back to 2-D whether the channel axis was kept or dropped.
        gaussian_img = np.asarray(gaussian_img).reshape(small_h, small_w)
        x1 = randint(0, max(big_w - 5, 0))
        y1 = randint(0, max(big_h - 5, 0))
        y2 = int(big_h - 1) if y1 + small_h > big_h else int(y1 + small_h)
        x2 = int(big_w - 1) if x1 + small_w > big_w else int(x1 + small_w)
        # Cut the blob to the target window; the trailing axis broadcasts the
        # same values over all channels.
        patch = np.float32(gaussian_img[:(y2 - y1), :(x2 - x1)])[:, :, None]
        if not bright_spot:
            img[y1:y2, x1:x2] -= patch
        else:
            temp1 = min([np.median(patch), 255 - np.mean(img[y1:y2, x1:x2])])
            patch = np.clip(patch - temp1, 0, 255)
            img[y1:y2, x1:x2] = np.clip(img[y1:y2, x1:x2] + patch, 0, 255)
        img = cvt_uint8(img)
    return np.array(img)
def shift_color(im, delta_max=10, severity=0.5):
    """Randomly shift the colour of an image by one offset per channel.

    Args:
        im: image array of shape (H, W, C).
        delta_max: either a number, giving the symmetric range
            `[-delta_max, delta_max)`, or a `(min, max)` tuple. The bounds are
            scaled by |severity| and truncated to int. (The tuple form was
            handled by an unreachable branch before and raised TypeError.)
        severity: strength multiplier; 0 returns `im` untouched.

    Returns:
        The shifted image converted with `cvt_uint8`, or `im` itself when the
        scaled range is empty (e.g. a very small severity), which previously
        raised ValueError inside `np.random.randint`.
    """
    if severity == 0:
        return im
    strength = abs(severity)
    if isinstance(delta_max, tuple):
        delta_min, delta_max = (int(bound * strength) for bound in delta_max)
    else:
        delta_max = int(delta_max * strength)
        delta_min = -delta_max
    # np.random.randint needs low < high; an empty range means "no shift".
    if delta_min >= delta_max:
        return im
    im = np.float32(im)
    # One offset per channel, broadcast over all pixels.
    delta = np.random.randint(delta_min, delta_max, (1, 1, im.shape[2]))
    im += delta
    return np.array(cvt_uint8(im))
def random_contrast(img, contrast_delta=0.3, bright_delta=0.1):
    """Randomly change the contrast and brightness of an image.

    Each delta is either a number `d`, meaning the range `[-d, d]`, or a
    `(min, max)` tuple. A contrast factor `fc` and a brightness factor `fb`
    are drawn as `1 + uniform(min, max)`; every channel is then remapped to
    `(value - mean) * fc + mean * fb`, where `mean` is the per-channel mean.

    Returns:
        A uint8 array; a 2-D input comes back with a trailing channel axis.
    """
    def _bounds(spec):
        # Normalise a number or a (min, max) tuple to a pair of bounds.
        if isinstance(spec, tuple):
            low, high = spec
            return low, high
        return -spec, spec

    c_low, c_high = _bounds(contrast_delta)
    b_low, b_high = _bounds(bright_delta)
    fc = 1 + uniform(c_low, c_high)
    fb = 1 + uniform(b_low, b_high)
    pixels = img.astype(np.float32)
    if img.ndim == 2:
        pixels = pixels[:, :, None]
    channel_mean = pixels.mean(axis=(0, 1), keepdims=True)
    pixels = (pixels - channel_mean) * fc + channel_mean * fb
    return np.array(pixels.clip(0, 255).astype(np.uint8))
def period_map(xi, times, extent):
    """Map positions `xi` onto a periodic, sine-based displacement.

    `|xi|` is folded into `[0, times)`. An amplitude is drawn with
    `randint(extent, extent + 10)` and given a random sign. The first half of
    each period follows `sin` from `3/2 * pi` to `2 * pi`, the second half
    from `pi` to `3/2 * pi`. Only this sine shape is implemented so far.

    Returns:
        An array with one displacement per entry of `xi`, or `None` when
        `times` is below 1.
    """
    if times < 1:
        return None
    period = float(times)
    amplitude = randint(extent, extent + 10) * choice([1, -1])

    def wave(x):
        # Pick the sine phase for the half of the period that x falls in.
        phase = 3 / 2.0 if x < period / 2.0 else 1 / 2.0
        return amplitude * sin(pi * (phase + x / period))

    folded = np.fabs(xi) % period
    return np.array([wave(x) for x in folded])
def whole_rdistort(im, severity=1, scop=40):
    """Apply a wave-like distortion to the whole image.

    A coarse grid of control points is laid over the image, the points are
    displaced (rows by `period_map`, columns by a sine wave of amplitude
    `severity * scop`), and the image is warped with a piecewise affine
    transform estimated from the two point sets. The warped result is
    multiplied by 255 and converted with `cvt_uint8`.

    Returns:
        The distorted image with the input height and width, or `im` itself
        when severity is 0.
    """
    if severity == 0:
        return im
    theta = severity * scop
    rows, cols = im.shape[:2]
    # At least 3 control points per axis.
    n_cols = max(int(cols * severity * 0.05), 3)
    n_rows = max(int(rows * severity * 0.05), 3)
    grid_rows, grid_cols = np.meshgrid(np.linspace(0, rows, n_rows),
                                       np.linspace(0, cols, n_cols))
    src = np.column_stack([grid_cols.ravel(), grid_rows.ravel()])
    n_points = src.shape[0]
    # The key step for the wave distortion effect: displace the rows.
    dst_rows = src[:, 1] - period_map(np.linspace(0, 100, n_points), 50, 20)
    # Columns follow one and a half sine periods.
    dst_cols = src[:, 0] - np.sin(np.linspace(0, 3 * np.pi, n_points)) * theta
    dst = np.column_stack([dst_cols, dst_rows])
    tform = PiecewiseAffineTransform()
    tform.estimate(src, dst)
    warped = warp(im, tform, mode='edge', output_shape=(rows, cols)) * 255
    return np.array(cvt_uint8(warped))
def lucky_choice(seq, ps=None, rand_func=random):
    """Pick an element of `seq` at random, weighted by `ps`.

    Args:
        seq: candidates; an empty sequence yields `None`.
        ps: one weight per candidate (they need not sum to 1); `None` means a
            plain `choice(seq)`.
        rand_func: callable returning the random number that is scaled by the
            total weight.

    Returns:
        The selected element.
    """
    if not seq:
        return None
    if ps is None:
        return choice(seq)
    cumulative = np.cumsum(ps)
    threshold = rand_func() * cumulative[-1]
    # Index of the first cumulative weight that reaches the threshold,
    # clamped to the last valid position of seq.
    position = np.count_nonzero(cumulative < threshold)
    position = min(position, len(seq) - 1)
    return seq[position]
def cvt_uint8(im):
    """Convert an image array to `np.uint8`.

    A uint8 array is returned as is; anything else is rounded, clipped to
    [0, 255] and cast.
    """
    if im.dtype != np.uint8:
        im = np.clip(np.round(im), 0, 255).astype(np.uint8)
    return im
def to_image(im):
    """Convert `im` to a PIL `Image`; an `Image` is returned unchanged.

    The channel axis of a 3-D array is reversed first (BGR as used by cv2 to
    RGB as used by Image).
    """
    if isinstance(im, Image.Image):
        return im
    pixels = im[:, :, ::-1] if im.ndim == 3 else im
    return Image.fromarray(pixels)
def to_array(im):
    """Convert a PIL `Image` to an `np.array`; other inputs are returned unchanged.

    The channel axis of a 3-D result is reversed (RGB as used by Image to BGR
    as used by cv2).
    """
    if not isinstance(im, Image.Image):
        return im
    pixels = np.array(im)
    return pixels[:, :, ::-1] if pixels.ndim == 3 else pixels
def unify_img(img, img_height=64, max_length=512, img_channel=3):
    """Bring an image to a fixed height and a fixed canvas width.

    The image is resized to `img_height`, keeping its aspect ratio. If the
    resulting width exceeds `max_length` it is cropped on the right;
    otherwise it is pasted at the left of a white (255)
    `img_height x max_length x img_channel` canvas.
    """
    src_h, src_w = img.shape[0], img.shape[1]
    img_width = int(float(src_w) / src_h * img_height)
    resized = cv2.resize(img, (img_width, img_height))
    if img_width > max_length:
        return np.array(resized[:, 0:max_length])
    # White canvas with the resized image in the top-left corner.
    canvas = np.full((img_height, max_length, img_channel), 255, np.uint8)
    canvas[0:img_height, 0:img_width] = resized
    return np.array(canvas)
def unify_img_label(img, label, img_height=64, max_length=512, min_length=192, img_channel=3):
    """Unify the image size and pass the label through unchanged.

    The image handling is exactly that of `unify_img` (resize to
    `img_height`, then crop to or pad up to `max_length` on a white canvas),
    so this function delegates to it instead of duplicating the code.

    Args:
        img: input image array.
        label: returned as is.
        img_height: target height.
        max_length: target canvas width.
        min_length: accepted for interface compatibility; currently unused.
        img_channel: number of channels of the padding canvas.

    Returns:
        A tuple `(unified image, label)`.
    """
    unified = unify_img(img, img_height=img_height, max_length=max_length, img_channel=img_channel)
    return unified, label
| @@ -0,0 +1,108 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train CNN direction model.""" | |||
| import argparse | |||
| import os | |||
| import random | |||
| from src.cnn_direction_model import CNNDirectionModel | |||
| from src.config import config1 as config | |||
| from src.dataset import create_dataset_train | |||
| import numpy as np | |||
| import mindspore as ms | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore import dataset as de | |||
| from mindspore.communication.management import init | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.nn.metrics import Accuracy | |||
| from mindspore.nn.optim.adam import Adam | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
def _str2bool(value):
    """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

    `type=bool` must not be used with argparse: `bool('False')` is True, so
    every non-empty value would switch the option on.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))


# Command-line options of the training script.
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=_str2bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
args_opt = parser.parse_args()

# Fix every random source used by the script to make runs repeatable.
random.seed(11)
np.random.seed(11)
de.config.set_seed(11)
ms.common.set_seed(11)
if __name__ == '__main__':
    target = args_opt.device_target
    ckpt_save_dir = config.save_checkpoint_path

    # Fail early with a clear message instead of a TypeError from `None + str`.
    if not args_opt.dataset_path:
        raise ValueError("--dataset_path must be set to the directory that contains ocr_pos.mindrecord0")

    # init context; device and rank information comes from the environment
    device_id = int(os.getenv('DEVICE_ID', '0'))
    rank_id = int(os.getenv('RANK_ID', '0'))
    rank_size = int(os.getenv('RANK_SIZE', '1'))
    run_distribute = rank_size > 1

    # Honour --device_target (default 'Ascend'); it used to be parsed but the
    # context was always created for "Ascend".
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=target,
                        device_id=device_id, save_graphs=False)

    print("train args: ", args_opt, "\ncfg: ", config,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    if run_distribute:
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL)
        init()

    # create dataset
    dataset = create_dataset_train(os.path.join(args_opt.dataset_path, "ocr_pos.mindrecord0"), config=config)
    step_size = dataset.get_dataset_size()

    # define net
    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])

    # init weight from a pre-trained checkpoint when one is given
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)

    lr = config.lr
    lr = Tensor(lr, ms.float32)

    # define opt
    opt = Adam(params=net.trainable_params(), learning_rate=lr, eps=1e-07)

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={"Accuracy": Accuracy()})

    # define callbacks
    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()
    cb = [time_cb, loss_cb]
    if config.save_checkpoint:
        config_ck = CheckpointConfig(save_checkpoint_steps=2500,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="cnn_direction_model", directory=ckpt_save_dir, config=config_ck)
        cb += [ckpt_cb]

    # train model
    model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False)