_base_ = [
    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
model = dict(
    type='DeformableDETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='ChannelMapper',
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    bbox_head=dict(
        type='DeformableDETRHead',
        with_box_refine=True,
        as_two_stage=True,
        num_query=300,
        num_classes=11,
        in_channels=2048,
        sync_cls_avg_factor=True,
        transformer=dict(
            type='DeformableDetrTransformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention', embed_dims=256),
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DeformableDetrTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        dict(
                            type='MultiScaleDeformableAttention',
                            embed_dims=256)
                    ],
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
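
# Optional sanity check -- a minimal sketch, not part of the config, and kept
# commented out so that loading this file with mmcv.Config stays side-effect
# free. Run it in a separate Python session (assumes mmdet 2.x and torch are
# installed; the config path is a hypothetical placeholder). It builds only
# the backbone/neck defined above and confirms that out_indices=(1, 2, 3)
# yields C3-C5 features whose channel counts (512/1024/2048) match the
# ChannelMapper in_channels, and that the neck emits the 4 scale levels fed
# to MultiScaleDeformableAttention.
#
#   import torch
#   from mmcv import Config
#   from mmdet.models import build_backbone, build_neck
#
#   cfg = Config.fromfile('path/to/this_config.py')  # hypothetical path
#   backbone = build_backbone(cfg.model.backbone)
#   neck = build_neck(cfg.model.neck)
#   backbone.eval()
#   neck.eval()
#   with torch.no_grad():
#       feats = backbone(torch.randn(1, 3, 400, 300))
#       outs = neck(feats)
#   print([f.shape[1] for f in feats])  # expected: [512, 1024, 2048]
#   print([o.shape[1] for o in outs])   # expected: [256, 256, 256, 256]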
# train_pipeline, NOTE: the img_scale and the Pad's size_divisor are different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(
        type='RandomFlip',
        flip_ratio=[0.2, 0.2, 0.2],
        direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
dataset_type = 'CocoDataset'
# 11 defect classes; bbox_head.num_classes above must equal len(classes)
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi', 'duojian',
           'shunjian', 'shaoxi', 'jiahan', 'yiwu')
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(
    type='AdamW',
    lr=2e-4,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1),
            'sampling_offsets': dict(lr_mult=0.1),
            'reference_points': dict(lr_mult=0.1)
        }))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
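
# Usage sketch (assumptions: this file is saved inside an MMDetection 2.x
# checkout; the config path and work_dir below are hypothetical placeholders):
#
#   # single-GPU training
#   python tools/train.py configs/deformable_detr/deformable_detr_r50_dsxw.py
#
#   # COCO-style bbox evaluation on the test split defined above
#   python tools/test.py configs/deformable_detr/deformable_detr_r50_dsxw.py \
#       work_dirs/deformable_detr_r50_dsxw/latest.pth --eval bbox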