_base_ = [ '../_base_/datasets/coco_detection.py', '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] model = dict( type='YOLOF', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(3, ), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, style='caffe', init_cfg=dict( type='Pretrained', checkpoint='open-mmlab://detectron/resnet50_caffe')), neck=dict( type='DilatedEncoder', in_channels=2048, out_channels=512, block_mid_channels=128, num_residual_blocks=4), bbox_head=dict( type='YOLOFHead', num_classes=80, in_channels=512, reg_decoded_bbox=True, anchor_generator=dict( type='AnchorGenerator', ratios=[1.0], scales=[1, 2, 4, 8, 16], strides=[32]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1., 1., 1., 1.], add_ctr_clamp=True, ctr_clamp=32), loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), # training and testing settings train_cfg=dict( assigner=dict( type='UniformAssigner', pos_ignore_thr=0.15, neg_ignore_thr=0.7), allowed_border=-1, pos_weight=-1, debug=False), test_cfg=dict( nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type='nms', iou_threshold=0.6), max_per_img=100)) # optimizer optimizer = dict( type='SGD', lr=0.12, momentum=0.9, weight_decay=0.0001, paramwise_cfg=dict( norm_decay_mult=0., custom_keys={'backbone': dict(lr_mult=1. / 3)})) lr_config = dict(warmup_iters=1500, warmup_ratio=0.00066667) # use caffe img_norm img_norm_cfg = dict( mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict(type='RandomShift', shift_ratio=0.5, max_shift_px=32), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']), ]) ] data = dict( samples_per_gpu=8, workers_per_gpu=8, train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))