You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

yolact_r50_1x8_coco.py 5.1 kB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. _base_ = '../_base_/default_runtime.py'
  2. # model settings
  3. img_size = 550
  4. model = dict(
  5. type='YOLACT',
  6. backbone=dict(
  7. type='ResNet',
  8. depth=50,
  9. num_stages=4,
  10. out_indices=(0, 1, 2, 3),
  11. frozen_stages=-1, # do not freeze stem
  12. norm_cfg=dict(type='BN', requires_grad=True),
  13. norm_eval=False, # update the statistics of bn
  14. zero_init_residual=False,
  15. style='pytorch',
  16. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  17. neck=dict(
  18. type='FPN',
  19. in_channels=[256, 512, 1024, 2048],
  20. out_channels=256,
  21. start_level=1,
  22. add_extra_convs='on_input',
  23. num_outs=5,
  24. upsample_cfg=dict(mode='bilinear')),
  25. bbox_head=dict(
  26. type='YOLACTHead',
  27. num_classes=80,
  28. in_channels=256,
  29. feat_channels=256,
  30. anchor_generator=dict(
  31. type='AnchorGenerator',
  32. octave_base_scale=3,
  33. scales_per_octave=1,
  34. base_sizes=[8, 16, 32, 64, 128],
  35. ratios=[0.5, 1.0, 2.0],
  36. strides=[550.0 / x for x in [69, 35, 18, 9, 5]],
  37. centers=[(550 * 0.5 / x, 550 * 0.5 / x)
  38. for x in [69, 35, 18, 9, 5]]),
  39. bbox_coder=dict(
  40. type='DeltaXYWHBBoxCoder',
  41. target_means=[.0, .0, .0, .0],
  42. target_stds=[0.1, 0.1, 0.2, 0.2]),
  43. loss_cls=dict(
  44. type='CrossEntropyLoss',
  45. use_sigmoid=False,
  46. reduction='none',
  47. loss_weight=1.0),
  48. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),
  49. num_head_convs=1,
  50. num_protos=32,
  51. use_ohem=True),
  52. mask_head=dict(
  53. type='YOLACTProtonet',
  54. in_channels=256,
  55. num_protos=32,
  56. num_classes=80,
  57. max_masks_to_train=100,
  58. loss_mask_weight=6.125),
  59. segm_head=dict(
  60. type='YOLACTSegmHead',
  61. num_classes=80,
  62. in_channels=256,
  63. loss_segm=dict(
  64. type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
  65. # training and testing settings
  66. train_cfg=dict(
  67. assigner=dict(
  68. type='MaxIoUAssigner',
  69. pos_iou_thr=0.5,
  70. neg_iou_thr=0.4,
  71. min_pos_iou=0.,
  72. ignore_iof_thr=-1,
  73. gt_max_assign_all=False),
  74. # smoothl1_beta=1.,
  75. allowed_border=-1,
  76. pos_weight=-1,
  77. neg_pos_ratio=3,
  78. debug=False),
  79. test_cfg=dict(
  80. nms_pre=1000,
  81. min_bbox_size=0,
  82. score_thr=0.05,
  83. iou_thr=0.5,
  84. top_k=200,
  85. max_per_img=100))
  86. # dataset settings
  87. dataset_type = 'CocoDataset'
  88. data_root = 'data/coco/'
  89. img_norm_cfg = dict(
  90. mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)
  91. train_pipeline = [
  92. dict(type='LoadImageFromFile', to_float32=True),
  93. dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
  94. dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),
  95. dict(
  96. type='PhotoMetricDistortion',
  97. brightness_delta=32,
  98. contrast_range=(0.5, 1.5),
  99. saturation_range=(0.5, 1.5),
  100. hue_delta=18),
  101. dict(
  102. type='Expand',
  103. mean=img_norm_cfg['mean'],
  104. to_rgb=img_norm_cfg['to_rgb'],
  105. ratio_range=(1, 4)),
  106. dict(
  107. type='MinIoURandomCrop',
  108. min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
  109. min_crop_size=0.3),
  110. dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),
  111. dict(type='Normalize', **img_norm_cfg),
  112. dict(type='RandomFlip', flip_ratio=0.5),
  113. dict(type='DefaultFormatBundle'),
  114. dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
  115. ]
  116. test_pipeline = [
  117. dict(type='LoadImageFromFile'),
  118. dict(
  119. type='MultiScaleFlipAug',
  120. img_scale=(img_size, img_size),
  121. flip=False,
  122. transforms=[
  123. dict(type='Resize', keep_ratio=False),
  124. dict(type='Normalize', **img_norm_cfg),
  125. dict(type='ImageToTensor', keys=['img']),
  126. dict(type='Collect', keys=['img']),
  127. ])
  128. ]
  129. data = dict(
  130. samples_per_gpu=8,
  131. workers_per_gpu=4,
  132. train=dict(
  133. type=dataset_type,
  134. ann_file=data_root + 'annotations/instances_train2017.json',
  135. img_prefix=data_root + 'train2017/',
  136. pipeline=train_pipeline),
  137. val=dict(
  138. type=dataset_type,
  139. ann_file=data_root + 'annotations/instances_val2017.json',
  140. img_prefix=data_root + 'val2017/',
  141. pipeline=test_pipeline),
  142. test=dict(
  143. type=dataset_type,
  144. ann_file=data_root + 'annotations/instances_val2017.json',
  145. img_prefix=data_root + 'val2017/',
  146. pipeline=test_pipeline))
  147. # optimizer
  148. optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
  149. optimizer_config = dict()
  150. # learning policy
  151. lr_config = dict(
  152. policy='step',
  153. warmup='linear',
  154. warmup_iters=500,
  155. warmup_ratio=0.1,
  156. step=[20, 42, 49, 52])
  157. runner = dict(type='EpochBasedRunner', max_epochs=55)
  158. cudnn_benchmark = True
  159. evaluation = dict(metric=['bbox', 'segm'])

No Description

Contributors (3)