You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

detr_r50_8x2_150e_coco.py 5.9 kB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. _base_ = [
  2. '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
  3. ]
  4. model = dict(
  5. type='DETR',
  6. backbone=dict(
  7. type='ResNet',
  8. depth=50,
  9. num_stages=4,
  10. out_indices=(3, ),
  11. frozen_stages=1,
  12. norm_cfg=dict(type='BN', requires_grad=False),
  13. norm_eval=True,
  14. style='pytorch',
  15. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  16. bbox_head=dict(
  17. type='DETRHead',
  18. num_classes=80,
  19. in_channels=2048,
  20. transformer=dict(
  21. type='Transformer',
  22. encoder=dict(
  23. type='DetrTransformerEncoder',
  24. num_layers=6,
  25. transformerlayers=dict(
  26. type='BaseTransformerLayer',
  27. attn_cfgs=[
  28. dict(
  29. type='MultiheadAttention',
  30. embed_dims=256,
  31. num_heads=8,
  32. dropout=0.1)
  33. ],
  34. feedforward_channels=2048,
  35. ffn_dropout=0.1,
  36. operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
  37. decoder=dict(
  38. type='DetrTransformerDecoder',
  39. return_intermediate=True,
  40. num_layers=6,
  41. transformerlayers=dict(
  42. type='DetrTransformerDecoderLayer',
  43. attn_cfgs=dict(
  44. type='MultiheadAttention',
  45. embed_dims=256,
  46. num_heads=8,
  47. dropout=0.1),
  48. feedforward_channels=2048,
  49. ffn_dropout=0.1,
  50. operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
  51. 'ffn', 'norm')),
  52. )),
  53. positional_encoding=dict(
  54. type='SinePositionalEncoding', num_feats=128, normalize=True),
  55. loss_cls=dict(
  56. type='CrossEntropyLoss',
  57. bg_cls_weight=0.1,
  58. use_sigmoid=False,
  59. loss_weight=1.0,
  60. class_weight=1.0),
  61. loss_bbox=dict(type='L1Loss', loss_weight=5.0),
  62. loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
  63. # training and testing settings
  64. train_cfg=dict(
  65. assigner=dict(
  66. type='HungarianAssigner',
  67. cls_cost=dict(type='ClassificationCost', weight=1.),
  68. reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
  69. iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
  70. test_cfg=dict(max_per_img=100))
  71. img_norm_cfg = dict(
  72. mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
  73. # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
  74. # from the default setting in mmdet.
  75. train_pipeline = [
  76. dict(type='LoadImageFromFile'),
  77. dict(type='LoadAnnotations', with_bbox=True),
  78. dict(type='RandomFlip', flip_ratio=0.5),
  79. dict(
  80. type='AutoAugment',
  81. policies=[[
  82. dict(
  83. type='Resize',
  84. img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
  85. (608, 1333), (640, 1333), (672, 1333), (704, 1333),
  86. (736, 1333), (768, 1333), (800, 1333)],
  87. multiscale_mode='value',
  88. keep_ratio=True)
  89. ],
  90. [
  91. dict(
  92. type='Resize',
  93. img_scale=[(400, 1333), (500, 1333), (600, 1333)],
  94. multiscale_mode='value',
  95. keep_ratio=True),
  96. dict(
  97. type='RandomCrop',
  98. crop_type='absolute_range',
  99. crop_size=(384, 600),
  100. allow_negative_crop=True),
  101. dict(
  102. type='Resize',
  103. img_scale=[(480, 1333), (512, 1333), (544, 1333),
  104. (576, 1333), (608, 1333), (640, 1333),
  105. (672, 1333), (704, 1333), (736, 1333),
  106. (768, 1333), (800, 1333)],
  107. multiscale_mode='value',
  108. override=True,
  109. keep_ratio=True)
  110. ]]),
  111. dict(type='Normalize', **img_norm_cfg),
  112. dict(type='Pad', size_divisor=1),
  113. dict(type='DefaultFormatBundle'),
  114. dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
  115. ]
  116. # test_pipeline, NOTE the Pad's size_divisor is different from the default
  117. # setting (size_divisor=32). While there is little effect on the performance
  118. # whether we use the default setting or use size_divisor=1.
  119. test_pipeline = [
  120. dict(type='LoadImageFromFile'),
  121. dict(
  122. type='MultiScaleFlipAug',
  123. img_scale=(1333, 800),
  124. flip=False,
  125. transforms=[
  126. dict(type='Resize', keep_ratio=True),
  127. dict(type='RandomFlip'),
  128. dict(type='Normalize', **img_norm_cfg),
  129. dict(type='Pad', size_divisor=1),
  130. dict(type='ImageToTensor', keys=['img']),
  131. dict(type='Collect', keys=['img'])
  132. ])
  133. ]
  134. data = dict(
  135. samples_per_gpu=2,
  136. workers_per_gpu=2,
  137. train=dict(pipeline=train_pipeline),
  138. val=dict(pipeline=test_pipeline),
  139. test=dict(pipeline=test_pipeline))
  140. # optimizer
  141. optimizer = dict(
  142. type='AdamW',
  143. lr=0.0001,
  144. weight_decay=0.0001,
  145. paramwise_cfg=dict(
  146. custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
  147. optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
  148. # learning policy
  149. lr_config = dict(policy='step', step=[100])
  150. runner = dict(type='EpochBasedRunner', max_epochs=150)

No Description

Contributors (3)