From 3d41d6d6208edfcdb7cf7c00c571e0579405cde7 Mon Sep 17 00:00:00 2001 From: "tianchu.gtc" Date: Tue, 27 Sep 2022 23:22:46 +0800 Subject: [PATCH] [to #42322933] fix seg4demo Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10189886 --- .../image_panoptic_segmentation/panseg_model.py | 3 +-- .../pan_merge/__init__.py | 1 + .../pan_merge/maskformer_semantic_head.py | 1 + .../semantic_seg_model.py | 1 + .../vit_adapter/__init__.py | 2 ++ .../vit_adapter/models/__init__.py | 2 ++ .../vit_adapter/models/backbone/__init__.py | 2 ++ .../models/backbone/adapter_modules.py | 17 ++++++++--------- .../models/backbone/base/__init__.py | 2 ++ .../vit_adapter/models/backbone/base/beit.py | 6 ++---- .../vit_adapter/models/backbone/beit_adapter.py | 13 ++++++------- .../vit_adapter/models/decode_heads/__init__.py | 2 ++ .../models/decode_heads/base_decode_head.py | 5 ++--- .../decode_heads/mask2former_head_from_mmseg.py | 5 ++--- .../vit_adapter/models/segmentors/__init__.py | 2 ++ .../models/segmentors/base_segmentor.py | 5 ++--- .../segmentors/encoder_decoder_mask2former.py | 5 ++--- .../vit_adapter/utils/__init__.py | 2 ++ .../vit_adapter/utils/builder.py | 5 ++--- .../vit_adapter/utils/seg_func.py | 5 ++--- .../cv/image_panoptic_segmentation_pipeline.py | 16 +++++++--------- .../cv/image_semantic_segmentation_pipeline.py | 17 ++++++----------- 22 files changed, 59 insertions(+), 60 deletions(-) diff --git a/modelscope/models/cv/image_panoptic_segmentation/panseg_model.py b/modelscope/models/cv/image_panoptic_segmentation/panseg_model.py index f9022f90..f44c01e8 100644 --- a/modelscope/models/cv/image_panoptic_segmentation/panseg_model.py +++ b/modelscope/models/cv/image_panoptic_segmentation/panseg_model.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. import os.path as osp import torch @@ -49,6 +50,4 @@ class SwinLPanopticSegmentation(TorchModel): return results def forward(self, Inputs): - import pdb - pdb.set_trace() return self.model(**Inputs) diff --git a/modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py b/modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py index 2a75f318..6a31a308 100644 --- a/modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py @@ -1 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. from .maskformer_semantic_head import MaskFormerSemanticHead diff --git a/modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py b/modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py index 6769ebaf..2f3364d0 100644 --- a/modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py +++ b/modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. import torch import torch.nn.functional as F from mmdet.models.builder import HEADS diff --git a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py index 60acf28f..2b38ebad 100644 --- a/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py +++ b/modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py @@ -1,3 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. import os.path as osp import numpy as np diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py index 82eec1c6..3b9a301c 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .models import backbone, decode_heads, segmentors from .utils import (ResizeToMultiple, add_prefix, build_pixel_sampler, seg_resize) diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py index ae5c5acf..791dd26f 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .backbone import BASEBEiT, BEiTAdapter from .decode_heads import Mask2FormerHeadFromMMSeg from .segmentors import EncoderDecoderMask2Former diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py index ab4258c1..7abd0ef1 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .base import BASEBEiT from .beit_adapter import BEiTAdapter diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py index 03080342..cf30cca0 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import logging from functools import partial @@ -417,7 +416,7 @@ class SpatialPriorModule(nn.Module): self.stem = nn.Sequential(*[ nn.Conv2d( 3, inplanes, kernel_size=3, stride=2, padding=1, bias=False), - nn.SyncBatchNorm(inplanes), + nn.BatchNorm2d(inplanes), nn.ReLU(inplace=True), nn.Conv2d( inplanes, @@ -426,7 +425,7 @@ class SpatialPriorModule(nn.Module): stride=1, padding=1, bias=False), - nn.SyncBatchNorm(inplanes), + nn.BatchNorm2d(inplanes), nn.ReLU(inplace=True), nn.Conv2d( inplanes, @@ -435,7 +434,7 @@ class SpatialPriorModule(nn.Module): stride=1, padding=1, bias=False), - nn.SyncBatchNorm(inplanes), + nn.BatchNorm2d(inplanes), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2, padding=1) ]) @@ -447,7 +446,7 @@ class SpatialPriorModule(nn.Module): stride=2, padding=1, bias=False), - nn.SyncBatchNorm(2 * inplanes), + nn.BatchNorm2d(2 * inplanes), nn.ReLU(inplace=True) ]) self.conv3 = nn.Sequential(*[ @@ -458,7 +457,7 @@ class SpatialPriorModule(nn.Module): stride=2, padding=1, bias=False), - nn.SyncBatchNorm(4 * inplanes), + nn.BatchNorm2d(4 * inplanes), nn.ReLU(inplace=True) ]) self.conv4 = nn.Sequential(*[ @@ -469,7 +468,7 @@ class SpatialPriorModule(nn.Module): stride=2, padding=1, bias=False), - nn.SyncBatchNorm(4 * inplanes), + nn.BatchNorm2d(4 * inplanes), nn.ReLU(inplace=True) ]) self.fc1 = nn.Conv2d( diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py index 40b0fa89..5b33031f 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .beit import BASEBEiT __all__ = ['BASEBEiT'] diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py index a5811fb9..62f873ec 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py @@ -1,7 +1,5 @@ -# BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) -# Github source: https://github.com/microsoft/unilm/tree/master/beit -# This implementation refers to -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import math from functools import partial diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py index 02a4968e..182fc0c1 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import logging import math @@ -69,10 +68,10 @@ class BEiTAdapter(BASEBEiT): ]) self.up = nn.ConvTranspose2d(embed_dim, embed_dim, 2, 2) - self.norm1 = nn.SyncBatchNorm(embed_dim) - self.norm2 = nn.SyncBatchNorm(embed_dim) - self.norm3 = nn.SyncBatchNorm(embed_dim) - self.norm4 = nn.SyncBatchNorm(embed_dim) + self.norm1 = nn.BatchNorm2d(embed_dim) + self.norm2 = nn.BatchNorm2d(embed_dim) + self.norm3 = nn.BatchNorm2d(embed_dim) + self.norm4 = nn.BatchNorm2d(embed_dim) self.up.apply(self._init_weights) self.spm.apply(self._init_weights) diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py index 9367806f..12bf2a21 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .mask2former_head_from_mmseg import Mask2FormerHeadFromMMSeg __all__ = ['Mask2FormerHeadFromMMSeg'] diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py index 36660520..ae7a0416 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from abc import ABCMeta, abstractmethod import torch diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py index ad8b1586..c0681d2b 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import copy diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py index 1f2c8b04..18bbce0d 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .encoder_decoder_mask2former import EncoderDecoderMask2Former __all__ = ['EncoderDecoderMask2Former'] diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py index 8bd8fa3f..311352c2 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import warnings from abc import ABCMeta, abstractmethod from collections import OrderedDict diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py index 9287e8aa..50492374 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import torch import torch.nn as nn import torch.nn.functional as F diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py index dec8a5f2..9c4d5c4c 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py @@ -1,3 +1,5 @@ +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from .builder import build_pixel_sampler from .data_process_func import ResizeToMultiple from .seg_func import add_prefix, seg_resize diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py index 63d77fea..0603ef94 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git from mmcv.utils import Registry, build_from_cfg PIXEL_SAMPLERS = Registry('pixel sampler') diff --git a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py index fba46b81..db564cca 100644 --- a/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py +++ b/modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py @@ -1,6 +1,5 @@ -# The implementation refers to the VitAdapter -# available at -# https://github.com/czczup/ViT-Adapter.git +# The implementation is adopted from VitAdapter, +# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git import warnings diff --git a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py b/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py index 9ffc2b03..b96e709c 100644 --- a/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py @@ -4,11 +4,13 @@ from typing import Any, Dict, Union import cv2 import numpy as np import PIL +import torch from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Input, Pipeline from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import load_image from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -39,28 +41,24 @@ class ImagePanopticSegmentationPipeline(Pipeline): # build the data pipeline if isinstance(input, str): - # input is str, file names, pipeline loadimagefromfile - # collect data - data = dict(img_info=dict(filename=input), img_prefix=None) + cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' + img = np.array(load_image(input)) + img = img[:, :, ::-1] # convert to bgr elif isinstance(input, PIL.Image.Image): cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' img = np.array(input.convert('RGB')) - # collect data - data = dict(img=img) elif isinstance(input, np.ndarray): cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' if len(input.shape) == 2: img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) else: img = input - img = img[:, :, ::-1] # in rgb order - # collect data - data = dict(img=img) - else: raise TypeError(f'input should be either str, PIL.Image,' f' np.array, but got {type(input)}') + # collect data + data = dict(img=img) cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) test_pipeline = Compose(cfg.data.test.pipeline) diff --git a/modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py b/modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py index e3e1fd6b..023d9712 100644 --- a/modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py +++ b/modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py @@ -10,6 +10,7 @@ from modelscope.metainfo import Pipelines from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Input, Model, Pipeline from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import load_image from modelscope.utils.constant import Tasks from modelscope.utils.logger import get_logger @@ -40,28 +41,24 @@ class ImageSemanticSegmentationPipeline(Pipeline): # build the data pipeline if isinstance(input, str): - # input is str, file names, pipeline loadimagefromfile - # collect data - data = dict(img_info=dict(filename=input), img_prefix=None) + cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' + img = np.array(load_image(input)) + img = img[:, :, ::-1] # convert to bgr elif isinstance(input, PIL.Image.Image): # BGR cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' img = np.array(input)[:, :, ::-1] - # collect data - data = dict(img=img) elif isinstance(input, np.ndarray): cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam' if len(input.shape) == 2: img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR) else: img = input - # collect data - data = dict(img=img) - else: raise TypeError(f'input should be either str, PIL.Image,' f' np.array, but got {type(input)}') - # data = dict(img=input) + # collect data + data = dict(img=img) cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) test_pipeline = Compose(cfg.data.test.pipeline) @@ -80,11 +77,9 @@ class ImageSemanticSegmentationPipeline(Pipeline): def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: results = self.model.inference(input) - return results def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - results = self.model.postprocess(inputs) outputs = { OutputKeys.MASKS: results[OutputKeys.MASKS],