tianchu.gtc yingda.chen 3 years ago
parent
commit
3d41d6d620
22 changed files with 59 additions and 60 deletions
  1. +1
    -2
      modelscope/models/cv/image_panoptic_segmentation/panseg_model.py
  2. +1
    -0
      modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py
  3. +1
    -0
      modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py
  4. +1
    -0
      modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py
  5. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py
  6. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py
  7. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py
  8. +8
    -9
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py
  9. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py
  10. +2
    -4
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py
  11. +6
    -7
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py
  12. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py
  13. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py
  14. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py
  15. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py
  16. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py
  17. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py
  18. +2
    -0
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py
  19. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py
  20. +2
    -3
      modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py
  21. +7
    -9
      modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py
  22. +6
    -11
      modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py

+ 1
- 2
modelscope/models/cv/image_panoptic_segmentation/panseg_model.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

import torch
@@ -49,6 +50,4 @@ class SwinLPanopticSegmentation(TorchModel):
return results

def forward(self, Inputs):
import pdb
pdb.set_trace()
return self.model(**Inputs)

+ 1
- 0
modelscope/models/cv/image_semantic_segmentation/pan_merge/__init__.py View File

@@ -1 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .maskformer_semantic_head import MaskFormerSemanticHead

+ 1
- 0
modelscope/models/cv/image_semantic_segmentation/pan_merge/maskformer_semantic_head.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import torch
import torch.nn.functional as F
from mmdet.models.builder import HEADS


+ 1
- 0
modelscope/models/cv/image_semantic_segmentation/semantic_seg_model.py View File

@@ -1,3 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os.path as osp

import numpy as np


+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .models import backbone, decode_heads, segmentors
from .utils import (ResizeToMultiple, add_prefix, build_pixel_sampler,
seg_resize)

+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .backbone import BASEBEiT, BEiTAdapter
from .decode_heads import Mask2FormerHeadFromMMSeg
from .segmentors import EncoderDecoderMask2Former

+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .base import BASEBEiT
from .beit_adapter import BEiTAdapter



+ 8
- 9
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/adapter_modules.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git

import logging
from functools import partial
@@ -417,7 +416,7 @@ class SpatialPriorModule(nn.Module):
self.stem = nn.Sequential(*[
nn.Conv2d(
3, inplanes, kernel_size=3, stride=2, padding=1, bias=False),
nn.SyncBatchNorm(inplanes),
nn.BatchNorm2d(inplanes),
nn.ReLU(inplace=True),
nn.Conv2d(
inplanes,
@@ -426,7 +425,7 @@ class SpatialPriorModule(nn.Module):
stride=1,
padding=1,
bias=False),
nn.SyncBatchNorm(inplanes),
nn.BatchNorm2d(inplanes),
nn.ReLU(inplace=True),
nn.Conv2d(
inplanes,
@@ -435,7 +434,7 @@ class SpatialPriorModule(nn.Module):
stride=1,
padding=1,
bias=False),
nn.SyncBatchNorm(inplanes),
nn.BatchNorm2d(inplanes),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
])
@@ -447,7 +446,7 @@ class SpatialPriorModule(nn.Module):
stride=2,
padding=1,
bias=False),
nn.SyncBatchNorm(2 * inplanes),
nn.BatchNorm2d(2 * inplanes),
nn.ReLU(inplace=True)
])
self.conv3 = nn.Sequential(*[
@@ -458,7 +457,7 @@ class SpatialPriorModule(nn.Module):
stride=2,
padding=1,
bias=False),
nn.SyncBatchNorm(4 * inplanes),
nn.BatchNorm2d(4 * inplanes),
nn.ReLU(inplace=True)
])
self.conv4 = nn.Sequential(*[
@@ -469,7 +468,7 @@ class SpatialPriorModule(nn.Module):
stride=2,
padding=1,
bias=False),
nn.SyncBatchNorm(4 * inplanes),
nn.BatchNorm2d(4 * inplanes),
nn.ReLU(inplace=True)
])
self.fc1 = nn.Conv2d(


+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .beit import BASEBEiT

__all__ = ['BASEBEiT']

+ 2
- 4
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/base/beit.py View File

@@ -1,7 +1,5 @@
# BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254)
# Github source: https://github.com/microsoft/unilm/tree/master/beit
# This implementation refers to
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
import math
from functools import partial



+ 6
- 7
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/backbone/beit_adapter.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
import logging
import math

@@ -69,10 +68,10 @@ class BEiTAdapter(BASEBEiT):
])

self.up = nn.ConvTranspose2d(embed_dim, embed_dim, 2, 2)
self.norm1 = nn.SyncBatchNorm(embed_dim)
self.norm2 = nn.SyncBatchNorm(embed_dim)
self.norm3 = nn.SyncBatchNorm(embed_dim)
self.norm4 = nn.SyncBatchNorm(embed_dim)
self.norm1 = nn.BatchNorm2d(embed_dim)
self.norm2 = nn.BatchNorm2d(embed_dim)
self.norm3 = nn.BatchNorm2d(embed_dim)
self.norm4 = nn.BatchNorm2d(embed_dim)

self.up.apply(self._init_weights)
self.spm.apply(self._init_weights)


+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .mask2former_head_from_mmseg import Mask2FormerHeadFromMMSeg

__all__ = ['Mask2FormerHeadFromMMSeg']

+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/base_decode_head.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from abc import ABCMeta, abstractmethod

import torch


+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/decode_heads/mask2former_head_from_mmseg.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git

import copy



+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .encoder_decoder_mask2former import EncoderDecoderMask2Former

__all__ = ['EncoderDecoderMask2Former']

+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/base_segmentor.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
import warnings
from abc import ABCMeta, abstractmethod
from collections import OrderedDict


+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/models/segmentors/encoder_decoder_mask2former.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
import torch
import torch.nn as nn
import torch.nn.functional as F


+ 2
- 0
modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/__init__.py View File

@@ -1,3 +1,5 @@
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from .builder import build_pixel_sampler
from .data_process_func import ResizeToMultiple
from .seg_func import add_prefix, seg_resize


+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/builder.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git
from mmcv.utils import Registry, build_from_cfg

PIXEL_SAMPLERS = Registry('pixel sampler')


+ 2
- 3
modelscope/models/cv/image_semantic_segmentation/vit_adapter/utils/seg_func.py View File

@@ -1,6 +1,5 @@
# The implementation refers to the VitAdapter
# available at
# https://github.com/czczup/ViT-Adapter.git
# The implementation is adopted from VitAdapter,
# made publicly available under the Apache License at https://github.com/czczup/ViT-Adapter.git

import warnings



+ 7
- 9
modelscope/pipelines/cv/image_panoptic_segmentation_pipeline.py View File

@@ -4,11 +4,13 @@ from typing import Any, Dict, Union
import cv2
import numpy as np
import PIL
import torch

from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import load_image
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger

@@ -39,28 +41,24 @@ class ImagePanopticSegmentationPipeline(Pipeline):
# build the data pipeline

if isinstance(input, str):
# input is str, file names, pipeline loadimagefromfile
# collect data
data = dict(img_info=dict(filename=input), img_prefix=None)
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
img = np.array(load_image(input))
img = img[:, :, ::-1] # convert to bgr
elif isinstance(input, PIL.Image.Image):
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
img = np.array(input.convert('RGB'))
# collect data
data = dict(img=img)
elif isinstance(input, np.ndarray):
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
if len(input.shape) == 2:
img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
else:
img = input
img = img[:, :, ::-1] # in rgb order
# collect data
data = dict(img=img)

else:
raise TypeError(f'input should be either str, PIL.Image,'
f' np.array, but got {type(input)}')

# collect data
data = dict(img=img)
cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
test_pipeline = Compose(cfg.data.test.pipeline)



+ 6
- 11
modelscope/pipelines/cv/image_semantic_segmentation_pipeline.py View File

@@ -10,6 +10,7 @@ from modelscope.metainfo import Pipelines
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Input, Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import load_image
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger

@@ -40,28 +41,24 @@ class ImageSemanticSegmentationPipeline(Pipeline):
# build the data pipeline

if isinstance(input, str):
# input is str, file names, pipeline loadimagefromfile
# collect data
data = dict(img_info=dict(filename=input), img_prefix=None)
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
img = np.array(load_image(input))
img = img[:, :, ::-1] # convert to bgr
elif isinstance(input, PIL.Image.Image): # BGR
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
img = np.array(input)[:, :, ::-1]
# collect data
data = dict(img=img)
elif isinstance(input, np.ndarray):
cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
if len(input.shape) == 2:
img = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
else:
img = input
# collect data
data = dict(img=img)

else:
raise TypeError(f'input should be either str, PIL.Image,'
f' np.array, but got {type(input)}')

# data = dict(img=input)
# collect data
data = dict(img=img)
cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
test_pipeline = Compose(cfg.data.test.pipeline)

@@ -80,11 +77,9 @@ class ImageSemanticSegmentationPipeline(Pipeline):

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
results = self.model.inference(input)

return results

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:

results = self.model.postprocess(inputs)
outputs = {
OutputKeys.MASKS: results[OutputKeys.MASKS],


Loading…
Cancel
Save