# Conflicts:
#	modelscope/preprocessors/multi_modal.py
#	modelscope/trainers/trainer.py
#	tests/pipelines/test_ofa_tasks.py
| @@ -36,6 +36,7 @@ do | |||
| -e TEST_ACCESS_TOKEN_SDKDEV=$TEST_ACCESS_TOKEN_SDKDEV \ | |||
| -e TEST_LEVEL=$TEST_LEVEL \ | |||
| -e TEST_UPLOAD_MS_TOKEN=$TEST_UPLOAD_MS_TOKEN \ | |||
| -e MODEL_TAG_URL=$MODEL_TAG_URL \ | |||
| --workdir=$CODE_DIR_IN_CONTAINER \ | |||
| --net host \ | |||
| ${IMAGE_NAME}:${IMAGE_VERSION} \ | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
| size 87228 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
| size 87228 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:1f24abbba43782d733dedbb0b4f416635af50263862e5632963ac9263e430555 | |||
| size 88542 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:176c824d99af119b36f743d3d90b44529167b0e4fc6db276da60fa140ee3f4a9 | |||
| size 87228 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:b158f6029d9763d7f84042f7c5835f398c688fdbb6b3f4fe6431101d4118c66c | |||
| size 2766 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:0dcf46b93077e2229ab69cd6ddb80e2689546c575ee538bb2033fee1124ef3e3 | |||
| size 2761 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:9c9870df5a86acaaec67063183dace795479cd0f05296f13058995f475149c56 | |||
| size 2957783 | |||
| @@ -75,7 +75,9 @@ RUN pip install --no-cache-dir --upgrade pip && \ | |||
| ENV SHELL=/bin/bash | |||
| # install special package | |||
| RUN pip install --no-cache-dir mmcls>=0.21.0 mmdet>=2.25.0 decord>=0.6.0 numpy==1.18.5 datasets==2.1.0 | |||
| RUN pip install --no-cache-dir 'mmcls>=0.21.0' 'mmdet>=2.25.0' 'decord>=0.6.0' datasets==2.1.0 ipykernel && \ | |||
| pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ | |||
| pip config set install.trusted-host pypi.tuna.tsinghua.edu.cn | |||
| RUN if [ "$USE_GPU" = "True" ] ; then \ | |||
| pip install --no-cache-dir dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html; \ | |||
| @@ -0,0 +1,4 @@ | |||
| from .base import Exporter | |||
| from .builder import build_exporter | |||
| from .nlp import SbertForSequenceClassificationExporter | |||
| from .torch_model_exporter import TorchModelExporter | |||
| @@ -0,0 +1,53 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from abc import ABC, abstractmethod | |||
| from modelscope.models import Model | |||
| from modelscope.utils.config import Config, ConfigDict | |||
| from modelscope.utils.constant import ModelFile | |||
| from .builder import build_exporter | |||
| class Exporter(ABC): | |||
| """Exporter base class to output model to onnx, torch_script, graphdef, etc. | |||
| """ | |||
| def __init__(self): | |||
| self.model = None | |||
| @classmethod | |||
| def from_model(cls, model: Model, **kwargs): | |||
| """Build the Exporter instance. | |||
| @param model: A model instance. It will be used to output the generated file, | |||
| and the configuration.json in its model_dir field will be used to create the exporter instance. | |||
| @param kwargs: Extra kwargs used to create the Exporter instance. | |||
| @return: The Exporter instance | |||
| """ | |||
| cfg = Config.from_file( | |||
| os.path.join(model.model_dir, ModelFile.CONFIGURATION)) | |||
| task_name = cfg.task | |||
| model_cfg = cfg.model | |||
| if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): | |||
| model_cfg.type = model_cfg.model_type | |||
| export_cfg = ConfigDict({'type': model_cfg.type}) | |||
| if hasattr(cfg, 'export'): | |||
| export_cfg.update(cfg.export) | |||
| exporter = build_exporter(export_cfg, task_name, kwargs) | |||
| exporter.model = model | |||
| return exporter | |||
| @abstractmethod | |||
| def export_onnx(self, outputs: str, opset=11, **kwargs): | |||
| """Export the model as onnx format files. | |||
| In some cases, several files may be generated, | |||
| So please return a dict which contains the generated name with the file path. | |||
| @param opset: The version of the ONNX operator set to use. | |||
| @param outputs: The output dir. | |||
| @param kwargs: In this default implementation, | |||
| kwargs will be carried to generate_dummy_inputs as extra arguments (like input shape). | |||
| @return: A dict mapping the model name to the model file path. | |||
| """ | |||
| pass | |||
| @@ -0,0 +1,21 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.utils.config import ConfigDict | |||
| from modelscope.utils.registry import Registry, build_from_cfg | |||
| EXPORTERS = Registry('exporters') | |||
| def build_exporter(cfg: ConfigDict, | |||
| task_name: str = None, | |||
| default_args: dict = None): | |||
| """ build exporter by the given model config dict | |||
| Args: | |||
| cfg (:obj:`ConfigDict`): config dict for exporter object. | |||
| task_name (str, optional): task name, refer to | |||
| :obj:`Tasks` for more details | |||
| default_args (dict, optional): Default initialization arguments. | |||
| """ | |||
| return build_from_cfg( | |||
| cfg, EXPORTERS, group_key=task_name, default_args=default_args) | |||
| @@ -0,0 +1,2 @@ | |||
| from .sbert_for_sequence_classification_exporter import \ | |||
| SbertForSequenceClassificationExporter | |||
| @@ -0,0 +1,81 @@ | |||
| import os | |||
| from collections import OrderedDict | |||
| from typing import Any, Dict, Mapping, Tuple | |||
| from torch.utils.data.dataloader import default_collate | |||
| from modelscope.exporters.builder import EXPORTERS | |||
| from modelscope.exporters.torch_model_exporter import TorchModelExporter | |||
| from modelscope.metainfo import Models | |||
| from modelscope.preprocessors import Preprocessor, build_preprocessor | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import ModeKeys, Tasks | |||
| @EXPORTERS.register_module( | |||
| Tasks.sentence_similarity, module_name=Models.structbert) | |||
| @EXPORTERS.register_module( | |||
| Tasks.sentiment_classification, module_name=Models.structbert) | |||
| @EXPORTERS.register_module(Tasks.nli, module_name=Models.structbert) | |||
| @EXPORTERS.register_module( | |||
| Tasks.zero_shot_classification, module_name=Models.structbert) | |||
| class SbertForSequenceClassificationExporter(TorchModelExporter): | |||
| def generate_dummy_inputs(self, | |||
| shape: Tuple = None, | |||
| **kwargs) -> Dict[str, Any]: | |||
| """Generate dummy inputs for model exportation to onnx or other formats by tracing. | |||
| @param shape: A tuple of input shape which should have at most two dimensions. | |||
| shape = (1, ) batch_size=1, sequence_length will be taken from the preprocessor. | |||
| shape = (8, 128) batch_size=1, sequence_length=128, which will cover the config of the preprocessor. | |||
| @return: Dummy inputs. | |||
| """ | |||
| cfg = Config.from_file( | |||
| os.path.join(self.model.model_dir, 'configuration.json')) | |||
| field_name = Tasks.find_field_by_task(cfg.task) | |||
| if 'type' not in cfg.preprocessor and 'val' in cfg.preprocessor: | |||
| cfg = cfg.preprocessor.val | |||
| else: | |||
| cfg = cfg.preprocessor | |||
| batch_size = 1 | |||
| sequence_length = {} | |||
| if shape is not None: | |||
| if len(shape) == 1: | |||
| batch_size = shape[0] | |||
| elif len(shape) == 2: | |||
| batch_size, max_length = shape | |||
| sequence_length = {'sequence_length': max_length} | |||
| cfg.update({ | |||
| 'model_dir': self.model.model_dir, | |||
| 'mode': ModeKeys.TRAIN, | |||
| **sequence_length | |||
| }) | |||
| preprocessor: Preprocessor = build_preprocessor(cfg, field_name) | |||
| if preprocessor.pair: | |||
| first_sequence = preprocessor.tokenizer.unk_token | |||
| second_sequence = preprocessor.tokenizer.unk_token | |||
| else: | |||
| first_sequence = preprocessor.tokenizer.unk_token | |||
| second_sequence = None | |||
| batched = [] | |||
| for _ in range(batch_size): | |||
| batched.append(preprocessor((first_sequence, second_sequence))) | |||
| return default_collate(batched) | |||
| @property | |||
| def inputs(self) -> Mapping[str, Mapping[int, str]]: | |||
| dynamic_axis = {0: 'batch', 1: 'sequence'} | |||
| return OrderedDict([ | |||
| ('input_ids', dynamic_axis), | |||
| ('attention_mask', dynamic_axis), | |||
| ('token_type_ids', dynamic_axis), | |||
| ]) | |||
| @property | |||
| def outputs(self) -> Mapping[str, Mapping[int, str]]: | |||
| return OrderedDict({'logits': {0: 'batch'}}) | |||
| @@ -0,0 +1,247 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from contextlib import contextmanager | |||
| from itertools import chain | |||
| from typing import Any, Dict, Mapping | |||
| import torch | |||
| from torch import nn | |||
| from torch.onnx import export as onnx_export | |||
| from torch.onnx.utils import _decide_input_format | |||
| from modelscope.models import TorchModel | |||
| from modelscope.pipelines.base import collate_fn | |||
| from modelscope.utils.constant import ModelFile | |||
| from modelscope.utils.logger import get_logger | |||
| from modelscope.utils.regress_test_utils import compare_arguments_nested | |||
| from modelscope.utils.tensor_utils import torch_nested_numpify | |||
| from .base import Exporter | |||
| logger = get_logger(__name__) | |||
| class TorchModelExporter(Exporter): | |||
| """The torch base class of exporter. | |||
| This class provides the default implementations for exporting onnx and torch script. | |||
| Each specific model may implement its own exporter by overriding the export_onnx/export_torch_script, | |||
| and to provide implementations for generate_dummy_inputs/inputs/outputs methods. | |||
| """ | |||
| def export_onnx(self, outputs: str, opset=11, **kwargs): | |||
| """Export the model as onnx format files. | |||
| In some cases, several files may be generated, | |||
| So please return a dict which contains the generated name with the file path. | |||
| @param opset: The version of the ONNX operator set to use. | |||
| @param outputs: The output dir. | |||
| @param kwargs: In this default implementation, | |||
| you can pass the arguments needed by _torch_export_onnx, other unrecognized args | |||
| will be carried to generate_dummy_inputs as extra arguments (such as input shape). | |||
| @return: A dict mapping model keys to model file paths. | |||
| """ | |||
| model = self.model | |||
| if not isinstance(model, nn.Module) and hasattr(model, 'model'): | |||
| model = model.model | |||
| onnx_file = os.path.join(outputs, ModelFile.ONNX_MODEL_FILE) | |||
| self._torch_export_onnx(model, onnx_file, opset=opset, **kwargs) | |||
| return {'model': onnx_file} | |||
| def export_torch_script(self, outputs: str, **kwargs): | |||
| """Export the model as torch script files. | |||
| In some cases, several files may be generated, | |||
| So please return a dict which contains the generated name with the file path. | |||
| @param outputs: The output dir. | |||
| @param kwargs: In this default implementation, | |||
| you can pass the arguments needed by _torch_export_torch_script, other unrecognized args | |||
| will be carried to generate_dummy_inputs as extra arguments (like input shape). | |||
| @return: A dict mapping the model name to the model file path. | |||
| """ | |||
| model = self.model | |||
| if not isinstance(model, nn.Module) and hasattr(model, 'model'): | |||
| model = model.model | |||
| ts_file = os.path.join(outputs, ModelFile.TS_MODEL_FILE) | |||
| # generate ts by tracing | |||
| self._torch_export_torch_script(model, ts_file, **kwargs) | |||
| return {'model': ts_file} | |||
| def generate_dummy_inputs(self, **kwargs) -> Dict[str, Any]: | |||
| """Generate dummy inputs for model exportation to onnx or other formats by tracing. | |||
| @return: Dummy inputs. | |||
| """ | |||
| return None | |||
| @property | |||
| def inputs(self) -> Mapping[str, Mapping[int, str]]: | |||
| """Return an ordered dict contains the model's input arguments name with their dynamic axis. | |||
| About the information of dynamic axis please check the dynamic_axes argument of torch.onnx.export function | |||
| """ | |||
| return None | |||
| @property | |||
| def outputs(self) -> Mapping[str, Mapping[int, str]]: | |||
| """Return an ordered dict contains the model's output arguments name with their dynamic axis. | |||
| About the information of dynamic axis please check the dynamic_axes argument of torch.onnx.export function | |||
| """ | |||
| return None | |||
| def _torch_export_onnx(self, | |||
| model: nn.Module, | |||
| output: str, | |||
| opset: int = 11, | |||
| device: str = 'cpu', | |||
| validation: bool = True, | |||
| rtol: float = None, | |||
| atol: float = None, | |||
| **kwargs): | |||
| """Export the model to an onnx format file. | |||
| @param model: A torch.nn.Module instance to export. | |||
| @param output: The output file. | |||
| @param opset: The version of the ONNX operator set to use. | |||
| @param device: The device used to forward. | |||
| @param validation: Whether to validate the exported file. | |||
| @param rtol: The relative tolerance used when comparing the outputs. | |||
| @param atol: The absolute tolerance used when comparing the outputs. | |||
| """ | |||
| dummy_inputs = self.generate_dummy_inputs(**kwargs) | |||
| inputs = self.inputs | |||
| outputs = self.outputs | |||
| if dummy_inputs is None or inputs is None or outputs is None: | |||
| raise NotImplementedError( | |||
| 'Model properties dummy_inputs, inputs and outputs must be set.') | |||
| with torch.no_grad(): | |||
| model.eval() | |||
| device = torch.device(device) | |||
| model.to(device) | |||
| dummy_inputs = collate_fn(dummy_inputs, device) | |||
| if isinstance(dummy_inputs, Mapping): | |||
| dummy_inputs = dict(dummy_inputs) | |||
| onnx_outputs = list(self.outputs.keys()) | |||
| with replace_call(): | |||
| onnx_export( | |||
| model, | |||
| (dummy_inputs, ), | |||
| f=output, | |||
| input_names=list(inputs.keys()), | |||
| output_names=onnx_outputs, | |||
| dynamic_axes={ | |||
| name: axes | |||
| for name, axes in chain(inputs.items(), | |||
| outputs.items()) | |||
| }, | |||
| do_constant_folding=True, | |||
| opset_version=opset, | |||
| ) | |||
| if validation: | |||
| try: | |||
| import onnx | |||
| import onnxruntime as ort | |||
| except ImportError: | |||
| logger.warning( | |||
| 'Cannot validate the exported onnx file, because ' | |||
| 'onnx or onnxruntime is not installed') | |||
| return | |||
| onnx_model = onnx.load(output) | |||
| onnx.checker.check_model(onnx_model) | |||
| ort_session = ort.InferenceSession(output) | |||
| with torch.no_grad(): | |||
| model.eval() | |||
| outputs_origin = model.forward( | |||
| *_decide_input_format(model, dummy_inputs)) | |||
| if isinstance(outputs_origin, Mapping): | |||
| outputs_origin = torch_nested_numpify( | |||
| list(outputs_origin.values())) | |||
| outputs = ort_session.run( | |||
| onnx_outputs, | |||
| torch_nested_numpify(dummy_inputs), | |||
| ) | |||
| tols = {} | |||
| if rtol is not None: | |||
| tols['rtol'] = rtol | |||
| if atol is not None: | |||
| tols['atol'] = atol | |||
| if not compare_arguments_nested('Onnx model output match failed', | |||
| outputs, outputs_origin, **tols): | |||
| raise RuntimeError( | |||
| 'Exporting to onnx failed because of a validation error.') | |||
| def _torch_export_torch_script(self, | |||
| model: nn.Module, | |||
| output: str, | |||
| device: str = 'cpu', | |||
| validation: bool = True, | |||
| rtol: float = None, | |||
| atol: float = None, | |||
| **kwargs): | |||
| """Export the model to a torch script file. | |||
| @param model: A torch.nn.Module instance to export. | |||
| @param output: The output file. | |||
| @param device: The device used to forward. | |||
| @param validation: Whether to validate the exported file. | |||
| @param rtol: The relative tolerance used when comparing the outputs. | |||
| @param atol: The absolute tolerance used when comparing the outputs. | |||
| """ | |||
| model.eval() | |||
| dummy_inputs = self.generate_dummy_inputs(**kwargs) | |||
| if dummy_inputs is None: | |||
| raise NotImplementedError( | |||
| 'Model property dummy_inputs must be set.') | |||
| dummy_inputs = collate_fn(dummy_inputs, device) | |||
| if isinstance(dummy_inputs, Mapping): | |||
| dummy_inputs = tuple(dummy_inputs.values()) | |||
| with torch.no_grad(): | |||
| model.eval() | |||
| with replace_call(): | |||
| traced_model = torch.jit.trace( | |||
| model, dummy_inputs, strict=False) | |||
| torch.jit.save(traced_model, output) | |||
| if validation: | |||
| ts_model = torch.jit.load(output) | |||
| with torch.no_grad(): | |||
| model.eval() | |||
| ts_model.eval() | |||
| outputs = ts_model.forward(*dummy_inputs) | |||
| outputs = torch_nested_numpify(outputs) | |||
| outputs_origin = model.forward(*dummy_inputs) | |||
| outputs_origin = torch_nested_numpify(outputs_origin) | |||
| tols = {} | |||
| if rtol is not None: | |||
| tols['rtol'] = rtol | |||
| if atol is not None: | |||
| tols['atol'] = atol | |||
| if not compare_arguments_nested( | |||
| 'Torch script model output match failed', outputs, | |||
| outputs_origin, **tols): | |||
| raise RuntimeError( | |||
| 'Exporting to torch script failed because of a validation error.') | |||
| @contextmanager | |||
| def replace_call(): | |||
| """This function is used to recover the original call method. | |||
| The Model class of modelscope overrides the call method. When exporting to onnx or torchscript, torch will | |||
| prepare the parameters as the prototype of forward method, and trace the call method, this causes | |||
| problems. Here we recover the call method to the default implementation of torch.nn.Module, and change it | |||
| back after the tracing was done. | |||
| """ | |||
| TorchModel.call_origin, TorchModel.__call__ = TorchModel.__call__, TorchModel._call_impl | |||
| yield | |||
| TorchModel.__call__ = TorchModel.call_origin | |||
| del TorchModel.call_origin | |||
| @@ -1,2 +1,4 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .file import File, LocalStorage | |||
| from .io import dump, dumps, load | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .base import FormatHandler | |||
| from .json import JsonHandler | |||
| from .yaml import YamlHandler | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import pickle | |||
| import shutil | |||
| @@ -389,7 +391,7 @@ class HubApi: | |||
| cookies = requests.utils.dict_from_cookiejar(cookies) | |||
| r = requests.get(url=datahub_url, cookies=cookies) | |||
| resp = r.json() | |||
| datahub_raise_on_error(datahub_url, resp) | |||
| raise_on_error(resp) | |||
| return resp['Data'] | |||
| def on_dataset_download(self, dataset_name: str, namespace: str) -> None: | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from pathlib import Path | |||
| MODELSCOPE_URL_SCHEME = 'http://' | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from http import HTTPStatus | |||
| from requests.exceptions import HTTPError | |||
| @@ -60,7 +62,7 @@ def raise_on_error(rsp): | |||
| Args: | |||
| rsp (_type_): The server response | |||
| """ | |||
| if rsp['Code'] == HTTPStatus.OK and rsp['Success']: | |||
| if rsp['Code'] == HTTPStatus.OK: | |||
| return True | |||
| else: | |||
| raise RequestError(rsp['Message']) | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import copy | |||
| import os | |||
| import sys | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import subprocess | |||
| from typing import List | |||
| @@ -39,17 +41,28 @@ class GitCommandWrapper(metaclass=Singleton): | |||
| subprocess.CompletedProcess: the command response | |||
| """ | |||
| logger.debug(' '.join(args)) | |||
| git_env = os.environ.copy() | |||
| git_env['GIT_TERMINAL_PROMPT'] = '0' | |||
| response = subprocess.run( | |||
| [self.git_path, *args], | |||
| stdout=subprocess.PIPE, | |||
| stderr=subprocess.PIPE) # compatible for python3.6 | |||
| stderr=subprocess.PIPE, | |||
| env=git_env, | |||
| ) # compatible for python3.6 | |||
| try: | |||
| response.check_returncode() | |||
| return response | |||
| except subprocess.CalledProcessError as error: | |||
| raise GitError( | |||
| 'stdout: %s, stderr: %s' % | |||
| (response.stdout.decode('utf8'), error.stderr.decode('utf8'))) | |||
| if response.returncode == 1: | |||
| logger.info('Nothing to commit.') | |||
| return response | |||
| else: | |||
| logger.error( | |||
| 'Error running the git command; you may need to log in first.' | |||
| ) | |||
| raise GitError('stdout: %s, stderr: %s' % | |||
| (response.stdout.decode('utf8'), | |||
| error.stderr.decode('utf8'))) | |||
| def config_auth_token(self, repo_dir, auth_token): | |||
| url = self.get_repo_remote_url(repo_dir) | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from typing import Optional | |||
| @@ -40,6 +42,11 @@ class Repository: | |||
| self.model_dir = model_dir | |||
| self.model_base_dir = os.path.dirname(model_dir) | |||
| self.model_repo_name = os.path.basename(model_dir) | |||
| if not revision: | |||
| err_msg = 'the revision argument cannot be empty.' | |||
| raise InvalidParameter(err_msg) | |||
| if auth_token: | |||
| self.auth_token = auth_token | |||
| else: | |||
| @@ -145,10 +152,21 @@ class DatasetRepository: | |||
| The git command line path, if None, we use 'git' | |||
| """ | |||
| self.dataset_id = dataset_id | |||
| self.repo_work_dir = repo_work_dir | |||
| self.repo_base_dir = os.path.dirname(repo_work_dir) | |||
| self.repo_name = os.path.basename(repo_work_dir) | |||
| if not repo_work_dir or not isinstance(repo_work_dir, str): | |||
| err_msg = 'repo_work_dir must be provided!' | |||
| raise InvalidParameter(err_msg) | |||
| self.repo_work_dir = repo_work_dir.rstrip('/') | |||
| if not self.repo_work_dir: | |||
| err_msg = 'repo_work_dir cannot be the root directory!' | |||
| raise InvalidParameter(err_msg) | |||
| self.repo_base_dir = os.path.dirname(self.repo_work_dir) | |||
| self.repo_name = os.path.basename(self.repo_work_dir) | |||
| if not revision: | |||
| err_msg = 'the revision argument cannot be empty.' | |||
| raise InvalidParameter(err_msg) | |||
| self.revision = revision | |||
| if auth_token: | |||
| self.auth_token = auth_token | |||
| else: | |||
| @@ -199,7 +217,9 @@ class DatasetRepository: | |||
| self.git_wrapper.config_auth_token(self.repo_work_dir, self.auth_token) | |||
| self.git_wrapper.add_user_info(self.repo_base_dir, self.repo_name) | |||
| remote_url = self.git_wrapper.get_repo_remote_url(self.repo_work_dir) | |||
| remote_url = self._get_remote_url() | |||
| remote_url = self.git_wrapper.remove_token_from_url(remote_url) | |||
| self.git_wrapper.pull(self.repo_work_dir) | |||
| self.git_wrapper.add(self.repo_work_dir, all_files=True) | |||
| self.git_wrapper.commit(self.repo_work_dir, commit_message) | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import tempfile | |||
| from pathlib import Path | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import hashlib | |||
| import os | |||
| import pickle | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import hashlib | |||
| import os | |||
| from typing import Optional | |||
| @@ -35,6 +35,10 @@ class Models(object): | |||
| fer = 'fer' | |||
| retinaface = 'retinaface' | |||
| shop_segmentation = 'shop-segmentation' | |||
| mogface = 'mogface' | |||
| mtcnn = 'mtcnn' | |||
| ulfd = 'ulfd' | |||
| video_inpainting = 'video-inpainting' | |||
| # EasyCV models | |||
| yolox = 'YOLOX' | |||
| @@ -51,11 +55,16 @@ class Models(object): | |||
| space_intent = 'space-intent' | |||
| space_modeling = 'space-modeling' | |||
| star = 'star' | |||
| star3 = 'star3' | |||
| tcrf = 'transformer-crf' | |||
| transformer_softmax = 'transformer-softmax' | |||
| lcrf = 'lstm-crf' | |||
| gcnncrf = 'gcnn-crf' | |||
| bart = 'bart' | |||
| gpt3 = 'gpt3' | |||
| plug = 'plug' | |||
| bert_for_ds = 'bert-for-document-segmentation' | |||
| ponet = 'ponet' | |||
| # audio models | |||
| sambert_hifigan = 'sambert-hifigan' | |||
| @@ -70,6 +79,7 @@ class Models(object): | |||
| gemm = 'gemm-generative-multi-modal' | |||
| mplug = 'mplug' | |||
| diffusion = 'diffusion-text-to-image-synthesis' | |||
| multi_stage_diffusion = 'multi-stage-diffusion-text-to-image-synthesis' | |||
| team = 'team-multi-modal-similarity' | |||
| video_clip = 'video-clip-multi-modal-embedding' | |||
| @@ -77,6 +87,7 @@ class Models(object): | |||
| class TaskModels(object): | |||
| # nlp task | |||
| text_classification = 'text-classification' | |||
| token_classification = 'token-classification' | |||
| information_extraction = 'information-extraction' | |||
| @@ -87,6 +98,8 @@ class Heads(object): | |||
| bert_mlm = 'bert-mlm' | |||
| # roberta mlm | |||
| roberta_mlm = 'roberta-mlm' | |||
| # token cls | |||
| token_classification = 'token-classification' | |||
| information_extraction = 'information-extraction' | |||
| @@ -121,8 +134,11 @@ class Pipelines(object): | |||
| salient_detection = 'u2net-salient-detection' | |||
| image_classification = 'image-classification' | |||
| face_detection = 'resnet-face-detection-scrfd10gkps' | |||
| ulfd_face_detection = 'manual-face-detection-ulfd' | |||
| facial_expression_recognition = 'vgg19-facial-expression-recognition-fer' | |||
| retina_face_detection = 'resnet50-face-detection-retinaface' | |||
| mog_face_detection = 'resnet101-face-detection-cvpr22papermogface' | |||
| mtcnn_face_detection = 'manual-face-detection-mtcnn' | |||
| live_category = 'live-category' | |||
| general_image_classification = 'vit-base_image-classification_ImageNet-labels' | |||
| daily_image_classification = 'vit-base_image-classification_Dailylife-labels' | |||
| @@ -155,16 +171,19 @@ class Pipelines(object): | |||
| text_driven_segmentation = 'text-driven-segmentation' | |||
| movie_scene_segmentation = 'resnet50-bert-movie-scene-segmentation' | |||
| shop_segmentation = 'shop-segmentation' | |||
| video_inpainting = 'video-inpainting' | |||
| # nlp tasks | |||
| sentence_similarity = 'sentence-similarity' | |||
| word_segmentation = 'word-segmentation' | |||
| part_of_speech = 'part-of-speech' | |||
| named_entity_recognition = 'named-entity-recognition' | |||
| text_generation = 'text-generation' | |||
| sentiment_analysis = 'sentiment-analysis' | |||
| sentiment_classification = 'sentiment-classification' | |||
| text_classification = 'text-classification' | |||
| fill_mask = 'fill-mask' | |||
| fill_mask_ponet = 'fill-mask-ponet' | |||
| csanmt_translation = 'csanmt-translation' | |||
| nli = 'nli' | |||
| dialog_intent_prediction = 'dialog-intent-prediction' | |||
| @@ -172,8 +191,12 @@ class Pipelines(object): | |||
| dialog_state_tracking = 'dialog-state-tracking' | |||
| zero_shot_classification = 'zero-shot-classification' | |||
| text_error_correction = 'text-error-correction' | |||
| plug_generation = 'plug-generation' | |||
| faq_question_answering = 'faq-question-answering' | |||
| conversational_text_to_sql = 'conversational-text-to-sql' | |||
| table_question_answering_pipeline = 'table-question-answering-pipeline' | |||
| sentence_embedding = 'sentence-embedding' | |||
| passage_ranking = 'passage-ranking' | |||
| relation_extraction = 'relation-extraction' | |||
| document_segmentation = 'document-segmentation' | |||
| @@ -223,8 +246,11 @@ class Trainers(object): | |||
| # nlp trainers | |||
| bert_sentiment_analysis = 'bert-sentiment-analysis' | |||
| dialog_modeling_trainer = 'dialog-modeling-trainer' | |||
| dialog_intent_trainer = 'dialog-intent-trainer' | |||
| nlp_base_trainer = 'nlp-base-trainer' | |||
| nlp_veco_trainer = 'nlp-veco-trainer' | |||
| nlp_passage_ranking_trainer = 'nlp-passage-ranking-trainer' | |||
| # audio trainers | |||
| speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' | |||
| @@ -252,6 +278,7 @@ class Preprocessors(object): | |||
| # nlp preprocessor | |||
| sen_sim_tokenizer = 'sen-sim-tokenizer' | |||
| cross_encoder_tokenizer = 'cross-encoder-tokenizer' | |||
| bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer' | |||
| text_gen_tokenizer = 'text-gen-tokenizer' | |||
| token_cls_tokenizer = 'token-cls-tokenizer' | |||
| @@ -264,10 +291,15 @@ class Preprocessors(object): | |||
| sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer' | |||
| zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' | |||
| text_error_correction = 'text-error-correction' | |||
| sentence_embedding = 'sentence-embedding' | |||
| passage_ranking = 'passage-ranking' | |||
| sequence_labeling_tokenizer = 'sequence-labeling-tokenizer' | |||
| word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor' | |||
| fill_mask = 'fill-mask' | |||
| fill_mask_ponet = 'fill-mask-ponet' | |||
| faq_question_answering_preprocessor = 'faq-question-answering-preprocessor' | |||
| conversational_text_to_sql = 'conversational-text-to-sql' | |||
| table_question_answering_preprocessor = 'table-question-answering-preprocessor' | |||
| re_tokenizer = 're-tokenizer' | |||
| document_segmentation = 'document-segmentation' | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Dict | |||
| from modelscope.metainfo import Metrics | |||
| @@ -14,9 +14,9 @@ from .builder import METRICS, MetricKeys | |||
| @METRICS.register_module( | |||
| group_key=default_group, module_name=Metrics.seq_cls_metric) | |||
| class SequenceClassificationMetric(Metric): | |||
| """The metric computation class for sequence classification classes. | |||
| """The metric computation class for sequence classification tasks. | |||
| This metric class calculates accuracy for the whole input batches. | |||
| This metric class calculates accuracy over all input batches. | |||
| """ | |||
| def __init__(self, *args, **kwargs): | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import torch.nn as nn | |||
| from .layer_base import LayerBase | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import abc | |||
| import re | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import torch | |||
| import torch.nn.functional as F | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import math | |||
| import torch | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| @@ -1,9 +1,10 @@ | |||
| """ | |||
| The implementation of class ComplexConv2d, ComplexConvTranspose2d and ComplexBatchNorm2d | |||
| here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
| and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| # | |||
| # The implementation of class ComplexConv2d, ComplexConvTranspose2d and | |||
| # ComplexBatchNorm2d here is modified based on Jongho Choi(sweetcocoa@snu.ac.kr | |||
| # / Seoul National Univ., ESTsoft ) and publicly available at | |||
| # https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
| """ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| @@ -1,8 +1,10 @@ | |||
| """ | |||
| The implementation here is modified based on | |||
| Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
| and publicly available at https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
| """ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| # | |||
| # The implementation here is modified based on | |||
| # Jongho Choi(sweetcocoa@snu.ac.kr / Seoul National Univ., ESTsoft ) | |||
| # and publicly available at | |||
| # https://github.com/sweetcocoa/DeepComplexUNetPyTorch | |||
| import torch | |||
| import torch.nn as nn | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| from typing import Dict | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import math | |||
| import struct | |||
| from enum import Enum | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .base_head import * # noqa F403 | |||
| from .base_model import * # noqa F403 | |||
| from .base_torch_head import * # noqa F403 | |||
| @@ -1,6 +1,6 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from abc import ABC, abstractmethod | |||
| from typing import Dict, Union | |||
| from typing import Any, Dict, Union | |||
| from modelscope.models.base.base_model import Model | |||
| from modelscope.utils.config import ConfigDict | |||
| @@ -22,25 +22,20 @@ class Head(ABC): | |||
| self.config = ConfigDict(kwargs) | |||
| @abstractmethod | |||
| def forward(self, input: Input) -> Dict[str, Tensor]: | |||
| def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
| """ | |||
| This method will use the output from backbone model to do any | |||
| downstream tasks | |||
| Args: | |||
| input: The tensor output or a model from backbone model | |||
| (text generation need a model as input) | |||
| Returns: The output from downstream tasks | |||
| downstream tasks. It receives the output of the backbone model. | |||
| Returns (Dict[str, Any]): The output of the downstream task. | |||
| """ | |||
| pass | |||
| @abstractmethod | |||
| def compute_loss(self, outputs: Dict[str, Tensor], | |||
| labels) -> Dict[str, Tensor]: | |||
| def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: | |||
| """ | |||
| compute loss for head during the finetuning | |||
| Compute the loss for the head during finetuning. | |||
| Args: | |||
| outputs (Dict[str, Tensor]): the output from the model forward | |||
| Returns: the loss(Dict[str, Tensor]): | |||
| Returns (Dict[str, Any]): The loss dict | |||
| """ | |||
| pass | |||
| @@ -2,7 +2,7 @@ | |||
| import os | |||
| import os.path as osp | |||
| from abc import ABC, abstractmethod | |||
| from typing import Callable, Dict, List, Optional, Union | |||
| from typing import Any, Callable, Dict, List, Optional, Union | |||
| from modelscope.hub.snapshot_download import snapshot_download | |||
| from modelscope.models.builder import build_model | |||
| @@ -10,8 +10,6 @@ from modelscope.utils.checkpoint import save_pretrained | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile | |||
| from modelscope.utils.device import device_placement, verify_device | |||
| from modelscope.utils.file_utils import func_receive_dict_inputs | |||
| from modelscope.utils.hub import parse_label_mapping | |||
| from modelscope.utils.logger import get_logger | |||
| logger = get_logger() | |||
| @@ -27,35 +25,31 @@ class Model(ABC): | |||
| verify_device(device_name) | |||
| self._device_name = device_name | |||
| def __call__(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| return self.postprocess(self.forward(input)) | |||
| def __call__(self, *args, **kwargs) -> Dict[str, Any]: | |||
| return self.postprocess(self.forward(*args, **kwargs)) | |||
| @abstractmethod | |||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
| """ | |||
| Run the forward pass for a model. | |||
| Args: | |||
| input (Dict[str, Tensor]): the dict of the model inputs for the forward method | |||
| Returns: | |||
| Dict[str, Tensor]: output from the model forward pass | |||
| Dict[str, Any]: output from the model forward pass | |||
| """ | |||
| pass | |||
| def postprocess(self, input: Dict[str, Tensor], | |||
| **kwargs) -> Dict[str, Tensor]: | |||
| def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | |||
| """ Model specific postprocess and convert model output to | |||
| standard model outputs. | |||
| Args: | |||
| input: input data | |||
| inputs: input data | |||
| Return: | |||
| dict of results: a dict containing outputs of model, each | |||
| output should have the standard output name. | |||
| """ | |||
| return input | |||
| return inputs | |||
| @classmethod | |||
| def _instantiate(cls, **kwargs): | |||
| @@ -97,7 +91,6 @@ class Model(ABC): | |||
| osp.join(local_model_dir, ModelFile.CONFIGURATION)) | |||
| task_name = cfg.task | |||
| model_cfg = cfg.model | |||
| framework = cfg.framework | |||
| if hasattr(model_cfg, 'model_type') and not hasattr(model_cfg, 'type'): | |||
| model_cfg.type = model_cfg.model_type | |||
| @@ -107,9 +100,8 @@ class Model(ABC): | |||
| model_cfg[k] = v | |||
| if device is not None: | |||
| model_cfg.device = device | |||
| with device_placement(framework, device): | |||
| model = build_model( | |||
| model_cfg, task_name=task_name, default_args=kwargs) | |||
| model = build_model( | |||
| model_cfg, task_name=task_name, default_args=kwargs) | |||
| else: | |||
| model = build_model( | |||
| model_cfg, task_name=task_name, default_args=kwargs) | |||
| @@ -1,5 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Dict | |||
| from typing import Any, Dict | |||
| import torch | |||
| @@ -18,10 +18,8 @@ class TorchHead(Head, torch.nn.Module): | |||
| super().__init__(**kwargs) | |||
| torch.nn.Module.__init__(self) | |||
| def forward(self, inputs: Dict[str, | |||
| torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
| def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
| raise NotImplementedError | |||
| def compute_loss(self, outputs: Dict[str, torch.Tensor], | |||
| labels) -> Dict[str, torch.Tensor]: | |||
| def compute_loss(self, *args, **kwargs) -> Dict[str, Any]: | |||
| raise NotImplementedError | |||
| @@ -1,6 +1,6 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from typing import Any, Dict, Optional, Union | |||
| from typing import Any, Dict | |||
| import torch | |||
| from torch import nn | |||
| @@ -21,15 +21,14 @@ class TorchModel(Model, torch.nn.Module): | |||
| super().__init__(model_dir, *args, **kwargs) | |||
| torch.nn.Module.__init__(self) | |||
| def __call__(self, input: Dict[str, | |||
| torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
| def __call__(self, *args, **kwargs) -> Dict[str, Any]: | |||
| # Adapt models whose forward method takes a single dict argument; the argument name must be input or inputs | |||
| if func_receive_dict_inputs(self.forward): | |||
| return self.postprocess(self.forward(input)) | |||
| return self.postprocess(self.forward(args[0], **kwargs)) | |||
| else: | |||
| return self.postprocess(self.forward(**input)) | |||
| return self.postprocess(self.forward(*args, **kwargs)) | |||
| def forward(self, inputs: Dict[str, | |||
| torch.Tensor]) -> Dict[str, torch.Tensor]: | |||
| def forward(self, *args, **kwargs) -> Dict[str, Any]: | |||
| raise NotImplementedError | |||
| def post_init(self): | |||
| @@ -1,3 +1,5 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import os.path as osp | |||
| import shutil | |||
| @@ -4,11 +4,16 @@ from typing import TYPE_CHECKING | |||
| from modelscope.utils.import_utils import LazyImportModule | |||
| if TYPE_CHECKING: | |||
| from .mogface import MogFaceDetector | |||
| from .mtcnn import MtcnnFaceDetector | |||
| from .retinaface import RetinaFaceDetection | |||
| from .ulfd_slim import UlfdFaceDetector | |||
| else: | |||
| _import_structure = { | |||
| 'ulfd_slim': ['UlfdFaceDetector'], | |||
| 'retinaface': ['RetinaFaceDetection'], | |||
| 'mtcnn': ['MtcnnFaceDetector'], | |||
| 'mogface': ['MogFaceDetector'] | |||
| } | |||
| import sys | |||
| @@ -1,5 +1,4 @@ | |||
| """ | |||
| mmdet_patch is based on | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet, | |||
| all duplicate functions from official mmdetection are removed. | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet | |||
| """ | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox | |||
| """ | |||
| from .transforms import bbox2result, distance2kps, kps2distance | |||
| __all__ = ['bbox2result', 'distance2kps', 'kps2distance'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/bbox/transforms.py | |||
| """ | |||
| import numpy as np | |||
| import torch | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
| """ | |||
| from .bbox_nms import multiclass_nms | |||
| __all__ = ['multiclass_nms'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/core/post_processing/bbox_nms.py | |||
| """ | |||
| import torch | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets | |||
| """ | |||
| from .retinaface import RetinaFaceDataset | |||
| __all__ = ['RetinaFaceDataset'] | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines | |||
| """ | |||
| from .transforms import RandomSquareCrop | |||
| __all__ = ['RandomSquareCrop'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/pipelines/transforms.py | |||
| """ | |||
| import numpy as np | |||
| from mmdet.datasets.builder import PIPELINES | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/datasets/retinaface.py | |||
| """ | |||
| import numpy as np | |||
| from mmdet.datasets.builder import DATASETS | |||
| @@ -1,2 +1,6 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models | |||
| """ | |||
| from .dense_heads import * # noqa: F401,F403 | |||
| from .detectors import * # noqa: F401,F403 | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones | |||
| """ | |||
| from .resnet import ResNetV1e | |||
| __all__ = ['ResNetV1e'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/backbones/resnet.py | |||
| """ | |||
| import torch.nn as nn | |||
| import torch.utils.checkpoint as cp | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads | |||
| """ | |||
| from .scrfd_head import SCRFDHead | |||
| __all__ = ['SCRFDHead'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/dense_heads/scrfd_head.py | |||
| """ | |||
| import numpy as np | |||
| import torch | |||
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors | |||
| """ | |||
| from .scrfd import SCRFD | |||
| __all__ = ['SCRFD'] | |||
| @@ -1,5 +1,6 @@ | |||
| """ | |||
| based on https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py | |||
| The implementation here is modified based on insightface, originally MIT licensed and publicly available at | |||
| https://github.com/deepinsight/insightface/tree/master/detection/scrfd/mmdet/models/detectors/scrfd.py | |||
| """ | |||
| import torch | |||
| from mmdet.models.builder import DETECTORS | |||
| @@ -0,0 +1 @@ | |||
| from .models.detectors import MogFaceDetector | |||
| @@ -0,0 +1,96 @@ | |||
| import os | |||
| import cv2 | |||
| import numpy as np | |||
| import torch | |||
| import torch.backends.cudnn as cudnn | |||
| from modelscope.metainfo import Models | |||
| from modelscope.models.base import TorchModel | |||
| from modelscope.models.builder import MODELS | |||
| from modelscope.utils.constant import Tasks | |||
| from .mogface import MogFace | |||
| from .utils import MogPriorBox, mogdecode, py_cpu_nms | |||
| @MODELS.register_module(Tasks.face_detection, module_name=Models.mogface) | |||
| class MogFaceDetector(TorchModel): | |||
| def __init__(self, model_path, device='cuda'): | |||
| super().__init__(model_path) | |||
| torch.set_grad_enabled(False) | |||
| cudnn.benchmark = True | |||
| self.model_path = model_path | |||
| self.device = device | |||
| self.net = MogFace() | |||
| self.load_model() | |||
| self.net = self.net.to(device) | |||
| self.mean = np.array([[104, 117, 123]]) | |||
| def load_model(self, load_to_cpu=False): | |||
| pretrained_dict = torch.load( | |||
| self.model_path, map_location=torch.device('cpu')) | |||
| self.net.load_state_dict(pretrained_dict, strict=False) | |||
| self.net.eval() | |||
| def forward(self, input): | |||
| img_raw = input['img'] | |||
| img = np.array(img_raw.cpu().detach()) | |||
| img = img[:, :, ::-1] | |||
| im_height, im_width = img.shape[:2] | |||
| ss = 1.0 | |||
| # heuristic: downscale very large images so the longer side is at most ~1000 px | |||
| if max(im_height, im_width) > 1500: | |||
| ss = 1000.0 / max(im_height, im_width) | |||
| img = cv2.resize(img, (0, 0), fx=ss, fy=ss) | |||
| im_height, im_width = img.shape[:2] | |||
| scale = torch.Tensor( | |||
| [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) | |||
| img -= np.array([[103.53, 116.28, 123.675]]) | |||
| img /= np.array([[57.375, 57.120003, 58.395]]) | |||
| img /= 255 | |||
| img = img[:, :, ::-1].copy() | |||
| img = img.transpose(2, 0, 1) | |||
| img = torch.from_numpy(img).unsqueeze(0) | |||
| img = img.to(self.device) | |||
| scale = scale.to(self.device) | |||
| conf, loc = self.net(img) # forward pass | |||
| confidence_threshold = 0.82 | |||
| nms_threshold = 0.4 | |||
| top_k = 5000 | |||
| keep_top_k = 750 | |||
| priorbox = MogPriorBox(scale_list=[0.68]) | |||
| priors = priorbox(im_height, im_width) | |||
| priors = torch.tensor(priors).to(self.device) | |||
| prior_data = priors.data | |||
| boxes = mogdecode(loc.data.squeeze(0), prior_data) | |||
| boxes = boxes.cpu().numpy() | |||
| scores = conf.squeeze(0).data.cpu().numpy()[:, 0] | |||
| # ignore low scores | |||
| inds = np.where(scores > confidence_threshold)[0] | |||
| boxes = boxes[inds] | |||
| scores = scores[inds] | |||
| # keep top-K before NMS | |||
| order = scores.argsort()[::-1][:top_k] | |||
| boxes = boxes[order] | |||
| scores = scores[order] | |||
| # do NMS | |||
| dets = np.hstack((boxes, scores[:, np.newaxis])).astype( | |||
| np.float32, copy=False) | |||
| keep = py_cpu_nms(dets, nms_threshold) | |||
| dets = dets[keep, :] | |||
| # keep top-K faster NMS | |||
| dets = dets[:keep_top_k, :] | |||
| return dets / ss | |||
| @@ -0,0 +1,135 @@ | |||
| # -------------------------------------------------------- | |||
| # The implementation is also open-sourced by its author, Yang Liu, and is publicly available at | |||
| # https://github.com/damo-cv/MogFace | |||
| # -------------------------------------------------------- | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from .mogprednet import MogPredNet | |||
| from .resnet import ResNet | |||
| class MogFace(nn.Module): | |||
| def __init__(self): | |||
| super(MogFace, self).__init__() | |||
| self.backbone = ResNet(depth=101) | |||
| self.fpn = LFPN() | |||
| self.pred_net = MogPredNet() | |||
| def forward(self, x): | |||
| feature_list = self.backbone(x) | |||
| fpn_list = self.fpn(feature_list) | |||
| pyramid_feature_list = fpn_list[0] | |||
| conf, loc = self.pred_net(pyramid_feature_list) | |||
| return conf, loc | |||
| class FeatureFusion(nn.Module): | |||
| def __init__(self, lat_ch=256, **channels): | |||
| super(FeatureFusion, self).__init__() | |||
| self.main_conv = nn.Conv2d(channels['main'], lat_ch, kernel_size=1) | |||
| def forward(self, up, main): | |||
| main = self.main_conv(main) | |||
| _, _, H, W = main.size() | |||
| res = F.interpolate(up, scale_factor=2, mode='bilinear', align_corners=False) | |||
| if res.size(2) != main.size(2) or res.size(3) != main.size(3): | |||
| res = res[:, :, 0:H, 0:W] | |||
| res = res + main | |||
| return res | |||
| class LFPN(nn.Module): | |||
| def __init__(self, | |||
| c2_out_ch=256, | |||
| c3_out_ch=512, | |||
| c4_out_ch=1024, | |||
| c5_out_ch=2048, | |||
| c6_mid_ch=512, | |||
| c6_out_ch=512, | |||
| c7_mid_ch=128, | |||
| c7_out_ch=256, | |||
| out_dsfd_ft=True): | |||
| super(LFPN, self).__init__() | |||
| self.out_dsfd_ft = out_dsfd_ft | |||
| if self.out_dsfd_ft: | |||
| dsfd_module = [] | |||
| dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
| dsfd_module.append(nn.Conv2d(512, 256, kernel_size=3, padding=1)) | |||
| dsfd_module.append(nn.Conv2d(1024, 256, kernel_size=3, padding=1)) | |||
| dsfd_module.append(nn.Conv2d(2048, 256, kernel_size=3, padding=1)) | |||
| dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
| dsfd_module.append(nn.Conv2d(256, 256, kernel_size=3, padding=1)) | |||
| self.dsfd_modules = nn.ModuleList(dsfd_module) | |||
| c6_input_ch = c5_out_ch | |||
| self.c6 = nn.Sequential(*[ | |||
| nn.Conv2d( | |||
| c6_input_ch, | |||
| c6_mid_ch, | |||
| kernel_size=1, | |||
| ), | |||
| nn.BatchNorm2d(c6_mid_ch), | |||
| nn.ReLU(inplace=True), | |||
| nn.Conv2d( | |||
| c6_mid_ch, c6_out_ch, kernel_size=3, padding=1, stride=2), | |||
| nn.BatchNorm2d(c6_out_ch), | |||
| nn.ReLU(inplace=True) | |||
| ]) | |||
| self.c7 = nn.Sequential(*[ | |||
| nn.Conv2d( | |||
| c6_out_ch, | |||
| c7_mid_ch, | |||
| kernel_size=1, | |||
| ), | |||
| nn.BatchNorm2d(c7_mid_ch), | |||
| nn.ReLU(inplace=True), | |||
| nn.Conv2d( | |||
| c7_mid_ch, c7_out_ch, kernel_size=3, padding=1, stride=2), | |||
| nn.BatchNorm2d(c7_out_ch), | |||
| nn.ReLU(inplace=True) | |||
| ]) | |||
| self.p2_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
| self.p3_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
| self.p4_lat = nn.Conv2d(256, 256, kernel_size=3, padding=1) | |||
| self.c5_lat = nn.Conv2d(c6_input_ch, 256, kernel_size=3, padding=1) | |||
| self.c6_lat = nn.Conv2d(c6_out_ch, 256, kernel_size=3, padding=1) | |||
| self.c7_lat = nn.Conv2d(c7_out_ch, 256, kernel_size=3, padding=1) | |||
| self.ff_c5_c4 = FeatureFusion(main=c4_out_ch) | |||
| self.ff_c4_c3 = FeatureFusion(main=c3_out_ch) | |||
| self.ff_c3_c2 = FeatureFusion(main=c2_out_ch) | |||
| def forward(self, feature_list): | |||
| c2, c3, c4, c5 = feature_list | |||
| c6 = self.c6(c5) | |||
| c7 = self.c7(c6) | |||
| c5 = self.c5_lat(c5) | |||
| c6 = self.c6_lat(c6) | |||
| c7 = self.c7_lat(c7) | |||
| if self.out_dsfd_ft: | |||
| dsfd_fts = [] | |||
| dsfd_fts.append(self.dsfd_modules[0](c2)) | |||
| dsfd_fts.append(self.dsfd_modules[1](c3)) | |||
| dsfd_fts.append(self.dsfd_modules[2](c4)) | |||
| dsfd_fts.append(self.dsfd_modules[3](feature_list[-1])) | |||
| dsfd_fts.append(self.dsfd_modules[4](c6)) | |||
| dsfd_fts.append(self.dsfd_modules[5](c7)) | |||
| p4 = self.ff_c5_c4(c5, c4) | |||
| p3 = self.ff_c4_c3(p4, c3) | |||
| p2 = self.ff_c3_c2(p3, c2) | |||
| p2 = self.p2_lat(p2) | |||
| p3 = self.p3_lat(p3) | |||
| p4 = self.p4_lat(p4) | |||
| if self.out_dsfd_ft: | |||
| return ([p2, p3, p4, c5, c6, c7], dsfd_fts) | |||
| @@ -0,0 +1,164 @@ | |||
| # -------------------------------------------------------- | |||
| # The implementation is also open-sourced by its author, Yang Liu, and is publicly available at | |||
| # https://github.com/damo-cv/MogFace | |||
| # -------------------------------------------------------- | |||
| import math | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| class conv_bn(nn.Module): | |||
| """docstring for conv""" | |||
| def __init__(self, in_plane, out_plane, kernel_size, stride, padding): | |||
| super(conv_bn, self).__init__() | |||
| self.conv1 = nn.Conv2d( | |||
| in_plane, | |||
| out_plane, | |||
| kernel_size=kernel_size, | |||
| stride=stride, | |||
| padding=padding) | |||
| self.bn1 = nn.BatchNorm2d(out_plane) | |||
| def forward(self, x): | |||
| x = self.conv1(x) | |||
| return self.bn1(x) | |||
| class SSHContext(nn.Module): | |||
| def __init__(self, channels, Xchannels=256): | |||
| super(SSHContext, self).__init__() | |||
| self.conv1 = nn.Conv2d( | |||
| channels, Xchannels, kernel_size=3, stride=1, padding=1) | |||
| self.conv2 = nn.Conv2d( | |||
| channels, | |||
| Xchannels // 2, | |||
| kernel_size=3, | |||
| dilation=2, | |||
| stride=1, | |||
| padding=2) | |||
| self.conv2_1 = nn.Conv2d( | |||
| Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1) | |||
| self.conv2_2 = nn.Conv2d( | |||
| Xchannels // 2, | |||
| Xchannels // 2, | |||
| kernel_size=3, | |||
| dilation=2, | |||
| stride=1, | |||
| padding=2) | |||
| self.conv2_2_1 = nn.Conv2d( | |||
| Xchannels // 2, Xchannels // 2, kernel_size=3, stride=1, padding=1) | |||
| def forward(self, x): | |||
| x1 = F.relu(self.conv1(x), inplace=True) | |||
| x2 = F.relu(self.conv2(x), inplace=True) | |||
| x2_1 = F.relu(self.conv2_1(x2), inplace=True) | |||
| x2_2 = F.relu(self.conv2_2(x2), inplace=True) | |||
| x2_2 = F.relu(self.conv2_2_1(x2_2), inplace=True) | |||
| return torch.cat([x1, x2_1, x2_2], 1) | |||
| class DeepHead(nn.Module): | |||
| def __init__(self, | |||
| in_channel=256, | |||
| out_channel=256, | |||
| use_gn=False, | |||
| num_conv=4): | |||
| super(DeepHead, self).__init__() | |||
| self.use_gn = use_gn | |||
| self.num_conv = num_conv | |||
| self.conv1 = nn.Conv2d(in_channel, out_channel, 3, 1, 1) | |||
| self.conv2 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
| self.conv3 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
| self.conv4 = nn.Conv2d(out_channel, out_channel, 3, 1, 1) | |||
| if self.use_gn: | |||
| self.gn1 = nn.GroupNorm(16, out_channel) | |||
| self.gn2 = nn.GroupNorm(16, out_channel) | |||
| self.gn3 = nn.GroupNorm(16, out_channel) | |||
| self.gn4 = nn.GroupNorm(16, out_channel) | |||
| def forward(self, x): | |||
| if self.use_gn: | |||
| x1 = F.relu(self.gn1(self.conv1(x)), inplace=True) | |||
| x2 = F.relu(self.gn2(self.conv2(x1)), inplace=True) | |||
| x3 = F.relu(self.gn3(self.conv3(x2)), inplace=True) | |||
| x4 = F.relu(self.gn4(self.conv4(x3)), inplace=True) | |||
| else: | |||
| x1 = F.relu(self.conv1(x), inplace=True) | |||
| x2 = F.relu(self.conv2(x1), inplace=True) | |||
| if self.num_conv == 2: | |||
| return x2 | |||
| x3 = F.relu(self.conv3(x2), inplace=True) | |||
| x4 = F.relu(self.conv4(x3), inplace=True) | |||
| return x4 | |||
| class MogPredNet(nn.Module): | |||
| def __init__(self, | |||
| num_anchor_per_pixel=1, | |||
| num_classes=1, | |||
| input_ch_list=[256, 256, 256, 256, 256, 256], | |||
| use_deep_head=True, | |||
| deep_head_with_gn=True, | |||
| use_ssh=True, | |||
| deep_head_ch=512): | |||
| super(MogPredNet, self).__init__() | |||
| self.num_classes = num_classes | |||
| self.use_deep_head = use_deep_head | |||
| self.deep_head_with_gn = deep_head_with_gn | |||
| self.use_ssh = use_ssh | |||
| self.deep_head_ch = deep_head_ch | |||
| if self.use_ssh: | |||
| self.conv_SSH = SSHContext(input_ch_list[0], | |||
| self.deep_head_ch // 2) | |||
| if self.use_deep_head: | |||
| if self.deep_head_with_gn: | |||
| self.deep_loc_head = DeepHead( | |||
| self.deep_head_ch, self.deep_head_ch, use_gn=True) | |||
| self.deep_cls_head = DeepHead( | |||
| self.deep_head_ch, self.deep_head_ch, use_gn=True) | |||
| self.pred_cls = nn.Conv2d(self.deep_head_ch, | |||
| 1 * num_anchor_per_pixel, 3, 1, 1) | |||
| self.pred_loc = nn.Conv2d(self.deep_head_ch, | |||
| 4 * num_anchor_per_pixel, 3, 1, 1) | |||
| self.sigmoid = nn.Sigmoid() | |||
| def forward(self, pyramid_feature_list, dsfd_ft_list=None): | |||
| loc = [] | |||
| conf = [] | |||
| if self.use_deep_head: | |||
| for x in pyramid_feature_list: | |||
| if self.use_ssh: | |||
| x = self.conv_SSH(x) | |||
| x_cls = self.deep_cls_head(x) | |||
| x_loc = self.deep_loc_head(x) | |||
| conf.append( | |||
| self.pred_cls(x_cls).permute(0, 2, 3, 1).contiguous()) | |||
| loc.append( | |||
| self.pred_loc(x_loc).permute(0, 2, 3, 1).contiguous()) | |||
| loc = torch.cat([o.view(o.size(0), -1, 4) for o in loc], 1) | |||
| conf = torch.cat( | |||
| [o.view(o.size(0), -1, self.num_classes) for o in conf], 1) | |||
| output = ( | |||
| self.sigmoid(conf.view(conf.size(0), -1, self.num_classes)), | |||
| loc.view(loc.size(0), -1, 4), | |||
| ) | |||
| return output | |||
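For reference, a minimal shape check of this head (an assumed usage sketch, not part of the PR; it presumes `MogPredNet` is importable and feeds six dummy pyramid levels for a 640x640 input):

```python
import torch

# Assumed smoke test: default MogPredNet (SSH context + GN deep heads,
# one anchor per pixel) on six pyramid levels with strides 4..128.
net = MogPredNet()
feats = [torch.randn(1, 256, 640 // s, 640 // s) for s in (4, 8, 16, 32, 64, 128)]
conf, loc = net(feats)
# N = 160^2 + 80^2 + 40^2 + 20^2 + 10^2 + 5^2 = 34125 locations in total
print(conf.shape)  # torch.Size([1, 34125, 1]), sigmoid probabilities
print(loc.shape)   # torch.Size([1, 34125, 4]), box regression offsets
```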
| @@ -0,0 +1,193 @@ | |||
| # The implementation is modified from the original ResNet implementation, which is | |||
| # also open-sourced by the author, Yang Liu, | |||
| # and is publicly available at https://github.com/damo-cv/MogFace | |||
| import torch.nn as nn | |||
| def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): | |||
| """3x3 convolution with padding""" | |||
| return nn.Conv2d( | |||
| in_planes, | |||
| out_planes, | |||
| kernel_size=3, | |||
| stride=stride, | |||
| padding=dilation, | |||
| groups=groups, | |||
| bias=False, | |||
| dilation=dilation) | |||
| def conv1x1(in_planes, out_planes, stride=1): | |||
| """1x1 convolution""" | |||
| return nn.Conv2d( | |||
| in_planes, out_planes, kernel_size=1, stride=stride, bias=False) | |||
| class Bottleneck(nn.Module): | |||
| expansion = 4 | |||
| def __init__(self, | |||
| inplanes, | |||
| planes, | |||
| stride=1, | |||
| downsample=None, | |||
| groups=1, | |||
| base_width=64, | |||
| dilation=1, | |||
| norm_layer=None): | |||
| super(Bottleneck, self).__init__() | |||
| if norm_layer is None: | |||
| norm_layer = nn.BatchNorm2d | |||
| width = int(planes * (base_width / 64.)) * groups | |||
| # Both self.conv2 and self.downsample layers downsample the input when stride != 1 | |||
| self.conv1 = conv1x1(inplanes, width) | |||
| self.bn1 = norm_layer(width) | |||
| self.conv2 = conv3x3(width, width, stride, groups, dilation) | |||
| self.bn2 = norm_layer(width) | |||
| self.conv3 = conv1x1(width, planes * self.expansion) | |||
| self.bn3 = norm_layer(planes * self.expansion) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.downsample = downsample | |||
| self.stride = stride | |||
| def forward(self, x): | |||
| identity = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| out = self.relu(out) | |||
| out = self.conv3(out) | |||
| out = self.bn3(out) | |||
| if self.downsample is not None: | |||
| identity = self.downsample(x) | |||
| out += identity | |||
| out = self.relu(out) | |||
| return out | |||
| class ResNet(nn.Module): | |||
| def __init__(self, | |||
| depth=50, | |||
| groups=1, | |||
| width_per_group=64, | |||
| replace_stride_with_dilation=None, | |||
| norm_layer=None, | |||
| inplanes=64, | |||
| shrink_ch_ratio=1): | |||
| super(ResNet, self).__init__() | |||
| if norm_layer is None: | |||
| norm_layer = nn.BatchNorm2d | |||
| self._norm_layer = norm_layer | |||
| if depth == 50: | |||
| block = Bottleneck | |||
| layers = [3, 4, 6, 3] | |||
| elif depth == 101: | |||
| block = Bottleneck | |||
| layers = [3, 4, 23, 3] | |||
| elif depth == 152: | |||
| block = Bottleneck | |||
| layers = [3, 4, 36, 3] | |||
| elif depth == 18: | |||
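| # BasicBlock is assumed to be imported alongside Bottleneck (it is not | |||
| # defined in this file), so depth=18 will raise NameError without it. | |||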
| block = BasicBlock | |||
| layers = [2, 2, 2, 2] | |||
| else: | |||
| raise ValueError('only support depth in [18, 50, 101, 152]') | |||
| shrink_input_ch = int(inplanes * shrink_ch_ratio) | |||
| self.inplanes = int(inplanes * shrink_ch_ratio) | |||
| if shrink_ch_ratio == 0.125: | |||
| layers = [2, 3, 3, 3] | |||
| self.dilation = 1 | |||
| if replace_stride_with_dilation is None: | |||
| # each element in the tuple indicates if we should replace | |||
| # the 2x2 stride with a dilated convolution instead | |||
| replace_stride_with_dilation = [False, False, False] | |||
| if len(replace_stride_with_dilation) != 3: | |||
| raise ValueError('replace_stride_with_dilation should be None ' | |||
| 'or a 3-element tuple, got {}'.format( | |||
| replace_stride_with_dilation)) | |||
| self.groups = groups | |||
| self.base_width = width_per_group | |||
| self.conv1 = nn.Conv2d( | |||
| 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) | |||
| self.bn1 = norm_layer(self.inplanes) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||
| self.layer1 = self._make_layer(block, shrink_input_ch, layers[0]) | |||
| self.layer2 = self._make_layer( | |||
| block, | |||
| shrink_input_ch * 2, | |||
| layers[1], | |||
| stride=2, | |||
| dilate=replace_stride_with_dilation[0]) | |||
| self.layer3 = self._make_layer( | |||
| block, | |||
| shrink_input_ch * 4, | |||
| layers[2], | |||
| stride=2, | |||
| dilate=replace_stride_with_dilation[1]) | |||
| self.layer4 = self._make_layer( | |||
| block, | |||
| shrink_input_ch * 8, | |||
| layers[3], | |||
| stride=2, | |||
| dilate=replace_stride_with_dilation[2]) | |||
| def _make_layer(self, block, planes, blocks, stride=1, dilate=False): | |||
| norm_layer = self._norm_layer | |||
| downsample = None | |||
| previous_dilation = self.dilation | |||
| if dilate: | |||
| self.dilation *= stride | |||
| stride = 1 | |||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||
| downsample = nn.Sequential( | |||
| conv1x1(self.inplanes, planes * block.expansion, stride), | |||
| norm_layer(planes * block.expansion), | |||
| ) | |||
| layers = [] | |||
| layers.append( | |||
| block(self.inplanes, planes, stride, downsample, self.groups, | |||
| self.base_width, previous_dilation, norm_layer)) | |||
| self.inplanes = planes * block.expansion | |||
| for _ in range(1, blocks): | |||
| layers.append( | |||
| block( | |||
| self.inplanes, | |||
| planes, | |||
| groups=self.groups, | |||
| base_width=self.base_width, | |||
| dilation=self.dilation, | |||
| norm_layer=norm_layer)) | |||
| return nn.Sequential(*layers) | |||
| def forward(self, x): | |||
| x = self.conv1(x) | |||
| x = self.bn1(x) | |||
| x = self.relu(x) | |||
| x = self.maxpool(x) | |||
| four_conv_layer = [] | |||
| x = self.layer1(x) | |||
| four_conv_layer.append(x) | |||
| x = self.layer2(x) | |||
| four_conv_layer.append(x) | |||
| x = self.layer3(x) | |||
| four_conv_layer.append(x) | |||
| x = self.layer4(x) | |||
| four_conv_layer.append(x) | |||
| return four_conv_layer | |||
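A quick sanity check of the backbone output (an assumed usage, not part of the PR; the four stages are what the FPN above consumes as c2..c5):

```python
import torch

# Assumed check: a ResNet-50 backbone returns the four residual stages
# at strides 4, 8, 16 and 32.
backbone = ResNet(depth=50)
for f in backbone(torch.randn(1, 3, 224, 224)):
    print(f.shape)
# torch.Size([1, 256, 56, 56])
# torch.Size([1, 512, 28, 28])
# torch.Size([1, 1024, 14, 14])
# torch.Size([1, 2048, 7, 7])
```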
| @@ -0,0 +1,212 @@ | |||
| # Modified from https://github.com/biubug6/Pytorch_Retinaface | |||
| import math | |||
| from itertools import product as product | |||
| from math import ceil | |||
| import numpy as np | |||
| import torch | |||
| def transform_anchor(anchors): | |||
| """ | |||
| from [x0, y0, x1, y1] to [c_x, c_y, w, h] | |||
| x1 = x0 + w - 1 | |||
| c_x = (x0 + x1) / 2 = (2x0 + w - 1) / 2 = x0 + (w - 1) / 2 | |||
| """ | |||
| return np.concatenate(((anchors[:, :2] + anchors[:, 2:]) / 2, | |||
| anchors[:, 2:] - anchors[:, :2] + 1), | |||
| axis=1) | |||
| def normalize_anchor(anchors): | |||
| """ | |||
| from [c_x, c_y, w, h] to [x0, y0, x1, y1] | |||
| """ | |||
| item_1 = anchors[:, :2] - (anchors[:, 2:] - 1) / 2 | |||
| item_2 = anchors[:, :2] + (anchors[:, 2:] - 1) / 2 | |||
| return np.concatenate((item_1, item_2), axis=1) | |||
| class MogPriorBox(object): | |||
| """ | |||
| works for both FPN and single-layer outputs (the single-layer path is untested) | |||
| return (np.ndarray) [num_anchors, 4] in center form (c_x, c_y, w, h) | |||
| """ | |||
| def __init__(self, | |||
| scale_list=[1.], | |||
| aspect_ratio_list=[1.0], | |||
| stride_list=[4, 8, 16, 32, 64, 128], | |||
| anchor_size_list=[16, 32, 64, 128, 256, 512]): | |||
| self.scale_list = scale_list | |||
| self.aspect_ratio_list = aspect_ratio_list | |||
| self.stride_list = stride_list | |||
| self.anchor_size_list = anchor_size_list | |||
| def __call__(self, img_height, img_width): | |||
| final_anchor_list = [] | |||
| for idx, stride in enumerate(self.stride_list): | |||
| anchor_list = [] | |||
| cur_img_height = img_height | |||
| cur_img_width = img_width | |||
| tmp_stride = stride | |||
| while tmp_stride != 1: | |||
| tmp_stride = tmp_stride // 2 | |||
| cur_img_height = (cur_img_height + 1) // 2 | |||
| cur_img_width = (cur_img_width + 1) // 2 | |||
| for i in range(cur_img_height): | |||
| for j in range(cur_img_width): | |||
| for scale in self.scale_list: | |||
| cx = (j + 0.5) * stride | |||
| cy = (i + 0.5) * stride | |||
| side_x = self.anchor_size_list[idx] * scale | |||
| side_y = self.anchor_size_list[idx] * scale | |||
| for ratio in self.aspect_ratio_list: | |||
| anchor_list.append([ | |||
| cx, cy, side_x / math.sqrt(ratio), | |||
| side_y * math.sqrt(ratio) | |||
| ]) | |||
| final_anchor_list.append(anchor_list) | |||
| final_anchor_arr = np.concatenate(final_anchor_list, axis=0) | |||
| normalized_anchor_arr = normalize_anchor(final_anchor_arr).astype( | |||
| 'float32') | |||
| transformed_anchor = transform_anchor(normalized_anchor_arr) | |||
| return transformed_anchor | |||
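Worked example of the anchor count (an assumed check, not in the PR): each level contributes one anchor per feature-map cell, so a 640x640 image with the defaults yields 34125 anchors.

```python
# Assumed check of MogPriorBox with the default strides [4, 8, ..., 128]:
# 160^2 + 80^2 + 40^2 + 20^2 + 10^2 + 5^2 = 34125 anchors.
prior_box = MogPriorBox()
anchors = prior_box(640, 640)
assert anchors.shape == (34125, 4)
print(anchors[0])  # center form (c_x, c_y, w, h), float32
```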
| class PriorBox(object): | |||
| def __init__(self, cfg, image_size=None, phase='train'): | |||
| super(PriorBox, self).__init__() | |||
| self.min_sizes = cfg['min_sizes'] | |||
| self.steps = cfg['steps'] | |||
| self.clip = cfg['clip'] | |||
| self.image_size = image_size | |||
| self.feature_maps = [[ | |||
| ceil(self.image_size[0] / step), | |||
| ceil(self.image_size[1] / step) | |||
| ] for step in self.steps] | |||
| self.name = 's' | |||
| def forward(self): | |||
| anchors = [] | |||
| for k, f in enumerate(self.feature_maps): | |||
| min_sizes = self.min_sizes[k] | |||
| for i, j in product(range(f[0]), range(f[1])): | |||
| for min_size in min_sizes: | |||
| s_kx = min_size / self.image_size[1] | |||
| s_ky = min_size / self.image_size[0] | |||
| dense_cx = [ | |||
| x * self.steps[k] / self.image_size[1] | |||
| for x in [j + 0.5] | |||
| ] | |||
| dense_cy = [ | |||
| y * self.steps[k] / self.image_size[0] | |||
| for y in [i + 0.5] | |||
| ] | |||
| for cy, cx in product(dense_cy, dense_cx): | |||
| anchors += [cx, cy, s_kx, s_ky] | |||
| # back to torch land | |||
| output = torch.Tensor(anchors).view(-1, 4) | |||
| if self.clip: | |||
| output.clamp_(max=1, min=0) | |||
| return output | |||
| def py_cpu_nms(dets, thresh): | |||
| """Pure Python NMS baseline.""" | |||
| x1 = dets[:, 0] | |||
| y1 = dets[:, 1] | |||
| x2 = dets[:, 2] | |||
| y2 = dets[:, 3] | |||
| scores = dets[:, 4] | |||
| areas = (x2 - x1 + 1) * (y2 - y1 + 1) | |||
| order = scores.argsort()[::-1] | |||
| keep = [] | |||
| while order.size > 0: | |||
| i = order[0] | |||
| keep.append(i) | |||
| xx1 = np.maximum(x1[i], x1[order[1:]]) | |||
| yy1 = np.maximum(y1[i], y1[order[1:]]) | |||
| xx2 = np.minimum(x2[i], x2[order[1:]]) | |||
| yy2 = np.minimum(y2[i], y2[order[1:]]) | |||
| w = np.maximum(0.0, xx2 - xx1 + 1) | |||
| h = np.maximum(0.0, yy2 - yy1 + 1) | |||
| inter = w * h | |||
| ovr = inter / (areas[i] + areas[order[1:]] - inter) | |||
| inds = np.where(ovr <= thresh)[0] | |||
| order = order[inds + 1] | |||
| return keep | |||
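A small illustration of the NMS behavior (assumed inputs):

```python
import numpy as np

# Two heavily overlapping boxes plus one disjoint box; with thresh=0.5 the
# lower-scoring duplicate (IoU ~0.83 with the first box) is suppressed.
dets = np.array([
    [10, 10, 50, 50, 0.9],
    [12, 12, 52, 52, 0.8],
    [100, 100, 140, 140, 0.7],
], dtype=np.float32)
print(py_cpu_nms(dets, thresh=0.5))  # keeps indices 0 and 2
```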
| def mogdecode(loc, anchors): | |||
| """ | |||
| loc: torch.Tensor | |||
| anchors: 2-d, torch.Tensor (cx, cy, w, h) | |||
| boxes: 2-d, torch.Tensor (x0, y0, x1, y1) | |||
| """ | |||
| boxes = torch.cat((anchors[:, :2] + loc[:, :2] * anchors[:, 2:], | |||
| anchors[:, 2:] * torch.exp(loc[:, 2:])), 1) | |||
| boxes[:, 0] -= (boxes[:, 2] - 1) / 2 | |||
| boxes[:, 1] -= (boxes[:, 3] - 1) / 2 | |||
| boxes[:, 2] += boxes[:, 0] - 1 | |||
| boxes[:, 3] += boxes[:, 1] - 1 | |||
| return boxes | |||
| # Adapted from https://github.com/Hakuyume/chainer-ssd | |||
| def decode(loc, priors, variances): | |||
| """Decode locations from predictions using priors to undo | |||
| the encoding we did for offset regression at train time. | |||
| Args: | |||
| loc (tensor): location predictions for loc layers, | |||
| Shape: [num_priors,4] | |||
| priors (tensor): Prior boxes in center-offset form. | |||
| Shape: [num_priors,4]. | |||
| variances: (list[float]) Variances of priorboxes | |||
| Return: | |||
| decoded bounding box predictions | |||
| """ | |||
| boxes = torch.cat( | |||
| (priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], | |||
| priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) | |||
| boxes[:, :2] -= boxes[:, 2:] / 2 | |||
| boxes[:, 2:] += boxes[:, :2] | |||
| return boxes | |||
| def decode_landm(pre, priors, variances): | |||
| """Decode landm from predictions using priors to undo | |||
| the encoding we did for offset regression at train time. | |||
| Args: | |||
| pre (tensor): landm predictions for loc layers, | |||
| Shape: [num_priors,10] | |||
| priors (tensor): Prior boxes in center-offset form. | |||
| Shape: [num_priors,4]. | |||
| variances: (list[float]) Variances of priorboxes | |||
| Return: | |||
| decoded landm predictions | |||
| """ | |||
| a = priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:] | |||
| b = priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:] | |||
| c = priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:] | |||
| d = priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:] | |||
| e = priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:] | |||
| landms = torch.cat((a, b, c, d, e), dim=1) | |||
| return landms | |||
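A sanity check for `decode` (assumed values; [0.1, 0.2] are the variances commonly used with RetinaFace-style models): zero offsets must reproduce the prior itself in corner form.

```python
import torch

prior = torch.tensor([[0.5, 0.5, 0.2, 0.2]])  # (cx, cy, w, h), normalized
loc = torch.zeros(1, 4)                       # zero regression offsets
print(decode(loc, prior, variances=[0.1, 0.2]))
# tensor([[0.4000, 0.4000, 0.6000, 0.6000]])  -> (x0, y0, x1, y1)
```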
| @@ -0,0 +1 @@ | |||
| from .models.detector import MtcnnFaceDetector | |||
| @@ -0,0 +1,240 @@ | |||
| # The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
| import numpy as np | |||
| from PIL import Image | |||
| def nms(boxes, overlap_threshold=0.5, mode='union'): | |||
| """Non-maximum suppression. | |||
| Arguments: | |||
| boxes: a float numpy array of shape [n, 5], | |||
| where each row is (xmin, ymin, xmax, ymax, score). | |||
| overlap_threshold: a float number. | |||
| mode: 'union' or 'min'. | |||
| Returns: | |||
| list with indices of the selected boxes | |||
| """ | |||
| # if there are no boxes, return the empty list | |||
| if len(boxes) == 0: | |||
| return [] | |||
| # list of picked indices | |||
| pick = [] | |||
| # grab the coordinates of the bounding boxes | |||
| x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] | |||
| area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0) | |||
| ids = np.argsort(score) # in increasing order | |||
| while len(ids) > 0: | |||
| # grab index of the largest value | |||
| last = len(ids) - 1 | |||
| i = ids[last] | |||
| pick.append(i) | |||
| # compute intersections | |||
| # of the box with the largest score | |||
| # with the rest of boxes | |||
| # left top corner of intersection boxes | |||
| ix1 = np.maximum(x1[i], x1[ids[:last]]) | |||
| iy1 = np.maximum(y1[i], y1[ids[:last]]) | |||
| # right bottom corner of intersection boxes | |||
| ix2 = np.minimum(x2[i], x2[ids[:last]]) | |||
| iy2 = np.minimum(y2[i], y2[ids[:last]]) | |||
| # width and height of intersection boxes | |||
| w = np.maximum(0.0, ix2 - ix1 + 1.0) | |||
| h = np.maximum(0.0, iy2 - iy1 + 1.0) | |||
| # intersections' areas | |||
| inter = w * h | |||
| if mode == 'min': | |||
| overlap = inter / np.minimum(area[i], area[ids[:last]]) | |||
| elif mode == 'union': | |||
| # intersection over union (IoU) | |||
| overlap = inter / (area[i] + area[ids[:last]] - inter) | |||
| # delete all boxes where overlap is too big | |||
| ids = np.delete( | |||
| ids, | |||
| np.concatenate([[last], | |||
| np.where(overlap > overlap_threshold)[0]])) | |||
| return pick | |||
| def convert_to_square(bboxes): | |||
| """Convert bounding boxes to a square form. | |||
| Arguments: | |||
| bboxes: a float numpy array of shape [n, 5]. | |||
| Returns: | |||
| a float numpy array of shape [n, 5], | |||
| squared bounding boxes. | |||
| """ | |||
| square_bboxes = np.zeros_like(bboxes) | |||
| x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
| h = y2 - y1 + 1.0 | |||
| w = x2 - x1 + 1.0 | |||
| max_side = np.maximum(h, w) | |||
| square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5 | |||
| square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5 | |||
| square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 | |||
| square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 | |||
| return square_bboxes | |||
| def calibrate_box(bboxes, offsets): | |||
| """Transform bounding boxes to be more like true bounding boxes. | |||
| 'offsets' is one of the outputs of the nets. | |||
| Arguments: | |||
| bboxes: a float numpy array of shape [n, 5]. | |||
| offsets: a float numpy array of shape [n, 4]. | |||
| Returns: | |||
| a float numpy array of shape [n, 5]. | |||
| """ | |||
| x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
| w = x2 - x1 + 1.0 | |||
| h = y2 - y1 + 1.0 | |||
| w = np.expand_dims(w, 1) | |||
| h = np.expand_dims(h, 1) | |||
| # this is what happening here: | |||
| # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] | |||
| # x1_true = x1 + tx1*w | |||
| # y1_true = y1 + ty1*h | |||
| # x2_true = x2 + tx2*w | |||
| # y2_true = y2 + ty2*h | |||
| # below is just more compact form of this | |||
| # are offsets always such that | |||
| # x1 < x2 and y1 < y2 ? | |||
| translation = np.hstack([w, h, w, h]) * offsets | |||
| bboxes[:, 0:4] = bboxes[:, 0:4] + translation | |||
| return bboxes | |||
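A tiny numeric check of the offset transform documented in the comments above (illustrative values):

```python
import numpy as np

# w = h = 41 here, so tx1 = 0.1 shifts x1 by +4.1 px and tx2 = -0.1
# shifts x2 by -4.1 px; the y coordinates are left unchanged.
bboxes = np.array([[10.0, 10.0, 50.0, 50.0, 0.9]])
offsets = np.array([[0.1, 0.0, -0.1, 0.0]])
print(calibrate_box(bboxes, offsets)[0, :4])  # [14.1 10.  45.9 50. ]
```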
| def get_image_boxes(bounding_boxes, img, size=24): | |||
| """Cut out boxes from the image. | |||
| Arguments: | |||
| bounding_boxes: a float numpy array of shape [n, 5]. | |||
| img: an instance of PIL.Image. | |||
| size: an integer, size of cutouts. | |||
| Returns: | |||
| a float numpy array of shape [n, 3, size, size]. | |||
| """ | |||
| num_boxes = len(bounding_boxes) | |||
| width, height = img.size | |||
| [dy, edy, dx, edx, y, ey, x, ex, w, | |||
| h] = correct_bboxes(bounding_boxes, width, height) | |||
| img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') | |||
| for i in range(num_boxes): | |||
| img_box = np.zeros((h[i], w[i], 3), 'uint8') | |||
| img_array = np.asarray(img, 'uint8') | |||
| img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ | |||
| img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] | |||
| # resize | |||
| img_box = Image.fromarray(img_box) | |||
| img_box = img_box.resize((size, size), Image.BILINEAR) | |||
| img_box = np.asarray(img_box, 'float32') | |||
| img_boxes[i, :, :, :] = _preprocess(img_box) | |||
| return img_boxes | |||
| def correct_bboxes(bboxes, width, height): | |||
| """Crop boxes that are too big and get coordinates | |||
| with respect to cutouts. | |||
| Arguments: | |||
| bboxes: a float numpy array of shape [n, 5], | |||
| where each row is (xmin, ymin, xmax, ymax, score). | |||
| width: a float number. | |||
| height: a float number. | |||
| Returns: | |||
| dy, dx, edy, edx: a int numpy arrays of shape [n], | |||
| coordinates of the boxes with respect to the cutouts. | |||
| y, x, ey, ex: a int numpy arrays of shape [n], | |||
| corrected ymin, xmin, ymax, xmax. | |||
| h, w: a int numpy arrays of shape [n], | |||
| just heights and widths of boxes. | |||
| in the following order: | |||
| [dy, edy, dx, edx, y, ey, x, ex, w, h]. | |||
| """ | |||
| x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] | |||
| w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 | |||
| num_boxes = bboxes.shape[0] | |||
| # 'e' stands for end | |||
| # (x, y) -> (ex, ey) | |||
| x, y, ex, ey = x1, y1, x2, y2 | |||
| # we need to cut out a box from the image. | |||
| # (x, y, ex, ey) are corrected coordinates of the box | |||
| # in the image. | |||
| # (dx, dy, edx, edy) are coordinates of the box in the cutout | |||
| # from the image. | |||
| dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, )) | |||
| edx, edy = w.copy() - 1.0, h.copy() - 1.0 | |||
| # if box's bottom right corner is too far right | |||
| ind = np.where(ex > width - 1.0)[0] | |||
| edx[ind] = w[ind] + width - 2.0 - ex[ind] | |||
| ex[ind] = width - 1.0 | |||
| # if box's bottom right corner is too low | |||
| ind = np.where(ey > height - 1.0)[0] | |||
| edy[ind] = h[ind] + height - 2.0 - ey[ind] | |||
| ey[ind] = height - 1.0 | |||
| # if box's top left corner is too far left | |||
| ind = np.where(x < 0.0)[0] | |||
| dx[ind] = 0.0 - x[ind] | |||
| x[ind] = 0.0 | |||
| # if box's top left corner is too high | |||
| ind = np.where(y < 0.0)[0] | |||
| dy[ind] = 0.0 - y[ind] | |||
| y[ind] = 0.0 | |||
| return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] | |||
| return_list = [i.astype('int32') for i in return_list] | |||
| return return_list | |||
| def _preprocess(img): | |||
| """Preprocessing step before feeding the network. | |||
| Arguments: | |||
| img: a float numpy array of shape [h, w, c]. | |||
| Returns: | |||
| a float numpy array of shape [1, c, h, w]. | |||
| """ | |||
| img = img.transpose((2, 0, 1)) | |||
| img = np.expand_dims(img, 0) | |||
| img = (img - 127.5) * 0.0078125 | |||
| return img | |||
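The scaling constant 0.0078125 is exactly 1/128, so pixels land roughly in [-1, 1] (illustrative check):

```python
import numpy as np

# (x - 127.5) / 128 maps 0 -> ~-0.996, 127.5 -> 0, 255 -> ~0.996.
img = np.array([[[0.0, 127.5, 255.0]]], dtype='float32')  # shape [1, 1, 3]
print(_preprocess(img).ravel())  # ~[-0.996, 0.0, 0.996]
```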
| @@ -0,0 +1,149 @@ | |||
| # The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
| import os | |||
| import numpy as np | |||
| import torch | |||
| import torch.backends.cudnn as cudnn | |||
| from PIL import Image | |||
| from modelscope.metainfo import Models | |||
| from modelscope.models.base import TorchModel | |||
| from modelscope.models.builder import MODELS | |||
| from modelscope.utils.constant import Tasks | |||
| from .box_utils import calibrate_box, convert_to_square, get_image_boxes, nms | |||
| from .first_stage import run_first_stage | |||
| from .get_nets import ONet, PNet, RNet | |||
| @MODELS.register_module(Tasks.face_detection, module_name=Models.mtcnn) | |||
| class MtcnnFaceDetector(TorchModel): | |||
| def __init__(self, model_path, device='cuda'): | |||
| super().__init__(model_path) | |||
| torch.set_grad_enabled(False) | |||
| cudnn.benchmark = True | |||
| self.model_path = model_path | |||
| self.device = device | |||
| self.pnet = PNet(model_path=os.path.join(self.model_path, 'pnet.npy')) | |||
| self.rnet = RNet(model_path=os.path.join(self.model_path, 'rnet.npy')) | |||
| self.onet = ONet(model_path=os.path.join(self.model_path, 'onet.npy')) | |||
| self.pnet = self.pnet.to(device) | |||
| self.rnet = self.rnet.to(device) | |||
| self.onet = self.onet.to(device) | |||
| def forward(self, input): | |||
| image = Image.fromarray(np.uint8(input['img'].cpu().numpy())) | |||
| pnet = self.pnet | |||
| rnet = self.rnet | |||
| onet = self.onet | |||
| onet.eval() | |||
| min_face_size = 20.0 | |||
| thresholds = [0.7, 0.8, 0.9] | |||
| nms_thresholds = [0.7, 0.7, 0.7] | |||
| # BUILD AN IMAGE PYRAMID | |||
| width, height = image.size | |||
| min_length = min(height, width) | |||
| min_detection_size = 12 | |||
| factor = 0.707 # sqrt(0.5) | |||
| # scales for scaling the image | |||
| scales = [] | |||
| m = min_detection_size / min_face_size | |||
| min_length *= m | |||
| factor_count = 0 | |||
| while min_length > min_detection_size: | |||
| scales.append(m * factor**factor_count) | |||
| min_length *= factor | |||
| factor_count += 1 | |||
| # STAGE 1 | |||
| # it will be returned | |||
| bounding_boxes = [] | |||
| # run P-Net on different scales | |||
| for s in scales: | |||
| boxes = run_first_stage( | |||
| image, | |||
| pnet, | |||
| scale=s, | |||
| threshold=thresholds[0], | |||
| device=self.device) | |||
| bounding_boxes.append(boxes) | |||
| # collect boxes (and offsets, and scores) from different scales | |||
| bounding_boxes = [i for i in bounding_boxes if i is not None] | |||
| bounding_boxes = np.vstack(bounding_boxes) | |||
| keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) | |||
| bounding_boxes = bounding_boxes[keep] | |||
| # use offsets predicted by pnet to transform bounding boxes | |||
| bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], | |||
| bounding_boxes[:, 5:]) | |||
| # shape [n_boxes, 5] | |||
| bounding_boxes = convert_to_square(bounding_boxes) | |||
| bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) | |||
| # STAGE 2 | |||
| img_boxes = get_image_boxes(bounding_boxes, image, size=24) | |||
| img_boxes = torch.as_tensor(img_boxes, dtype=torch.float32) | |||
| output = rnet(img_boxes.to(self.device)) | |||
| offsets = output[0].cpu().data.numpy() # shape [n_boxes, 4] | |||
| probs = output[1].cpu().data.numpy() # shape [n_boxes, 2] | |||
| keep = np.where(probs[:, 1] > thresholds[1])[0] | |||
| bounding_boxes = bounding_boxes[keep] | |||
| bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) | |||
| offsets = offsets[keep] | |||
| keep = nms(bounding_boxes, nms_thresholds[1]) | |||
| bounding_boxes = bounding_boxes[keep] | |||
| bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) | |||
| bounding_boxes = convert_to_square(bounding_boxes) | |||
| bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) | |||
| # STAGE 3 | |||
| img_boxes = get_image_boxes(bounding_boxes, image, size=48) | |||
| if len(img_boxes) == 0: | |||
| return [], [] | |||
| img_boxes = torch.as_tensor(img_boxes, dtype=torch.float32) | |||
| output = onet(img_boxes.to(self.device)) | |||
| landmarks = output[0].cpu().data.numpy() # shape [n_boxes, 10] | |||
| offsets = output[1].cpu().data.numpy() # shape [n_boxes, 4] | |||
| probs = output[2].cpu().data.numpy() # shape [n_boxes, 2] | |||
| keep = np.where(probs[:, 1] > thresholds[2])[0] | |||
| bounding_boxes = bounding_boxes[keep] | |||
| bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) | |||
| offsets = offsets[keep] | |||
| landmarks = landmarks[keep] | |||
| # compute landmark points | |||
| width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 | |||
| height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 | |||
| xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] | |||
| landmarks[:, 0:5] = np.expand_dims( | |||
| xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5] | |||
| landmarks[:, 5:10] = np.expand_dims( | |||
| ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10] | |||
| bounding_boxes = calibrate_box(bounding_boxes, offsets) | |||
| keep = nms(bounding_boxes, nms_thresholds[2], mode='min') | |||
| bounding_boxes = bounding_boxes[keep] | |||
| landmarks = landmarks[keep] | |||
| landmarks = landmarks.reshape(-1, 2, 5).transpose( | |||
| (0, 2, 1)).reshape(-1, 10) | |||
| return bounding_boxes, landmarks | |||
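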
| @@ -0,0 +1,100 @@ | |||
| # The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
| import math | |||
| import numpy as np | |||
| import torch | |||
| from PIL import Image | |||
| from .box_utils import _preprocess, nms | |||
| def run_first_stage(image, net, scale, threshold, device='cuda'): | |||
| """Run P-Net, generate bounding boxes, and do NMS. | |||
| Arguments: | |||
| image: an instance of PIL.Image. | |||
| net: an instance of pytorch's nn.Module, P-Net. | |||
| scale: a float number, | |||
| scale width and height of the image by this number. | |||
| threshold: a float number, | |||
| threshold on the probability of a face when generating | |||
| bounding boxes from predictions of the net. | |||
| Returns: | |||
| a float numpy array of shape [n_boxes, 9], | |||
| bounding boxes with scores and offsets (4 + 1 + 4). | |||
| """ | |||
| # scale the image and convert it to a float array | |||
| width, height = image.size | |||
| sw, sh = math.ceil(width * scale), math.ceil(height * scale) | |||
| img = image.resize((sw, sh), Image.BILINEAR) | |||
| img = np.asarray(img, 'float32') | |||
| img = torch.as_tensor( | |||
| _preprocess(img), dtype=torch.float32).to(device) | |||
| output = net(img) | |||
| probs = output[1].cpu().data.numpy()[0, 1, :, :] | |||
| offsets = output[0].cpu().data.numpy() | |||
| # probs: probability of a face at each sliding window | |||
| # offsets: transformations to true bounding boxes | |||
| boxes = _generate_bboxes(probs, offsets, scale, threshold) | |||
| if len(boxes) == 0: | |||
| return None | |||
| keep = nms(boxes[:, 0:5], overlap_threshold=0.5) | |||
| return boxes[keep] | |||
| def _generate_bboxes(probs, offsets, scale, threshold): | |||
| """Generate bounding boxes at places | |||
| where there is probably a face. | |||
| Arguments: | |||
| probs: a float numpy array of shape [n, m]. | |||
| offsets: a float numpy array of shape [1, 4, n, m]. | |||
| scale: a float number, | |||
| width and height of the image were scaled by this number. | |||
| threshold: a float number. | |||
| Returns: | |||
| a float numpy array of shape [n_boxes, 9] | |||
| """ | |||
| # applying P-Net is equivalent, in some sense, to | |||
| # moving 12x12 window with stride 2 | |||
| stride = 2 | |||
| cell_size = 12 | |||
| # indices of boxes where there is probably a face | |||
| inds = np.where(probs > threshold) | |||
| if inds[0].size == 0: | |||
| return np.array([]) | |||
| # transformations of bounding boxes | |||
| tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] | |||
| # they are defined as: | |||
| # w = x2 - x1 + 1 | |||
| # h = y2 - y1 + 1 | |||
| # x1_true = x1 + tx1*w | |||
| # x2_true = x2 + tx2*w | |||
| # y1_true = y1 + ty1*h | |||
| # y2_true = y2 + ty2*h | |||
| offsets = np.array([tx1, ty1, tx2, ty2]) | |||
| score = probs[inds[0], inds[1]] | |||
| # P-Net is applied to scaled images | |||
| # so we need to rescale bounding boxes back | |||
| bounding_boxes = np.vstack([ | |||
| np.round((stride * inds[1] + 1.0) / scale), | |||
| np.round((stride * inds[0] + 1.0) / scale), | |||
| np.round((stride * inds[1] + 1.0 + cell_size) / scale), | |||
| np.round((stride * inds[0] + 1.0 + cell_size) / scale), score, offsets | |||
| ]) | |||
| # the extra 1.0 offset follows the original (1-based) Matlab implementation | |||
| return bounding_boxes.T | |||
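Worked example of the index-to-window mapping above (illustrative values): an activation at row `i`, column `j` of a map computed at `scale` corresponds to a 12x12 window in the original image.

```python
# stride 2, cell size 12, scale 0.5: an activation at (i=3, j=5) maps back
# to the window below in original-image coordinates.
scale, i, j = 0.5, 3, 5
x0 = round((2 * j + 1.0) / scale)        # 22
y0 = round((2 * i + 1.0) / scale)        # 14
x1 = round((2 * j + 1.0 + 12) / scale)   # 46
y1 = round((2 * i + 1.0 + 12) / scale)   # 38
print(x0, y0, x1, y1)
```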
| @@ -0,0 +1,160 @@ | |||
| # The implementation is based on mtcnn, available at https://github.com/TropComplique/mtcnn-pytorch | |||
| from collections import OrderedDict | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| class Flatten(nn.Module): | |||
| def __init__(self): | |||
| super(Flatten, self).__init__() | |||
| def forward(self, x): | |||
| """ | |||
| Arguments: | |||
| x: a float tensor with shape [batch_size, c, h, w]. | |||
| Returns: | |||
| a float tensor with shape [batch_size, c*h*w]. | |||
| """ | |||
| # without this transpose the pretrained weights do not work | |||
| x = x.transpose(3, 2).contiguous() | |||
| return x.view(x.size(0), -1) | |||
| class PNet(nn.Module): | |||
| def __init__(self, model_path=None): | |||
| super(PNet, self).__init__() | |||
| # suppose we have input with size HxW, then | |||
| # after first layer: H - 2, | |||
| # after pool: ceil((H - 2)/2), | |||
| # after second conv: ceil((H - 2)/2) - 2, | |||
| # after last conv: ceil((H - 2)/2) - 4, | |||
| # and the same for W | |||
| self.features = nn.Sequential( | |||
| OrderedDict([('conv1', nn.Conv2d(3, 10, 3, 1)), | |||
| ('prelu1', nn.PReLU(10)), | |||
| ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)), | |||
| ('conv2', nn.Conv2d(10, 16, 3, 1)), | |||
| ('prelu2', nn.PReLU(16)), | |||
| ('conv3', nn.Conv2d(16, 32, 3, 1)), | |||
| ('prelu3', nn.PReLU(32))])) | |||
| self.conv4_1 = nn.Conv2d(32, 2, 1, 1) | |||
| self.conv4_2 = nn.Conv2d(32, 4, 1, 1) | |||
| weights = np.load(model_path, allow_pickle=True)[()] | |||
| for n, p in self.named_parameters(): | |||
| p.data = torch.FloatTensor(weights[n]) | |||
| def forward(self, x): | |||
| """ | |||
| Arguments: | |||
| x: a float tensor with shape [batch_size, 3, h, w]. | |||
| Returns: | |||
| b: a float tensor with shape [batch_size, 4, h', w']. | |||
| a: a float tensor with shape [batch_size, 2, h', w']. | |||
| """ | |||
| x = self.features(x) | |||
| a = self.conv4_1(x) | |||
| b = self.conv4_2(x) | |||
| a = F.softmax(a, dim=1) | |||
| return b, a | |||
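Following the size arithmetic in the comment at the top of `PNet`: a 12x12 input collapses to a single output cell, which is why P-Net acts like a sliding 12x12 window (illustrative check):

```python
h = 12
h -= 2          # conv1 (3x3, no padding) -> 10
h = -(-h // 2)  # pool1, ceil(h / 2)      -> 5
h -= 2          # conv2                   -> 3
h -= 2          # conv3                   -> 1
print(h)  # 1
```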
| class RNet(nn.Module): | |||
| def __init__(self, model_path=None): | |||
| super(RNet, self).__init__() | |||
| self.features = nn.Sequential( | |||
| OrderedDict([('conv1', nn.Conv2d(3, 28, 3, 1)), | |||
| ('prelu1', nn.PReLU(28)), | |||
| ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
| ('conv2', nn.Conv2d(28, 48, 3, 1)), | |||
| ('prelu2', nn.PReLU(48)), | |||
| ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
| ('conv3', nn.Conv2d(48, 64, 2, 1)), | |||
| ('prelu3', nn.PReLU(64)), ('flatten', Flatten()), | |||
| ('conv4', nn.Linear(576, 128)), | |||
| ('prelu4', nn.PReLU(128))])) | |||
| self.conv5_1 = nn.Linear(128, 2) | |||
| self.conv5_2 = nn.Linear(128, 4) | |||
| weights = np.load(model_path, allow_pickle=True)[()] | |||
| for n, p in self.named_parameters(): | |||
| p.data = torch.FloatTensor(weights[n]) | |||
| def forward(self, x): | |||
| """ | |||
| Arguments: | |||
| x: a float tensor with shape [batch_size, 3, h, w]. | |||
| Returns: | |||
| b: a float tensor with shape [batch_size, 4]. | |||
| a: a float tensor with shape [batch_size, 2]. | |||
| """ | |||
| x = self.features(x) | |||
| a = self.conv5_1(x) | |||
| b = self.conv5_2(x) | |||
| a = F.softmax(a, dim=1) | |||
| return b, a | |||
| class ONet(nn.Module): | |||
| def __init__(self, model_path=None): | |||
| super(ONet, self).__init__() | |||
| self.features = nn.Sequential( | |||
| OrderedDict([ | |||
| ('conv1', nn.Conv2d(3, 32, 3, 1)), | |||
| ('prelu1', nn.PReLU(32)), | |||
| ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
| ('conv2', nn.Conv2d(32, 64, 3, 1)), | |||
| ('prelu2', nn.PReLU(64)), | |||
| ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)), | |||
| ('conv3', nn.Conv2d(64, 64, 3, 1)), | |||
| ('prelu3', nn.PReLU(64)), | |||
| ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)), | |||
| ('conv4', nn.Conv2d(64, 128, 2, 1)), | |||
| ('prelu4', nn.PReLU(128)), | |||
| ('flatten', Flatten()), | |||
| ('conv5', nn.Linear(1152, 256)), | |||
| ('drop5', nn.Dropout(0.25)), | |||
| ('prelu5', nn.PReLU(256)), | |||
| ])) | |||
| self.conv6_1 = nn.Linear(256, 2) | |||
| self.conv6_2 = nn.Linear(256, 4) | |||
| self.conv6_3 = nn.Linear(256, 10) | |||
| weights = np.load(model_path, allow_pickle=True)[()] | |||
| for n, p in self.named_parameters(): | |||
| p.data = torch.FloatTensor(weights[n]) | |||
| def forward(self, x): | |||
| """ | |||
| Arguments: | |||
| x: a float tensor with shape [batch_size, 3, h, w]. | |||
| Returns: | |||
| c: a float tensor with shape [batch_size, 10]. | |||
| b: a float tensor with shape [batch_size, 4]. | |||
| a: a float tensor with shape [batch_size, 2]. | |||
| """ | |||
| x = self.features(x) | |||
| a = self.conv6_1(x) | |||
| b = self.conv6_2(x) | |||
| c = self.conv6_3(x) | |||
| a = F.softmax(a, dim=1) | |||
| return c, b, a | |||
| @@ -0,0 +1 @@ | |||
| from .detection import UlfdFaceDetector | |||
| @@ -0,0 +1,44 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import os | |||
| import cv2 | |||
| import numpy as np | |||
| import torch | |||
| import torch.backends.cudnn as cudnn | |||
| import torch.nn.functional as F | |||
| from modelscope.metainfo import Models | |||
| from modelscope.models.base import Tensor, TorchModel | |||
| from modelscope.models.builder import MODELS | |||
| from modelscope.utils.constant import ModelFile, Tasks | |||
| from .vision.ssd.fd_config import define_img_size | |||
| from .vision.ssd.mb_tiny_fd import (create_mb_tiny_fd, | |||
| create_mb_tiny_fd_predictor) | |||
| define_img_size(640) | |||
| @MODELS.register_module(Tasks.face_detection, module_name=Models.ulfd) | |||
| class UlfdFaceDetector(TorchModel): | |||
| def __init__(self, model_path, device='cuda'): | |||
| super().__init__(model_path) | |||
| torch.set_grad_enabled(False) | |||
| cudnn.benchmark = True | |||
| self.model_path = model_path | |||
| self.device = device | |||
| self.net = create_mb_tiny_fd(2, is_test=True, device=device) | |||
| self.predictor = create_mb_tiny_fd_predictor( | |||
| self.net, candidate_size=1500, device=device) | |||
| self.net.load(model_path) | |||
| self.net = self.net.to(device) | |||
| def forward(self, input): | |||
| img_raw = input['img'] | |||
| img = np.array(img_raw.cpu().detach()) | |||
| img = img[:, :, ::-1] | |||
| prob_th = 0.85 | |||
| keep_top_k = 750 | |||
| boxes, labels, probs = self.predictor.predict(img, keep_top_k, prob_th) | |||
| return boxes, probs | |||
| @@ -0,0 +1,124 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import math | |||
| import torch | |||
| def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): | |||
| """ | |||
| Args: | |||
| box_scores (N, 5): boxes in corner-form and probabilities. | |||
| iou_threshold: intersection over union threshold. | |||
| top_k: keep top_k results. If k <= 0, keep all the results. | |||
| candidate_size: only consider the candidates with the highest scores. | |||
| Returns: | |||
| the kept rows of box_scores, a tensor of shape [k, 5] | |||
| """ | |||
| scores = box_scores[:, -1] | |||
| boxes = box_scores[:, :-1] | |||
| picked = [] | |||
| _, indexes = scores.sort(descending=True) | |||
| indexes = indexes[:candidate_size] | |||
| while len(indexes) > 0: | |||
| current = indexes[0] | |||
| picked.append(current.item()) | |||
| if 0 < top_k == len(picked) or len(indexes) == 1: | |||
| break | |||
| current_box = boxes[current, :] | |||
| indexes = indexes[1:] | |||
| rest_boxes = boxes[indexes, :] | |||
| iou = iou_of( | |||
| rest_boxes, | |||
| current_box.unsqueeze(0), | |||
| ) | |||
| indexes = indexes[iou <= iou_threshold] | |||
| return box_scores[picked, :] | |||
| def nms(box_scores, | |||
| nms_method=None, | |||
| score_threshold=None, | |||
| iou_threshold=None, | |||
| sigma=0.5, | |||
| top_k=-1, | |||
| candidate_size=200): | |||
| return hard_nms( | |||
| box_scores, iou_threshold, top_k, candidate_size=candidate_size) | |||
| def generate_priors(feature_map_list, | |||
| shrinkage_list, | |||
| image_size, | |||
| min_boxes, | |||
| clamp=True) -> torch.Tensor: | |||
| priors = [] | |||
| for index in range(0, len(feature_map_list[0])): | |||
| scale_w = image_size[0] / shrinkage_list[0][index] | |||
| scale_h = image_size[1] / shrinkage_list[1][index] | |||
| for j in range(0, feature_map_list[1][index]): | |||
| for i in range(0, feature_map_list[0][index]): | |||
| x_center = (i + 0.5) / scale_w | |||
| y_center = (j + 0.5) / scale_h | |||
| for min_box in min_boxes[index]: | |||
| w = min_box / image_size[0] | |||
| h = min_box / image_size[1] | |||
| priors.append([x_center, y_center, w, h]) | |||
| priors = torch.tensor(priors) | |||
| if clamp: | |||
| torch.clamp(priors, 0.0, 1.0, out=priors) | |||
| return priors | |||
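Worked prior count (values taken from `fd_config`'s 320x240 defaults; the call itself is an assumed check): the total is the sum over levels of w * h * len(min_boxes).

```python
# 40*30*3 + 20*15*2 + 10*8*2 + 5*4*3 = 4420 priors
feature_maps = [[40, 20, 10, 5], [30, 15, 8, 4]]
shrinkages = [[8, 16, 32, 64], [8, 16, 30, 60]]
min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
priors = generate_priors(feature_maps, shrinkages, [320, 240], min_boxes)
print(priors.shape)  # torch.Size([4420, 4]), center form, clamped to [0, 1]
```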
| def convert_locations_to_boxes(locations, priors, center_variance, | |||
| size_variance): | |||
| # priors can have one dimension less. | |||
| if priors.dim() + 1 == locations.dim(): | |||
| priors = priors.unsqueeze(0) | |||
| a = locations[..., :2] * center_variance * priors[..., | |||
| 2:] + priors[..., :2] | |||
| b = torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:] | |||
| return torch.cat([a, b], dim=locations.dim() - 1) | |||
| def center_form_to_corner_form(locations): | |||
| a = locations[..., :2] - locations[..., 2:] / 2 | |||
| b = locations[..., :2] + locations[..., 2:] / 2 | |||
| return torch.cat([a, b], locations.dim() - 1) | |||
| def iou_of(boxes0, boxes1, eps=1e-5): | |||
| """Return intersection-over-union (Jaccard index) of boxes. | |||
| Args: | |||
| boxes0 (N, 4): ground truth boxes. | |||
| boxes1 (N or 1, 4): predicted boxes. | |||
| eps: a small number to avoid 0 as denominator. | |||
| Returns: | |||
| iou (N): IoU values. | |||
| """ | |||
| overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) | |||
| overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) | |||
| overlap_area = area_of(overlap_left_top, overlap_right_bottom) | |||
| area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) | |||
| area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) | |||
| return overlap_area / (area0 + area1 - overlap_area + eps) | |||
| def area_of(left_top, right_bottom) -> torch.Tensor: | |||
| """Compute the areas of rectangles given two corners. | |||
| Args: | |||
| left_top (N, 2): left top corner. | |||
| right_bottom (N, 2): right bottom corner. | |||
| Returns: | |||
| area (N): return the area. | |||
| """ | |||
| hw = torch.clamp(right_bottom - left_top, min=0.0) | |||
| return hw[..., 0] * hw[..., 1] | |||
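Quick check of `iou_of` on corner-form boxes (assumed inputs):

```python
import torch

a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
b = torch.tensor([[1.0, 0.0, 3.0, 2.0]])  # shares half its area with `a`
print(iou_of(a, a))  # ~1.0 (exact up to the eps term)
print(iou_of(a, b))  # tensor([0.3333]): intersection 2, union 6
```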
| @@ -0,0 +1,49 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| class Mb_Tiny(nn.Module): | |||
| def __init__(self, num_classes=2): | |||
| super(Mb_Tiny, self).__init__() | |||
| self.base_channel = 8 * 2 | |||
| def conv_bn(inp, oup, stride): | |||
| return nn.Sequential( | |||
| nn.Conv2d(inp, oup, 3, stride, 1, bias=False), | |||
| nn.BatchNorm2d(oup), nn.ReLU(inplace=True)) | |||
| def conv_dw(inp, oup, stride): | |||
| return nn.Sequential( | |||
| nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), | |||
| nn.BatchNorm2d(inp), | |||
| nn.ReLU(inplace=True), | |||
| nn.Conv2d(inp, oup, 1, 1, 0, bias=False), | |||
| nn.BatchNorm2d(oup), | |||
| nn.ReLU(inplace=True), | |||
| ) | |||
| self.model = nn.Sequential( | |||
| conv_bn(3, self.base_channel, 2), # 160*120 | |||
| conv_dw(self.base_channel, self.base_channel * 2, 1), | |||
| conv_dw(self.base_channel * 2, self.base_channel * 2, 2), # 80*60 | |||
| conv_dw(self.base_channel * 2, self.base_channel * 2, 1), | |||
| conv_dw(self.base_channel * 2, self.base_channel * 4, 2), # 40*30 | |||
| conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
| conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
| conv_dw(self.base_channel * 4, self.base_channel * 4, 1), | |||
| conv_dw(self.base_channel * 4, self.base_channel * 8, 2), # 20*15 | |||
| conv_dw(self.base_channel * 8, self.base_channel * 8, 1), | |||
| conv_dw(self.base_channel * 8, self.base_channel * 8, 1), | |||
| conv_dw(self.base_channel * 8, self.base_channel * 16, 2), # 10*8 | |||
| conv_dw(self.base_channel * 16, self.base_channel * 16, 1)) | |||
| self.fc = nn.Linear(1024, num_classes) | |||
| def forward(self, x): | |||
| x = self.model(x) | |||
| x = F.avg_pool2d(x, 7) | |||
| x = x.view(-1, 1024) | |||
| x = self.fc(x) | |||
| return x | |||
| @@ -0,0 +1,18 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| from ..transforms import Compose, Resize, SubtractMeans, ToTensor | |||
| class PredictionTransform: | |||
| def __init__(self, size, mean=0.0, std=1.0): | |||
| self.transform = Compose([ | |||
| Resize(size), | |||
| SubtractMeans(mean), lambda img, boxes=None, labels=None: | |||
| (img / std, boxes, labels), | |||
| ToTensor() | |||
| ]) | |||
| def __call__(self, image): | |||
| image, _, _ = self.transform(image) | |||
| return image | |||
| @@ -0,0 +1,49 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import numpy as np | |||
| from ..box_utils import generate_priors | |||
| image_mean_test = image_mean = np.array([127, 127, 127]) | |||
| image_std = 128.0 | |||
| iou_threshold = 0.3 | |||
| center_variance = 0.1 | |||
| size_variance = 0.2 | |||
| min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]] | |||
| shrinkage_list = [] | |||
| image_size = [320, 240] # default input size 320*240 | |||
| feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, | |||
| 4]] # default feature map size | |||
| priors = [] | |||
| def define_img_size(size): | |||
| global image_size, feature_map_w_h_list, priors, shrinkage_list | |||
| img_size_dict = { | |||
| 128: [128, 96], | |||
| 160: [160, 120], | |||
| 320: [320, 240], | |||
| 480: [480, 360], | |||
| 640: [640, 480], | |||
| 1280: [1280, 960] | |||
| } | |||
| image_size = img_size_dict[size] | |||
| feature_map_w_h_list_dict = { | |||
| 128: [[16, 8, 4, 2], [12, 6, 3, 2]], | |||
| 160: [[20, 10, 5, 3], [15, 8, 4, 2]], | |||
| 320: [[40, 20, 10, 5], [30, 15, 8, 4]], | |||
| 480: [[60, 30, 15, 8], [45, 23, 12, 6]], | |||
| 640: [[80, 40, 20, 10], [60, 30, 15, 8]], | |||
| 1280: [[160, 80, 40, 20], [120, 60, 30, 15]] | |||
| } | |||
| feature_map_w_h_list = feature_map_w_h_list_dict[size] | |||
| shrinkage_list = [] # reset so repeated calls do not accumulate stale entries | |||
| for i in range(0, len(image_size)): | |||
| item_list = [] | |||
| for k in range(0, len(feature_map_w_h_list[i])): | |||
| item_list.append(image_size[i] / feature_map_w_h_list[i][k]) | |||
| shrinkage_list.append(item_list) | |||
| priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, | |||
| min_boxes) | |||
| @@ -0,0 +1,124 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| from torch.nn import Conv2d, ModuleList, ReLU, Sequential | |||
| from ..mb_tiny import Mb_Tiny | |||
| from . import fd_config as config | |||
| from .predictor import Predictor | |||
| from .ssd import SSD | |||
| def SeperableConv2d(in_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=1, | |||
| padding=0): | |||
| """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. | |||
| """ | |||
| return Sequential( | |||
| Conv2d( | |||
| in_channels=in_channels, | |||
| out_channels=in_channels, | |||
| kernel_size=kernel_size, | |||
| groups=in_channels, | |||
| stride=stride, | |||
| padding=padding), | |||
| ReLU(), | |||
| Conv2d( | |||
| in_channels=in_channels, out_channels=out_channels, kernel_size=1), | |||
| ) | |||
| def create_mb_tiny_fd(num_classes, is_test=False, device='cuda'): | |||
| base_net = Mb_Tiny(2) | |||
| base_net_model = base_net.model # use only the backbone, dropping the classifier head | |||
| source_layer_indexes = [8, 11, 13] | |||
| extras = ModuleList([ | |||
| Sequential( | |||
| Conv2d( | |||
| in_channels=base_net.base_channel * 16, | |||
| out_channels=base_net.base_channel * 4, | |||
| kernel_size=1), ReLU(), | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 4, | |||
| out_channels=base_net.base_channel * 16, | |||
| kernel_size=3, | |||
| stride=2, | |||
| padding=1), ReLU()) | |||
| ]) | |||
| regression_headers = ModuleList([ | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 4, | |||
| out_channels=3 * 4, | |||
| kernel_size=3, | |||
| padding=1), | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 8, | |||
| out_channels=2 * 4, | |||
| kernel_size=3, | |||
| padding=1), | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 16, | |||
| out_channels=2 * 4, | |||
| kernel_size=3, | |||
| padding=1), | |||
| Conv2d( | |||
| in_channels=base_net.base_channel * 16, | |||
| out_channels=3 * 4, | |||
| kernel_size=3, | |||
| padding=1) | |||
| ]) | |||
| classification_headers = ModuleList([ | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 4, | |||
| out_channels=3 * num_classes, | |||
| kernel_size=3, | |||
| padding=1), | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 8, | |||
| out_channels=2 * num_classes, | |||
| kernel_size=3, | |||
| padding=1), | |||
| SeperableConv2d( | |||
| in_channels=base_net.base_channel * 16, | |||
| out_channels=2 * num_classes, | |||
| kernel_size=3, | |||
| padding=1), | |||
| Conv2d( | |||
| in_channels=base_net.base_channel * 16, | |||
| out_channels=3 * num_classes, | |||
| kernel_size=3, | |||
| padding=1) | |||
| ]) | |||
| return SSD( | |||
| num_classes, | |||
| base_net_model, | |||
| source_layer_indexes, | |||
| extras, | |||
| classification_headers, | |||
| regression_headers, | |||
| is_test=is_test, | |||
| config=config, | |||
| device=device) | |||
| def create_mb_tiny_fd_predictor(net, | |||
| candidate_size=200, | |||
| nms_method=None, | |||
| sigma=0.5, | |||
| device=None): | |||
| predictor = Predictor( | |||
| net, | |||
| config.image_size, | |||
| config.image_mean_test, | |||
| config.image_std, | |||
| nms_method=nms_method, | |||
| iou_threshold=config.iou_threshold, | |||
| candidate_size=candidate_size, | |||
| sigma=sigma, | |||
| device=device) | |||
| return predictor | |||
| @@ -0,0 +1,80 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import torch | |||
| from .. import box_utils | |||
| from .data_preprocessing import PredictionTransform | |||
| class Predictor: | |||
| def __init__(self, | |||
| net, | |||
| size, | |||
| mean=0.0, | |||
| std=1.0, | |||
| nms_method=None, | |||
| iou_threshold=0.3, | |||
| filter_threshold=0.85, | |||
| candidate_size=200, | |||
| sigma=0.5, | |||
| device=None): | |||
| self.net = net | |||
| self.transform = PredictionTransform(size, mean, std) | |||
| self.iou_threshold = iou_threshold | |||
| self.filter_threshold = filter_threshold | |||
| self.candidate_size = candidate_size | |||
| self.nms_method = nms_method | |||
| self.sigma = sigma | |||
| if device: | |||
| self.device = device | |||
| else: | |||
| self.device = torch.device( | |||
| 'cuda:0' if torch.cuda.is_available() else 'cpu') | |||
| self.net.to(self.device) | |||
| self.net.eval() | |||
| def predict(self, image, top_k=-1, prob_threshold=None): | |||
| height, width, _ = image.shape | |||
| image = self.transform(image) | |||
| images = image.unsqueeze(0) | |||
| images = images.to(self.device) | |||
| with torch.no_grad(): | |||
| scores, boxes = self.net.forward(images) | |||
| boxes = boxes[0] | |||
| scores = scores[0] | |||
| if not prob_threshold: | |||
| prob_threshold = self.filter_threshold | |||
| # this version of nms is slower on GPU, so we move data to CPU. | |||
| picked_box_probs = [] | |||
| picked_labels = [] | |||
| for class_index in range(1, scores.size(1)): | |||
| probs = scores[:, class_index] | |||
| mask = probs > prob_threshold | |||
| probs = probs[mask] | |||
| if probs.size(0) == 0: | |||
| continue | |||
| subset_boxes = boxes[mask, :] | |||
| box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) | |||
| box_probs = box_utils.nms( | |||
| box_probs, | |||
| self.nms_method, | |||
| score_threshold=prob_threshold, | |||
| iou_threshold=self.iou_threshold, | |||
| sigma=self.sigma, | |||
| top_k=top_k, | |||
| candidate_size=self.candidate_size) | |||
| picked_box_probs.append(box_probs) | |||
| picked_labels.extend([class_index] * box_probs.size(0)) | |||
| if not picked_box_probs: | |||
| return torch.tensor([]), torch.tensor([]), torch.tensor([]) | |||
| picked_box_probs = torch.cat(picked_box_probs) | |||
| picked_box_probs[:, 0] *= width | |||
| picked_box_probs[:, 1] *= height | |||
| picked_box_probs[:, 2] *= width | |||
| picked_box_probs[:, 3] *= height | |||
| return picked_box_probs[:, :4], torch.tensor( | |||
| picked_labels), picked_box_probs[:, 4] | |||
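An assumed end-to-end sketch (hypothetical checkpoint and image paths; `define_img_size`, `create_mb_tiny_fd` and `create_mb_tiny_fd_predictor` come from the modules above). This mirrors what `UlfdFaceDetector` does in `__init__` and `forward`:

```python
import cv2

define_img_size(640)  # populates fd_config.priors before building the SSD
net = create_mb_tiny_fd(2, is_test=True, device='cpu')
net.load('ulfd.pth')  # hypothetical checkpoint path
predictor = create_mb_tiny_fd_predictor(net, candidate_size=1500, device='cpu')
img = cv2.cvtColor(cv2.imread('face.jpg'), cv2.COLOR_BGR2RGB)
boxes, labels, probs = predictor.predict(img, top_k=750, prob_threshold=0.85)
print(boxes.shape)  # torch.Size([num_faces, 4]), pixel (x0, y0, x1, y1)
```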
| @@ -0,0 +1,129 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| from collections import namedtuple | |||
| from typing import List, Tuple | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from .. import box_utils | |||
| GraphPath = namedtuple('GraphPath', ['s0', 'name', 's1']) | |||
| class SSD(nn.Module): | |||
| def __init__(self, | |||
| num_classes: int, | |||
| base_net: nn.ModuleList, | |||
| source_layer_indexes: List[int], | |||
| extras: nn.ModuleList, | |||
| classification_headers: nn.ModuleList, | |||
| regression_headers: nn.ModuleList, | |||
| is_test=False, | |||
| config=None, | |||
| device=None): | |||
| """Compose a SSD model using the given components. | |||
| """ | |||
| super(SSD, self).__init__() | |||
| self.num_classes = num_classes | |||
| self.base_net = base_net | |||
| self.source_layer_indexes = source_layer_indexes | |||
| self.extras = extras | |||
| self.classification_headers = classification_headers | |||
| self.regression_headers = regression_headers | |||
| self.is_test = is_test | |||
| self.config = config | |||
| # register layers in source_layer_indexes by adding them to a module list | |||
| self.source_layer_add_ons = nn.ModuleList([ | |||
| t[1] for t in source_layer_indexes | |||
| if isinstance(t, tuple) and not isinstance(t, GraphPath) | |||
| ]) | |||
| if device: | |||
| self.device = device | |||
| else: | |||
| self.device = torch.device( | |||
| 'cuda:0' if torch.cuda.is_available() else 'cpu') | |||
| if is_test: | |||
| self.config = config | |||
| self.priors = config.priors.to(self.device) | |||
| def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: | |||
| confidences = [] | |||
| locations = [] | |||
| start_layer_index = 0 | |||
| header_index = 0 | |||
| end_layer_index = 0 | |||
| for end_layer_index in self.source_layer_indexes: | |||
| if isinstance(end_layer_index, GraphPath): | |||
| path = end_layer_index | |||
| end_layer_index = end_layer_index.s0 | |||
| added_layer = None | |||
| elif isinstance(end_layer_index, tuple): | |||
| added_layer = end_layer_index[1] | |||
| end_layer_index = end_layer_index[0] | |||
| path = None | |||
| else: | |||
| added_layer = None | |||
| path = None | |||
| for layer in self.base_net[start_layer_index:end_layer_index]: | |||
| x = layer(x) | |||
| if added_layer: | |||
| y = added_layer(x) | |||
| else: | |||
| y = x | |||
| if path: | |||
| sub = getattr(self.base_net[end_layer_index], path.name) | |||
| for layer in sub[:path.s1]: | |||
| x = layer(x) | |||
| y = x | |||
| for layer in sub[path.s1:]: | |||
| x = layer(x) | |||
| end_layer_index += 1 | |||
| start_layer_index = end_layer_index | |||
| confidence, location = self.compute_header(header_index, y) | |||
| header_index += 1 | |||
| confidences.append(confidence) | |||
| locations.append(location) | |||
| for layer in self.base_net[end_layer_index:]: | |||
| x = layer(x) | |||
| for layer in self.extras: | |||
| x = layer(x) | |||
| confidence, location = self.compute_header(header_index, x) | |||
| header_index += 1 | |||
| confidences.append(confidence) | |||
| locations.append(location) | |||
| confidences = torch.cat(confidences, 1) | |||
| locations = torch.cat(locations, 1) | |||
| if self.is_test: | |||
| confidences = F.softmax(confidences, dim=2) | |||
| boxes = box_utils.convert_locations_to_boxes( | |||
| locations, self.priors, self.config.center_variance, | |||
| self.config.size_variance) | |||
| boxes = box_utils.center_form_to_corner_form(boxes) | |||
| return confidences, boxes | |||
| else: | |||
| return confidences, locations | |||
| def compute_header(self, i, x): | |||
| confidence = self.classification_headers[i](x) | |||
| confidence = confidence.permute(0, 2, 3, 1).contiguous() | |||
| confidence = confidence.view(confidence.size(0), -1, self.num_classes) | |||
| location = self.regression_headers[i](x) | |||
| location = location.permute(0, 2, 3, 1).contiguous() | |||
| location = location.view(location.size(0), -1, 4) | |||
| return confidence, location | |||
| def load(self, model): | |||
| self.load_state_dict( | |||
| torch.load(model, map_location=lambda storage, loc: storage)) | |||
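compute_header() above flattens every SSD head the same way: a conv output of shape (N, anchors * classes, H, W) is permuted and reshaped to (N, H * W * anchors, classes), so the per-layer predictions can be concatenated along dim 1 in forward(). A minimal sketch with a hypothetical head configuration (dummy sizes, not part of the diff):

import torch
import torch.nn as nn

num_classes, num_anchors = 2, 3
head = nn.Conv2d(64, num_anchors * num_classes, kernel_size=3, padding=1)
feat = torch.randn(1, 64, 20, 20)                  # dummy feature map
out = head(feat).permute(0, 2, 3, 1).contiguous()  # (1, 20, 20, anchors * classes)
out = out.view(out.size(0), -1, num_classes)       # (1, 20 * 20 * 3, 2)
print(out.shape)                                   # torch.Size([1, 1200, 2])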
| @@ -0,0 +1,56 @@ | |||
| # The implementation is based on ULFD, available at | |||
| # https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB | |||
| import types | |||
| import cv2 | |||
| import numpy as np | |||
| import torch | |||
| from numpy import random | |||
| class Compose(object): | |||
| """Composes several augmentations together. | |||
| Args: | |||
| transforms (List[Transform]): list of transforms to compose. | |||
| Example: | |||
| >>> augmentations.Compose([ | |||
| >>> transforms.CenterCrop(10), | |||
| >>> transforms.ToTensor(), | |||
| >>> ]) | |||
| """ | |||
| def __init__(self, transforms): | |||
| self.transforms = transforms | |||
| def __call__(self, img, boxes=None, labels=None): | |||
| for t in self.transforms: | |||
| img, boxes, labels = t(img, boxes, labels) | |||
| return img, boxes, labels | |||
| class SubtractMeans(object): | |||
| def __init__(self, mean): | |||
| self.mean = np.array(mean, dtype=np.float32) | |||
| def __call__(self, image, boxes=None, labels=None): | |||
| image = image.astype(np.float32) | |||
| image -= self.mean | |||
| return image.astype(np.float32), boxes, labels | |||
| class Resize(object): | |||
| def __init__(self, size=(300, 300)): | |||
| self.size = size | |||
| def __call__(self, image, boxes=None, labels=None): | |||
| image = cv2.resize(image, (self.size[0], self.size[1])) | |||
| return image, boxes, labels | |||
| class ToTensor(object): | |||
| def __call__(self, cvimage, boxes=None, labels=None): | |||
| return torch.from_numpy(cvimage.astype(np.float32)).permute( | |||
| 2, 0, 1), boxes, labels | |||
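These transforms thread (image, boxes, labels) through every step, so detection targets stay in sync with the image. A minimal usage sketch (the input size and mean values are hypothetical, not taken from the diff):

import numpy as np

preprocess = Compose([
    Resize((320, 240)),              # (width, height), as passed to cv2.resize
    SubtractMeans([127, 127, 127]),  # hypothetical per-channel mean
    ToTensor(),
])
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy BGR frame
tensor, _, _ = preprocess(frame)                 # float32 CHW tensor
batch = tensor.unsqueeze(0)                      # add a batch dimension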
| @@ -1,3 +1,7 @@ | |||
| """ | |||
| The implementation here is modified from insightface, originally released under the MIT license and publicly available at | |||
| https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py | |||
| """ | |||
| import cv2 | |||
| import numpy as np | |||
| from skimage import transform as trans | |||
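The insightface-style alignment this file builds on estimates a similarity transform from the five detected facial landmarks to a canonical template, then warps the crop with it. A minimal sketch (template values are approximate and the landmarks are faked; not part of the diff):

import cv2
import numpy as np
from skimage import transform as trans

template = np.array([[38.29, 51.70], [73.53, 51.50], [56.03, 71.74],
                     [41.55, 92.37], [70.73, 92.20]], dtype=np.float32)
landmarks = template + 2.0                     # pretend-detected 5-point landmarks
tform = trans.SimilarityTransform()
tform.estimate(landmarks, template)            # fit landmarks -> template
M = tform.params[0:2, :]                       # 2x3 matrix for cv2.warpAffine
img = np.zeros((256, 256, 3), dtype=np.uint8)  # dummy input image
aligned = cv2.warpAffine(img, M, (112, 112))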
| @@ -1,3 +1,5 @@ | |||
| # The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at | |||
| # https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone | |||
| from .model_irse import (IR_18, IR_34, IR_50, IR_101, IR_152, IR_200, IR_SE_50, | |||
| IR_SE_101, IR_SE_152, IR_SE_200) | |||
| from .model_resnet import ResNet_50, ResNet_101, ResNet_152 | |||
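A minimal sketch of instantiating one of the re-exported backbones; the input-size constructor signature is an assumption carried over from the upstream TFace code, not something this diff shows:

import torch

backbone = IR_50([112, 112])  # assumed signature: spatial size of aligned face crops
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 112, 112))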
| @@ -1,3 +1,5 @@ | |||
| # The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at | |||
| # https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/common.py | |||
| import torch | |||
| import torch.nn as nn | |||
| from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Linear, Module, ReLU, | |||
| @@ -1,5 +1,5 @@ | |||
| # based on: | |||
| # https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_irse.py | |||
| # The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at | |||
| # https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_irse.py | |||
| from collections import namedtuple | |||
| from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, | |||
| @@ -1,5 +1,5 @@ | |||
| # based on: | |||
| # https://github.com/ZhaoJ9014/face.evoLVe.PyTorch/blob/master/backbone/model_resnet.py | |||
| # The implementation is adopted from TFace, made publicly available under the Apache-2.0 license at | |||
| # https://github.com/Tencent/TFace/blob/master/recognition/torchkit/backbone/model_resnet.py | |||
| import torch.nn as nn | |||
| from torch.nn import (BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, | |||
| MaxPool2d, Module, ReLU, Sequential) | |||
| @@ -105,12 +105,12 @@ def get_img_ins_seg_result(img_seg_result=None, | |||
| } | |||
| for seg_result in img_seg_result: | |||
| box = { | |||
| 'x': np.int(seg_result[0]), | |||
| 'y': np.int(seg_result[1]), | |||
| 'w': np.int(seg_result[2] - seg_result[0]), | |||
| 'h': np.int(seg_result[3] - seg_result[1]) | |||
| } | |||
| box = [ | |||
| int(seg_result[0]), | |||
| int(seg_result[1]), | |||
| int(seg_result[2]), | |||
| int(seg_result[3]) | |||
| ] | |||
| score = float(seg_result[4]) | |||
| category = seg_result[5] | |||
| @@ -161,12 +161,10 @@ def show_result( | |||
| np.random.random() * 255.0 | |||
| ]) | |||
| x1 = int(box['x']) | |||
| y1 = int(box['y']) | |||
| w = int(box['w']) | |||
| h = int(box['h']) | |||
| x2 = x1 + w | |||
| y2 = y1 + h | |||
| x1 = int(box[0]) | |||
| y1 = int(box[1]) | |||
| x2 = int(box[2]) | |||
| y2 = int(box[3]) | |||
| if show_box: | |||
| cv2.rectangle( | |||
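The two hunks above move boxes from an {x, y, w, h} dict to a corner-form [x1, y1, x2, y2] list, so show_result reads the corners directly instead of re-deriving them from width and height. A minimal sketch (hypothetical helper, not in the diff) of the relationship between the two formats:

def corner_to_xywh(box):
    x1, y1, x2, y2 = box
    return {'x': x1, 'y': y1, 'w': x2 - x1, 'h': y2 - y1}

assert corner_to_xywh([10, 20, 50, 80]) == {'x': 10, 'y': 20, 'w': 40, 'h': 60}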
| @@ -1,4 +1,4 @@ | |||
| # The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on | |||
| # The implementation is adopted from PASS-reID, made publicly available under the Apache-2.0 License at | |||
| # https://github.com/CASIA-IVA-Lab/PASS-reID | |||
| import os | |||
| @@ -1,4 +1,4 @@ | |||
| # The implementation is also open-sourced by the authors as PASS-reID, and is available publicly on | |||
| # The implementation is adopted from PASS-reID, made publicly available under the Apache-2.0 License at | |||
| # https://github.com/CASIA-IVA-Lab/PASS-reID | |||
| import collections.abc as container_abcs | |||
| @@ -552,7 +552,7 @@ class CLIPVisionTransformer(nn.Module): | |||
| nn.GroupNorm(1, embed_dim), | |||
| nn.ConvTranspose2d( | |||
| embed_dim, embed_dim, kernel_size=2, stride=2), | |||
| nn.SyncBatchNorm(embed_dim), | |||
| nn.BatchNorm2d(embed_dim), | |||
| nn.GELU(), | |||
| nn.ConvTranspose2d( | |||
| embed_dim, embed_dim, kernel_size=2, stride=2), | |||
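Replacing nn.SyncBatchNorm with nn.BatchNorm2d keeps this FPN stage usable outside a distributed process group (e.g. single-GPU or CPU inference). If synchronized statistics are wanted again for multi-GPU training, PyTorch's built-in converter can restore them; a minimal sketch with dummy sizes (not part of the diff):

import torch.nn as nn

fpn_stage = nn.Sequential(
    nn.ConvTranspose2d(768, 768, kernel_size=2, stride=2),  # dummy embed_dim of 768
    nn.BatchNorm2d(768),
    nn.GELU(),
)
sync_stage = nn.SyncBatchNorm.convert_sync_batchnorm(fpn_stage)
# running sync_stage in training mode requires an initialized process group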