
fix loss&log

commit 73469b8400 · master · 翎航 · 3 years ago
100 changed files with 847 additions and 471 deletions
  1. .pre-commit-config.yaml (+1, -1)
  2. .pre-commit-config_local.yaml (+1, -1)
  3. data/test/audios/asr_example_8K.wav (+3, -0)
  4. data/test/audios/asr_example_cn_dialect.wav (+3, -0)
  5. data/test/audios/asr_example_cn_en.wav (+3, -0)
  6. data/test/audios/asr_example_en.wav (+3, -0)
  7. data/test/audios/asr_example_es.wav (+3, -0)
  8. data/test/audios/asr_example_id.wav (+3, -0)
  9. data/test/audios/asr_example_ja.wav (+3, -0)
  10. data/test/audios/asr_example_ko.wav (+3, -0)
  11. data/test/audios/asr_example_ru.wav (+3, -0)
  12. data/test/images/image_ocr_recognition.jpg (+3, -0)
  13. data/test/regression/sbert-base-tnews.bin (+3, -0)
  14. data/test/regression/sbert_nli.bin (+2, -2)
  15. data/test/regression/sbert_sen_sim.bin (+2, -2)
  16. modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py (+3, -1)
  17. modelscope/hub/api.py (+122, -14)
  18. modelscope/hub/file_download.py (+1, -2)
  19. modelscope/hub/git.py (+6, -5)
  20. modelscope/hub/repository.py (+2, -2)
  21. modelscope/hub/snapshot_download.py (+1, -1)
  22. modelscope/hub/upload.py (+0, -117)
  23. modelscope/metainfo.py (+12, -6)
  24. modelscope/metrics/bleu_metric.py (+1, -1)
  25. modelscope/metrics/builder.py (+2, -0)
  26. modelscope/metrics/image_portrait_enhancement_metric.py (+2, -0)
  27. modelscope/models/audio/tts/voice.py (+41, -37)
  28. modelscope/models/cv/face_human_hand_detection/one_stage_detector.py (+0, -3)
  29. modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py (+14, -14)
  30. modelscope/models/cv/image_to_image_generation/__init__.py (+1, -1)
  31. modelscope/models/cv/image_to_image_generation/data/__init__.py (+1, -1)
  32. modelscope/models/cv/image_to_image_generation/data/transforms.py (+1, -0)
  33. modelscope/models/cv/image_to_image_generation/models/__init__.py (+1, -1)
  34. modelscope/models/cv/image_to_image_generation/ops/__init__.py (+1, -1)
  35. modelscope/models/cv/image_to_image_translation/__init__.py (+24, -0)
  36. modelscope/models/cv/image_to_image_translation/data/__init__.py (+1, -0)
  37. modelscope/models/cv/image_to_image_translation/models/__init__.py (+1, -0)
  38. modelscope/models/cv/image_to_image_translation/ops/__init__.py (+1, -0)
  39. modelscope/models/cv/product_retrieval_embedding/__init__.py (+1, -1)
  40. modelscope/models/cv/product_retrieval_embedding/item_detection.py (+1, -0)
  41. modelscope/models/cv/product_retrieval_embedding/item_embedding.py (+1, -0)
  42. modelscope/models/cv/product_retrieval_embedding/item_model.py (+2, -0)
  43. modelscope/models/cv/text_driven_segmentation/lseg_model.py (+1, -1)
  44. modelscope/models/cv/tinynas_detection/__init__.py (+2, -0)
  45. modelscope/models/cv/tinynas_detection/backbone/tinynas.py (+19, -10)
  46. modelscope/models/cv/tinynas_detection/detector.py (+4, -1)
  47. modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py (+33, -20)
  48. modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py (+1, -4)
  49. modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py (+15, -0)
  50. modelscope/models/cv/tinynas_detection/tinynas_detector.py (+1, -1)
  51. modelscope/models/cv/video_summarization/summarizer.py (+1, -1)
  52. modelscope/models/multi_modal/diffusion/__init__.py (+1, -0)
  53. modelscope/models/multi_modal/gemm/__init__.py (+1, -0)
  54. modelscope/models/multi_modal/multi_stage_diffusion/__init__.py (+1, -0)
  55. modelscope/models/multi_modal/ofa/utils/constant.py (+2, -1)
  56. modelscope/models/multi_modal/ofa_for_all_tasks.py (+4, -2)
  57. modelscope/models/multi_modal/team/__init__.py (+1, -0)
  58. modelscope/models/nlp/__init__.py (+3, -2)
  59. modelscope/models/nlp/bert/modeling_bert.py (+0, -78)
  60. modelscope/models/nlp/heads/infromation_extraction_head.py (+2, -0)
  61. modelscope/models/nlp/heads/token_classification_head.py (+2, -0)
  62. modelscope/models/nlp/task_models/information_extraction.py (+2, -0)
  63. modelscope/models/nlp/task_models/token_classification.py (+2, -0)
  64. modelscope/models/nlp/text_ranking.py (+5, -5)
  65. modelscope/msdatasets/cv/easycv_base.py (+9, -4)
  66. modelscope/msdatasets/task_datasets/__init__.py (+4, -2)
  67. modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py (+23, -0)
  68. modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py (+32, -0)
  69. modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py (+51, -0)
  70. modelscope/msdatasets/task_datasets/text_ranking_dataset.py (+8, -8)
  71. modelscope/msdatasets/utils/dataset_utils.py (+3, -5)
  72. modelscope/outputs.py (+2, -1)
  73. modelscope/pipeline_inputs.py (+1, -1)
  74. modelscope/pipelines/audio/asr_inference_pipeline.py (+14, -5)
  75. modelscope/pipelines/audio/kws_kwsbp_pipeline.py (+2, -2)
  76. modelscope/pipelines/base.py (+2, -0)
  77. modelscope/pipelines/builder.py (+7, -2)
  78. modelscope/pipelines/cv/body_3d_keypoints_pipeline.py (+16, -5)
  79. modelscope/pipelines/cv/face_image_generation_pipeline.py (+2, -0)
  80. modelscope/pipelines/cv/image_reid_person_pipeline.py (+1, -0)
  81. modelscope/pipelines/cv/tinynas_detection_pipeline.py (+16, -6)
  82. modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py (+2, -0)
  83. modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py (+52, -0)
  84. modelscope/pipelines/nlp/__init__.py (+2, -2)
  85. modelscope/pipelines/nlp/information_extraction_pipeline.py (+2, -0)
  86. modelscope/pipelines/nlp/summarization_pipeline.py (+1, -1)
  87. modelscope/pipelines/nlp/table_question_answering_pipeline.py (+27, -24)
  88. modelscope/pipelines/nlp/text_ranking_pipeline.py (+5, -5)
  89. modelscope/pipelines/nlp/token_classification_pipeline.py (+2, -0)
  90. modelscope/preprocessors/__init__.py (+2, -2)
  91. modelscope/preprocessors/asr.py (+6, -0)
  92. modelscope/preprocessors/multi_modal.py (+4, -12)
  93. modelscope/preprocessors/nlp/__init__.py (+2, -2)
  94. modelscope/preprocessors/nlp/nlp_base.py (+28, -32)
  95. modelscope/preprocessors/ofa/__init__.py (+1, -0)
  96. modelscope/preprocessors/ofa/base.py (+16, -0)
  97. modelscope/preprocessors/ofa/image_captioning.py (+5, -9)
  98. modelscope/preprocessors/ofa/ocr_recognition.py (+97, -0)
  99. modelscope/preprocessors/ofa/utils/constant.py (+13, -0)
  100. modelscope/preprocessors/star3/fields/database.py (+1, -1)

.pre-commit-config.yaml (+1, -1)

@@ -1,6 +1,6 @@
repos:
- repo: https://gitlab.com/pycqa/flake8.git
rev: 3.8.3
rev: 4.0.0
hooks:
- id: flake8
exclude: thirdparty/|examples/


.pre-commit-config_local.yaml (+1, -1)

@@ -1,6 +1,6 @@
repos:
- repo: /home/admin/pre-commit/flake8
rev: 3.8.3
rev: 4.0.0
hooks:
- id: flake8
exclude: thirdparty/|examples/


data/test/audios/asr_example_8K.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e999c247bfebb03d556a31722f0ce7145cac20a67fac9da813ad336e1f549f9f
size 38954

data/test/audios/asr_example_cn_dialect.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32eb8d4d537941bf0edea69cd6723e8ba489fa3df64e13e29f96e4fae0b856f4
size 93676

data/test/audios/asr_example_cn_en.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f57aee13ade70be6b2c6e4f5e5c7404bdb03057b63828baefbaadcf23855a4cb
size 472012

data/test/audios/asr_example_en.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fee8e0460ca707f108782be0d93c555bf34fb6b1cb297e5fceed70192cc65f9b
size 71244

data/test/audios/asr_example_es.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:450e31f9df8c5b48c617900625f01cb64c484f079a9843179fe9feaa7d163e61
size 181964

data/test/audios/asr_example_id.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:255494c41bc1dfb0c954d827ec6ce775900e4f7a55fb0a7881bdf9d66a03b425
size 112078

data/test/audios/asr_example_ja.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:22a55277908bbc3ef60a0cf56b230eb507b9e837574e8f493e93644b1d21c281
size 200556

data/test/audios/asr_example_ko.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ee92191836c76412463d8b282a7ab4e1aa57386ba699ec011a3e2c4d64f32f4b
size 162636

data/test/audios/asr_example_ru.wav (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:77d1537fc584c1505d8aa10ec8c86af57ab661199e4f28fd7ffee3c22d1e4e61
size 160204

data/test/images/image_ocr_recognition.jpg (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:772b19f76c98044e39330853928624f10e085106a4292b4dd19f865531080747
size 959

data/test/regression/sbert-base-tnews.bin (+3, -0)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2bce1341f4b55d536771dad6e2b280458579f46c3216474ceb8a926022ab53d0
size 151572

data/test/regression/sbert_nli.bin (+2, -2)

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44e3925c15d86d8596baeb6bd1d153d86f57b7489798b2cf988a1248e110fd62
size 62231
oid sha256:6af5024a26337a440c7ea2935fce84af558dd982ee97a2f027bb922cc874292b
size 61741

data/test/regression/sbert_sen_sim.bin (+2, -2)

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1ff17a0272752de4c88d4254b2e881f97f8ef022f03609d03ee1de0ae964368a
size 62235
oid sha256:bbce084781342ca7274c2e4d02ed5c5de43ba213a3b76328d5994404d6544c41
size 61745

modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py (+3, -1)

@@ -23,12 +23,14 @@ class SbertForSequenceClassificationExporter(TorchModelExporter):

def generate_dummy_inputs(self,
shape: Tuple = None,
pair: bool = False,
**kwargs) -> Dict[str, Any]:
"""Generate dummy inputs for model exportation to onnx or other formats by tracing.

@param shape: A tuple describing the input shape, with at most two dimensions.
shape = (1, ) means batch_size=1; sequence_length is taken from the preprocessor.
shape = (8, 128) means batch_size=8, sequence_length=128, which overrides the preprocessor's config.
@param pair: Generate sentence pairs or single sentences for dummy inputs.
@return: Dummy inputs.
"""

@@ -55,7 +57,7 @@ class SbertForSequenceClassificationExporter(TorchModelExporter):
**sequence_length
})
preprocessor: Preprocessor = build_preprocessor(cfg, field_name)
if preprocessor.pair:
if pair:
first_sequence = preprocessor.tokenizer.unk_token
second_sequence = preprocessor.tokenizer.unk_token
else:
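For orientation, a hedged usage sketch of the new `pair` argument follows. Only `generate_dummy_inputs(shape=..., pair=...)` comes from this diff; the `from_model` construction and the `model` object are assumptions about the surrounding exporter API, not shown here.

```python
# Hedged sketch; see caveats above.
from modelscope.exporters.nlp.sbert_for_sequence_classification_exporter import (
    SbertForSequenceClassificationExporter)

exporter = SbertForSequenceClassificationExporter.from_model(model)  # 'model' assumed
single = exporter.generate_dummy_inputs(shape=(1, ))                 # single sentences
paired = exporter.generate_dummy_inputs(shape=(8, 128), pair=True)   # sentence pairs
```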


modelscope/hub/api.py (+122, -14)

@@ -1,8 +1,11 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

# yapf: disable
import datetime
import os
import pickle
import shutil
import tempfile
from collections import defaultdict
from http import HTTPStatus
from http.cookiejar import CookieJar
@@ -16,17 +19,25 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA,
API_RESPONSE_FIELD_GIT_ACCESS_TOKEN,
API_RESPONSE_FIELD_MESSAGE,
API_RESPONSE_FIELD_USERNAME,
DEFAULT_CREDENTIALS_PATH)
DEFAULT_CREDENTIALS_PATH, Licenses,
ModelVisibility)
from modelscope.hub.errors import (InvalidParameter, NotExistError,
NotLoginException, RequestError,
datahub_raise_on_error,
handle_http_post_error,
handle_http_response, is_ok, raise_on_error)
from modelscope.hub.git import GitCommandWrapper
from modelscope.hub.repository import Repository
from modelscope.hub.utils.utils import (get_endpoint,
model_id_to_group_owner_name)
from modelscope.utils.config_ds import DOWNLOADED_DATASETS_PATH
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION,
DatasetFormations, DatasetMetaFormats,
DownloadMode)
DownloadMode, ModelFile)
from modelscope.utils.logger import get_logger
from .errors import (InvalidParameter, NotExistError, RequestError,
datahub_raise_on_error, handle_http_post_error,
handle_http_response, is_ok, raise_on_error)
from .utils.utils import get_endpoint, model_id_to_group_owner_name

# yapf: enable

logger = get_logger()

@@ -169,11 +180,106 @@ class HubApi:
else:
r.raise_for_status()

def list_model(self,
owner_or_group: str,
page_number=1,
page_size=10) -> dict:
"""List model in owner or group.
def push_model(self,
model_id: str,
model_dir: str,
visibility: int = ModelVisibility.PUBLIC,
license: str = Licenses.APACHE_V2,
chinese_name: Optional[str] = None,
commit_message: Optional[str] = 'upload model',
revision: Optional[str] = DEFAULT_MODEL_REVISION):
"""
Upload a model from a given directory to a given repository. A valid model
directory must contain a configuration.json file.

This function uploads the files in the given directory to the given repository.
If the repository does not yet exist on the remote, it is created automatically
with the given visibility, license and chinese_name parameters. If the revision
does not yet exist in the remote repository, a new branch is created for it.

HubApi's login must be called with a valid token (obtainable from ModelScope's
website) before calling this function.

Args:
model_id (`str`):
The model id to upload to; the caller must have write permission for it.
model_dir (`str`):
The absolute path of the finetune result.
visibility (`int`, defaults to `ModelVisibility.PUBLIC`):
Visibility of the newly created model (1 = private, 5 = public). If the model
does not exist on ModelScope, it is created with this visibility and the
parameter is required; it can be omitted when the model is known to exist.
license (`str`, defaults to `Licenses.APACHE_V2`):
License of the newly created model (see Licenses). If the model does not exist
on ModelScope, it is created with this license and the parameter is required;
it can be omitted when the model is known to exist.
chinese_name (`str`, *optional*, defaults to `None`):
Chinese name of the newly created model.
commit_message (`str`, *optional*, defaults to `'upload model'`):
Commit message of the push request.
revision (`str`, *optional*, defaults to `DEFAULT_MODEL_REVISION`):
Which branch to push to. If the branch does not exist, a new branch is created
and pushed to.
"""
if model_id is None:
raise InvalidParameter('model_id cannot be empty!')
if model_dir is None:
raise InvalidParameter('model_dir cannot be empty!')
if not os.path.exists(model_dir) or os.path.isfile(model_dir):
raise InvalidParameter('model_dir must be a valid directory.')
cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
if not os.path.exists(cfg_file):
raise ValueError(f'{model_dir} must contain a configuration.json.')
cookies = ModelScopeConfig.get_cookies()
if cookies is None:
raise NotLoginException('Must login before upload!')
files_to_save = os.listdir(model_dir)
try:
self.get_model(model_id=model_id)
except Exception:
if visibility is None or license is None:
raise InvalidParameter(
'visibility and license cannot be empty if want to create new repo'
)
logger.info('Create new model %s' % model_id)
self.create_model(
model_id=model_id,
visibility=visibility,
license=license,
chinese_name=chinese_name)
tmp_dir = tempfile.mkdtemp()
git_wrapper = GitCommandWrapper()
try:
repo = Repository(model_dir=tmp_dir, clone_from=model_id)
branches = git_wrapper.get_remote_branches(tmp_dir)
if revision not in branches:
logger.info('Create new branch %s' % revision)
git_wrapper.new_branch(tmp_dir, revision)
git_wrapper.checkout(tmp_dir, revision)
for f in files_to_save:
if f[0] != '.':
src = os.path.join(model_dir, f)
if os.path.isdir(src):
shutil.copytree(src, os.path.join(tmp_dir, f))
else:
shutil.copy(src, tmp_dir)
if not commit_message:
date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
commit_message = '[automsg] push model %s to hub at %s' % (
model_id, date)
repo.push(commit_message=commit_message, branch=revision)
except Exception:
raise
finally:
shutil.rmtree(tmp_dir, ignore_errors=True)

def list_models(self,
owner_or_group: str,
page_number=1,
page_size=10) -> dict:
"""List models in owner or group.

Args:
owner_or_group(`str`): owner or group.
@@ -390,11 +496,13 @@ class HubApi:
return resp['Data']

def list_oss_dataset_objects(self, dataset_name, namespace, max_limit,
is_recursive, is_filter_dir, revision,
cookies):
is_recursive, is_filter_dir, revision):
url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \
f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}'
cookies = requests.utils.dict_from_cookiejar(cookies)

cookies = ModelScopeConfig.get_cookies()
if cookies:
cookies = requests.utils.dict_from_cookiejar(cookies)

resp = requests.get(url=url, cookies=cookies)
resp = resp.json()
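Net effect of this file: uploading moves from the deleted `modelscope/hub/upload.py` (below) onto `HubApi` itself. A hedged usage sketch; the token, model id and paths are placeholders, while `login`, `push_model` and the constants are as imported in this diff:

```python
from modelscope.hub.api import HubApi
from modelscope.hub.constants import Licenses, ModelVisibility

api = HubApi()
api.login('your-access-token')  # must happen before push_model
api.push_model(
    model_id='your-group/your-model',        # placeholder id
    model_dir='/path/to/finetune/output',    # must contain configuration.json
    visibility=ModelVisibility.PUBLIC,
    license=Licenses.APACHE_V2,
    commit_message='upload finetuned weights')
```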


modelscope/hub/file_download.py (+1, -2)

@@ -11,13 +11,12 @@ from typing import Dict, Optional, Union
from uuid import uuid4

import requests
from filelock import FileLock
from tqdm import tqdm

from modelscope import __version__
from modelscope.hub.api import HubApi, ModelScopeConfig
from modelscope.utils.constant import DEFAULT_MODEL_REVISION
from modelscope.utils.logger import get_logger
from .api import HubApi, ModelScopeConfig
from .constants import FILE_HASH
from .errors import FileDownloadError, NotExistError
from .utils.caching import ModelFileSystemCache


modelscope/hub/git.py (+6, -5)

@@ -1,13 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import os
import re
import subprocess
from typing import List
from xmlrpc.client import Boolean

from modelscope.utils.logger import get_logger
from .api import ModelScopeConfig
from .errors import GitError

logger = get_logger()
@@ -132,6 +129,7 @@ class GitCommandWrapper(metaclass=Singleton):
return response

def add_user_info(self, repo_base_dir, repo_name):
from modelscope.hub.api import ModelScopeConfig
user_name, user_email = ModelScopeConfig.get_user_info()
if user_name and user_email:
# config user.name and user.email if exist
@@ -184,8 +182,11 @@ class GitCommandWrapper(metaclass=Singleton):
info = [
line.strip()
for line in rsp.stdout.decode('utf8').strip().split(os.linesep)
][1:]
return ['/'.join(line.split('/')[1:]) for line in info]
]
if len(info) == 1:
return ['/'.join(info[0].split('/')[1:])]
else:
return ['/'.join(line.split('/')[1:]) for line in info[1:]]

def pull(self, repo_dir: str):
cmds = ['-C', repo_dir, 'pull']
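The `get_remote_branches` change above fixes the single-branch case: the old code unconditionally dropped the first output line, so a repository with exactly one remote branch yielded an empty list. A self-contained sketch of the fixed parsing, assuming the first line of multi-branch output is a header/HEAD-style entry:

```python
import os

def parse_remote_branches(stdout: str):
    # Mirror of the fixed logic: keep the sole line as-is, or skip the
    # first line (presumed header) when several branches are listed.
    info = [line.strip() for line in stdout.strip().split(os.linesep)]
    if len(info) == 1:
        return ['/'.join(info[0].split('/')[1:])]
    return ['/'.join(line.split('/')[1:]) for line in info[1:]]

print(parse_remote_branches('origin/master'))  # -> ['master']
print(parse_remote_branches(os.linesep.join(
    ['origin/HEAD', 'origin/master', 'origin/dev'])))  # -> ['master', 'dev']
```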


modelscope/hub/repository.py (+2, -2)

@@ -7,7 +7,6 @@ from modelscope.hub.errors import GitError, InvalidParameter, NotLoginException
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION)
from modelscope.utils.logger import get_logger
from .api import ModelScopeConfig
from .git import GitCommandWrapper
from .utils.utils import get_endpoint

@@ -47,6 +46,7 @@ class Repository:
err_msg = 'a non-default value of revision cannot be empty.'
raise InvalidParameter(err_msg)

from modelscope.hub.api import ModelScopeConfig
if auth_token:
self.auth_token = auth_token
else:
@@ -166,7 +166,7 @@ class DatasetRepository:
err_msg = 'a non-default value of revision cannot be empty.'
raise InvalidParameter(err_msg)
self.revision = revision
from modelscope.hub.api import ModelScopeConfig
if auth_token:
self.auth_token = auth_token
else:


modelscope/hub/snapshot_download.py (+1, -1)

@@ -5,9 +5,9 @@ import tempfile
from pathlib import Path
from typing import Dict, Optional, Union

from modelscope.hub.api import HubApi, ModelScopeConfig
from modelscope.utils.constant import DEFAULT_MODEL_REVISION
from modelscope.utils.logger import get_logger
from .api import HubApi, ModelScopeConfig
from .constants import FILE_HASH
from .errors import NotExistError
from .file_download import (get_file_download_url, http_get_file,


modelscope/hub/upload.py (+0, -117)

@@ -1,117 +0,0 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import datetime
import os
import shutil
import tempfile
import uuid
from typing import Dict, Optional
from uuid import uuid4

from filelock import FileLock

from modelscope import __version__
from modelscope.hub.api import HubApi, ModelScopeConfig
from modelscope.hub.errors import InvalidParameter, NotLoginException
from modelscope.hub.git import GitCommandWrapper
from modelscope.hub.repository import Repository
from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile
from modelscope.utils.logger import get_logger

logger = get_logger()


def upload_folder(model_id: str,
model_dir: str,
visibility: int = 0,
license: str = None,
chinese_name: Optional[str] = None,
commit_message: Optional[str] = None,
revision: Optional[str] = DEFAULT_MODEL_REVISION):
"""
Upload model from a given directory to given repository. A valid model directory
must contain a configuration.json file.

This function upload the files in given directory to given repository. If the
given repository is not exists in remote, it will automatically create it with
given visibility, license and chinese_name parameters. If the revision is also
not exists in remote repository, it will create a new branch for it.

This function must be called before calling HubApi's login with a valid token
which can be obtained from ModelScope's website.

Args:
model_id (`str`):
The model id to be uploaded, caller must have write permission for it.
model_dir(`str`):
The Absolute Path of the finetune result.
visibility(`int`, defaults to `0`):
Visibility of the new created model(1-private, 5-public). If the model is
not exists in ModelScope, this function will create a new model with this
visibility and this parameter is required. You can ignore this parameter
if you make sure the model's existence.
license(`str`, defaults to `None`):
License of the new created model(see License). If the model is not exists
in ModelScope, this function will create a new model with this license
and this parameter is required. You can ignore this parameter if you
make sure the model's existence.
chinese_name(`str`, *optional*, defaults to `None`):
chinese name of the new created model.
commit_message(`str`, *optional*, defaults to `None`):
commit message of the push request.
revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION):
which branch to push. If the branch is not exists, It will create a new
branch and push to it.
"""
if model_id is None:
raise InvalidParameter('model_id cannot be empty!')
if model_dir is None:
raise InvalidParameter('model_dir cannot be empty!')
if not os.path.exists(model_dir) or os.path.isfile(model_dir):
raise InvalidParameter('model_dir must be a valid directory.')
cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION)
if not os.path.exists(cfg_file):
raise ValueError(f'{model_dir} must contain a configuration.json.')
cookies = ModelScopeConfig.get_cookies()
if cookies is None:
raise NotLoginException('Must login before upload!')
files_to_save = os.listdir(model_dir)
api = HubApi()
try:
api.get_model(model_id=model_id)
except Exception:
if visibility is None or license is None:
raise InvalidParameter(
'visibility and license cannot be empty if want to create new repo'
)
logger.info('Create new model %s' % model_id)
api.create_model(
model_id=model_id,
visibility=visibility,
license=license,
chinese_name=chinese_name)
tmp_dir = tempfile.mkdtemp()
git_wrapper = GitCommandWrapper()
try:
repo = Repository(model_dir=tmp_dir, clone_from=model_id)
branches = git_wrapper.get_remote_branches(tmp_dir)
if revision not in branches:
logger.info('Create new branch %s' % revision)
git_wrapper.new_branch(tmp_dir, revision)
git_wrapper.checkout(tmp_dir, revision)
for f in files_to_save:
if f[0] != '.':
src = os.path.join(model_dir, f)
if os.path.isdir(src):
shutil.copytree(src, os.path.join(tmp_dir, f))
else:
shutil.copy(src, tmp_dir)
if not commit_message:
date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
commit_message = '[automsg] push model %s to hub at %s' % (
model_id, date)
repo.push(commit_message=commit_message, branch=revision)
except Exception:
raise
finally:
shutil.rmtree(tmp_dir, ignore_errors=True)

modelscope/metainfo.py (+12, -6)

@@ -9,7 +9,9 @@ class Models(object):

Model name should only contain model info but not task info.
"""
# tinynas models
tinynas_detection = 'tinynas-detection'
tinynas_damoyolo = 'tinynas-damoyolo'

# vision models
detection = 'detection'
@@ -234,7 +236,7 @@ class Pipelines(object):
conversational_text_to_sql = 'conversational-text-to-sql'
table_question_answering_pipeline = 'table-question-answering-pipeline'
sentence_embedding = 'sentence-embedding'
passage_ranking = 'passage-ranking'
text_ranking = 'text-ranking'
relation_extraction = 'relation-extraction'
document_segmentation = 'document-segmentation'
feature_extraction = 'feature-extraction'
@@ -261,6 +263,7 @@ class Pipelines(object):
text_to_image_synthesis = 'text-to-image-synthesis'
video_multi_modal_embedding = 'video-multi-modal-embedding'
image_text_retrieval = 'image-text-retrieval'
ofa_ocr_recognition = 'ofa-ocr-recognition'


class Trainers(object):
@@ -295,7 +298,7 @@ class Trainers(object):
dialog_intent_trainer = 'dialog-intent-trainer'
nlp_base_trainer = 'nlp-base-trainer'
nlp_veco_trainer = 'nlp-veco-trainer'
nlp_passage_ranking_trainer = 'nlp-passage-ranking-trainer'
nlp_text_ranking_trainer = 'nlp-text-ranking-trainer'

# audio trainers
speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k'
@@ -341,7 +344,7 @@ class Preprocessors(object):
zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer'
text_error_correction = 'text-error-correction'
sentence_embedding = 'sentence-embedding'
passage_ranking = 'passage-ranking'
text_ranking = 'text-ranking'
sequence_labeling_tokenizer = 'sequence-labeling-tokenizer'
word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor'
fill_mask = 'fill-mask'
@@ -374,7 +377,7 @@ class Metrics(object):
audio_noise_metric = 'audio-noise-metric'

# text gen
bleu = 'bleu'
BLEU = 'bleu'

# metrics for image denoise task
image_denoise_metric = 'image-denoise-metric'
@@ -396,6 +399,8 @@ class Metrics(object):
movie_scene_segmentation_metric = 'movie-scene-segmentation-metric'
# metric for inpainting task
image_inpainting_metric = 'image-inpainting-metric'
# metric for ocr
NED = 'ned'


class Optimizers(object):
@@ -454,9 +459,10 @@ class Datasets(object):
""" Names for different datasets.
"""
ClsDataset = 'ClsDataset'
Face2dKeypointsDataset = 'Face2dKeypointsDataset'
Face2dKeypointsDataset = 'FaceKeypointDataset'
HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset'
HumanWholeBodyKeypointDataset = 'HumanWholeBodyKeypointDataset'
HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset'
SegDataset = 'SegDataset'
DetDataset = 'DetDataset'
DetImagesMixDataset = 'DetImagesMixDataset'
PairedDataset = 'PairedDataset'

modelscope/metrics/bleu_metric.py (+1, -1)

@@ -11,7 +11,7 @@ from .builder import METRICS, MetricKeys
EVAL_BLEU_ORDER = 4


@METRICS.register_module(group_key=default_group, module_name=Metrics.bleu)
@METRICS.register_module(group_key=default_group, module_name=Metrics.BLEU)
class BleuMetric(Metric):
"""The metric computation bleu for text generation classes.



modelscope/metrics/builder.py (+2, -0)

@@ -23,6 +23,7 @@ class MetricKeys(object):
BLEU_4 = 'bleu-4'
ROUGE_1 = 'rouge-1'
ROUGE_L = 'rouge-l'
NED = 'ned' # ocr metric


task_default_metrics = {
@@ -32,6 +33,7 @@ task_default_metrics = {
Tasks.sentiment_classification: [Metrics.seq_cls_metric],
Tasks.token_classification: [Metrics.token_cls_metric],
Tasks.text_generation: [Metrics.text_gen_metric],
Tasks.text_classification: [Metrics.seq_cls_metric],
Tasks.image_denoising: [Metrics.image_denoise_metric],
Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric],
Tasks.image_portrait_enhancement:


modelscope/metrics/image_portrait_enhancement_metric.py (+2, -0)

@@ -2,6 +2,7 @@
# https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/metrics/psnr_ssim.py
from typing import Dict

import cv2
import numpy as np

from modelscope.metainfo import Metrics
@@ -37,6 +38,7 @@ class ImagePortraitEnhancementMetric(Metric):
def add(self, outputs: Dict, inputs: Dict):
ground_truths = outputs['target']
eval_results = outputs['pred']

self.preds.extend(eval_results)
self.targets.extend(ground_truths)



modelscope/models/audio/tts/voice.py (+41, -37)

@@ -2,6 +2,7 @@

import os
import pickle as pkl
from threading import Lock

import json
import numpy as np
@@ -27,6 +28,7 @@ class Voice:
self.__am_config = AttrDict(**am_config)
self.__voc_config = AttrDict(**voc_config)
self.__model_loaded = False
self.__lock = Lock()
if 'am' not in self.__am_config:
raise TtsModelConfigurationException(
'modelscope error: am configuration invalid')
@@ -71,34 +73,35 @@ class Voice:
self.__generator.remove_weight_norm()

def __am_forward(self, symbol_seq):
with torch.no_grad():
inputs_feat_lst = self.__ling_unit.encode_symbol_sequence(
symbol_seq)
inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to(
self.__device)
inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to(
self.__device)
inputs_syllable = torch.from_numpy(inputs_feat_lst[2]).long().to(
self.__device)
inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to(
self.__device)
inputs_ling = torch.stack(
[inputs_sy, inputs_tone, inputs_syllable, inputs_ws],
dim=-1).unsqueeze(0)
inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to(
self.__device).unsqueeze(0)
inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to(
self.__device).unsqueeze(0)
inputs_len = torch.zeros(1).to(self.__device).long(
) + inputs_emo.size(1) - 1 # minus 1 for "~"
res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1],
inputs_spk[:, :-1], inputs_len)
postnet_outputs = res['postnet_outputs']
LR_length_rounded = res['LR_length_rounded']
valid_length = int(LR_length_rounded[0].item())
postnet_outputs = postnet_outputs[
0, :valid_length, :].cpu().numpy()
return postnet_outputs
with self.__lock:
with torch.no_grad():
inputs_feat_lst = self.__ling_unit.encode_symbol_sequence(
symbol_seq)
inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to(
self.__device)
inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to(
self.__device)
inputs_syllable = torch.from_numpy(
inputs_feat_lst[2]).long().to(self.__device)
inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to(
self.__device)
inputs_ling = torch.stack(
[inputs_sy, inputs_tone, inputs_syllable, inputs_ws],
dim=-1).unsqueeze(0)
inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to(
self.__device).unsqueeze(0)
inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to(
self.__device).unsqueeze(0)
inputs_len = torch.zeros(1).to(self.__device).long(
) + inputs_emo.size(1) - 1 # minus 1 for "~"
res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1],
inputs_spk[:, :-1], inputs_len)
postnet_outputs = res['postnet_outputs']
LR_length_rounded = res['LR_length_rounded']
valid_length = int(LR_length_rounded[0].item())
postnet_outputs = postnet_outputs[
0, :valid_length, :].cpu().numpy()
return postnet_outputs

def __vocoder_forward(self, melspec):
dim0 = list(melspec.shape)[-1]
@@ -118,14 +121,15 @@ class Voice:
return audio

def forward(self, symbol_seq):
if not self.__model_loaded:
torch.manual_seed(self.__am_config.seed)
if torch.cuda.is_available():
with self.__lock:
if not self.__model_loaded:
torch.manual_seed(self.__am_config.seed)
self.__device = torch.device('cuda')
else:
self.__device = torch.device('cpu')
self.__load_am()
self.__load_vocoder()
self.__model_loaded = True
if torch.cuda.is_available():
torch.manual_seed(self.__am_config.seed)
self.__device = torch.device('cuda')
else:
self.__device = torch.device('cpu')
self.__load_am()
self.__load_vocoder()
self.__model_loaded = True
return self.__vocoder_forward(self.__am_forward(symbol_seq))
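The `Lock` added to `Voice` serializes both the lazy one-time model load in `forward` and the acoustic-model forward pass. A self-contained sketch of the same pattern, with illustrative names rather than the real `Voice` internals:

```python
from threading import Lock

class LazyModel:
    """Thread-safe lazy initialization plus serialized inference."""

    def __init__(self):
        self._lock = Lock()
        self._loaded = False
        self._model = None

    def forward(self, x):
        with self._lock:
            if not self._loaded:        # checked under the lock, so two
                self._model = object()  # threads cannot both run the load
                self._loaded = True
        with self._lock:                # serialize inference, as voice.py
            return (self._model, x)     # now does for __am_forward
```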

modelscope/models/cv/face_human_hand_detection/one_stage_detector.py (+0, -3)

@@ -56,9 +56,6 @@ class OneStageDetector(nn.Module):

def inference(self, meta):
with torch.no_grad():
torch.cuda.synchronize()
preds = self(meta['img'])
torch.cuda.synchronize()
results = self.head.post_process(preds, meta)
torch.cuda.synchronize()
return results

modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py (+14, -14)

@@ -35,7 +35,7 @@ class ImagePortraitEnhancement(TorchModel):
"""
super().__init__(model_dir, *args, **kwargs)

self.size = 512
self.size = 256
self.style_dim = 512
self.n_mlp = 8
self.mean_path_length = 0
@@ -131,9 +131,9 @@ class ImagePortraitEnhancement(TorchModel):
return path_penalty, path_mean.detach(), path_lengths

@torch.no_grad()
def _evaluate_postprocess(self, src: Tensor,
def _evaluate_postprocess(self, input: Tensor,
target: Tensor) -> Dict[str, list]:
preds, _ = self.generator(src)
preds, _ = self.generator(input)
preds = list(torch.split(preds, 1, 0))
targets = list(torch.split(target, 1, 0))

@@ -144,11 +144,11 @@ class ImagePortraitEnhancement(TorchModel):

return {'pred': preds, 'target': targets}

def _train_forward_d(self, src: Tensor, target: Tensor) -> Tensor:
def _train_forward_d(self, input: Tensor, target: Tensor) -> Tensor:
self.requires_grad(self.generator, False)
self.requires_grad(self.discriminator, True)

preds, _ = self.generator(src)
preds, _ = self.generator(input)
fake_pred = self.discriminator(preds)
real_pred = self.discriminator(target)

@@ -156,27 +156,27 @@ class ImagePortraitEnhancement(TorchModel):

return d_loss

def _train_forward_d_r1(self, src: Tensor, target: Tensor) -> Tensor:
src.requires_grad = True
def _train_forward_d_r1(self, input: Tensor, target: Tensor) -> Tensor:
input.requires_grad = True
target.requires_grad = True
real_pred = self.discriminator(target)
r1_loss = self.d_r1_loss(real_pred, target)

return r1_loss

def _train_forward_g(self, src: Tensor, target: Tensor) -> Tensor:
def _train_forward_g(self, input: Tensor, target: Tensor) -> Tensor:
self.requires_grad(self.generator, True)
self.requires_grad(self.discriminator, False)

preds, _ = self.generator(src)
preds, _ = self.generator(input)
fake_pred = self.discriminator(preds)

g_loss = self.g_nonsaturating_loss(fake_pred, preds, target, src)
g_loss = self.g_nonsaturating_loss(fake_pred, preds, target, input)

return g_loss

def _train_forward_g_path(self, src: Tensor, target: Tensor) -> Tensor:
fake_img, latents = self.generator(src, return_latents=True)
def _train_forward_g_path(self, input: Tensor, target: Tensor) -> Tensor:
fake_img, latents = self.generator(input, return_latents=True)

path_loss, self.mean_path_length, path_lengths = self.g_path_regularize(
fake_img, latents, self.mean_path_length)
@@ -184,8 +184,8 @@ class ImagePortraitEnhancement(TorchModel):
return path_loss

@torch.no_grad()
def _inference_forward(self, src: Tensor) -> Dict[str, Tensor]:
return {'outputs': (self.generator(src)[0] * 0.5 + 0.5).clamp(0, 1)}
def _inference_forward(self, input: Tensor) -> Dict[str, Tensor]:
return {'outputs': (self.generator(input)[0] * 0.5 + 0.5).clamp(0, 1)}

def forward(self, input: Dict[str,
Tensor]) -> Dict[str, Union[list, Tensor]]:


modelscope/models/cv/image_to_image_generation/__init__.py (+1, -1)

@@ -1,2 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from . import data, models, ops

modelscope/models/cv/image_to_image_generation/data/__init__.py (+1, -1)

@@ -1,4 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule


modelscope/models/cv/image_to_image_generation/data/transforms.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
import math
import random



modelscope/models/cv/image_to_image_generation/models/__init__.py (+1, -1)

@@ -1,4 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule


modelscope/models/cv/image_to_image_generation/ops/__init__.py (+1, -1)

@@ -1,4 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule


modelscope/models/cv/image_to_image_translation/__init__.py (+24, -0)

@@ -0,0 +1,24 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.

from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:

from .model_translation import UNet

else:
_import_structure = {
'image_to_image_translation_model': ['UNet'],
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

modelscope/models/cv/image_to_image_translation/data/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .transforms import * # noqa F403

modelscope/models/cv/image_to_image_translation/models/__init__.py (+1, -0)

@@ -1,2 +1,3 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .autoencoder import * # noqa F403
from .clip import * # noqa F403

modelscope/models/cv/image_to_image_translation/ops/__init__.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .degradation import * # noqa F403
from .diffusion import * # noqa F403
from .losses import * # noqa F403


modelscope/models/cv/product_retrieval_embedding/__init__.py (+1, -1)

@@ -1,4 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule


modelscope/models/cv/product_retrieval_embedding/item_detection.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
import cv2
import numpy as np



modelscope/models/cv/product_retrieval_embedding/item_embedding.py (+1, -0)

@@ -1,3 +1,4 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
import cv2
import numpy as np
import torch.nn as nn


modelscope/models/cv/product_retrieval_embedding/item_model.py (+2, -0)

@@ -1,3 +1,5 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.

import os.path as osp
from typing import Any, Dict



modelscope/models/cv/text_driven_segmentation/lseg_model.py (+1, -1)

@@ -93,7 +93,7 @@ class TextDrivenSeg(TorchModel):
"""
with torch.no_grad():
if self.device_id == -1:
output = self.model(image)
output = self.model(image, [text])
else:
device = torch.device('cuda', self.device_id)
output = self.model(image.to(device), [text])


modelscope/models/cv/tinynas_detection/__init__.py (+2, -0)

@@ -7,10 +7,12 @@ from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .tinynas_detector import TinynasDetector
from .tinynas_damoyolo import DamoYolo

else:
_import_structure = {
'tinynas_detector': ['TinynasDetector'],
'tinynas_damoyolo': ['DamoYolo'],
}

import sys


modelscope/models/cv/tinynas_detection/backbone/tinynas.py (+19, -10)

@@ -4,6 +4,7 @@
import torch
import torch.nn as nn

from modelscope.utils.file_utils import read_file
from ..core.base_ops import Focus, SPPBottleneck, get_activation
from ..core.repvgg_block import RepVggBlock

@@ -49,12 +50,16 @@ class ResConvK1KX(nn.Module):
kernel_size,
stride,
force_resproj=False,
act='silu'):
act='silu',
reparam=False):
super(ResConvK1KX, self).__init__()
self.stride = stride
self.conv1 = ConvKXBN(in_c, btn_c, 1, 1)
self.conv2 = RepVggBlock(
btn_c, out_c, kernel_size, stride, act='identity')
if not reparam:
self.conv2 = ConvKXBN(btn_c, out_c, 3, stride)
else:
self.conv2 = RepVggBlock(
btn_c, out_c, kernel_size, stride, act='identity')

if act is None:
self.activation_function = torch.relu
@@ -97,7 +102,8 @@ class SuperResConvK1KX(nn.Module):
stride,
num_blocks,
with_spp=False,
act='silu'):
act='silu',
reparam=False):
super(SuperResConvK1KX, self).__init__()
if act is None:
self.act = torch.relu
@@ -124,7 +130,8 @@ class SuperResConvK1KX(nn.Module):
this_kernel_size,
this_stride,
force_resproj,
act=act)
act=act,
reparam=reparam)
self.block_list.append(the_block)
if block_id == 0 and with_spp:
self.block_list.append(
@@ -248,7 +255,8 @@ class TinyNAS(nn.Module):
with_spp=False,
use_focus=False,
need_conv1=True,
act='silu'):
act='silu',
reparam=False):
super(TinyNAS, self).__init__()
assert len(out_indices) == len(out_channels)
self.out_indices = out_indices
@@ -281,7 +289,8 @@ class TinyNAS(nn.Module):
block_info['s'],
block_info['L'],
spp,
act=act)
act=act,
reparam=reparam)
self.block_list.append(the_block)
elif the_block_class == 'SuperResConvKXKX':
spp = with_spp if idx == len(structure_info) - 1 else False
@@ -325,8 +334,8 @@ class TinyNAS(nn.Module):
def load_tinynas_net(backbone_cfg):
# load masternet model to path
import ast
struct_str = ''.join([x.strip() for x in backbone_cfg.net_structure_str])
net_structure_str = read_file(backbone_cfg.structure_file)
struct_str = ''.join([x.strip() for x in net_structure_str])
struct_info = ast.literal_eval(struct_str)
for layer in struct_info:
if 'nbitsA' in layer:
@@ -342,6 +351,6 @@ def load_tinynas_net(backbone_cfg):
use_focus=backbone_cfg.use_focus,
act=backbone_cfg.act,
need_conv1=backbone_cfg.need_conv1,
)
reparam=backbone_cfg.reparam)

return model
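`load_tinynas_net` now reads the backbone definition from a text file named by `backbone_cfg.structure_file` (resolved against the model dir in `detector.py` below) instead of an inline `net_structure_str`. A sketch of that parsing step in isolation; the file contents shown are illustrative:

```python
import ast

def load_structure(path: str):
    # Same normalization as above: strip each line, join, then evaluate
    # the literal into a list of layer dicts.
    with open(path) as f:
        struct_str = ''.join(x.strip() for x in f.readlines())
    return ast.literal_eval(struct_str)

# A file containing "[{'class': 'SuperResConvK1KX', 'k': 3, 's': 2}]"
# parses into a plain Python list the builder can iterate over.
```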

modelscope/models/cv/tinynas_detection/detector.py (+4, -1)

@@ -30,7 +30,7 @@ class SingleStageDetector(TorchModel):
"""
super().__init__(model_dir, *args, **kwargs)

config_path = osp.join(model_dir, 'airdet_s.py')
config_path = osp.join(model_dir, self.config_name)
config = parse_config(config_path)
self.cfg = config
model_path = osp.join(model_dir, config.model.name)
@@ -41,6 +41,9 @@ class SingleStageDetector(TorchModel):
self.conf_thre = config.model.head.nms_conf_thre
self.nms_thre = config.model.head.nms_iou_thre

if self.cfg.model.backbone.name == 'TinyNAS':
self.cfg.model.backbone.structure_file = osp.join(
model_dir, self.cfg.model.backbone.structure_file)
self.backbone = build_backbone(self.cfg.model.backbone)
self.neck = build_neck(self.cfg.model.neck)
self.head = build_head(self.cfg.model.head)


modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py (+33, -20)

@@ -124,11 +124,13 @@ class GFocalHead_Tiny(nn.Module):
simOTA_iou_weight=3.0,
octbase=8,
simlqe=False,
use_lqe=True,
**kwargs):
self.simlqe = simlqe
self.num_classes = num_classes
self.in_channels = in_channels
self.strides = strides
self.use_lqe = use_lqe
self.feat_channels = feat_channels if isinstance(feat_channels, list) \
else [feat_channels] * len(self.strides)

@@ -181,15 +183,20 @@ class GFocalHead_Tiny(nn.Module):
groups=self.conv_groups,
norm=self.norm,
act=self.act))
if not self.simlqe:
conf_vector = [nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)]
if self.use_lqe:
if not self.simlqe:
conf_vector = [
nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)
]
else:
conf_vector = [
nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1)
]
conf_vector += [self.relu]
conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()]
reg_conf = nn.Sequential(*conf_vector)
else:
conf_vector = [
nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1)
]
conf_vector += [self.relu]
conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()]
reg_conf = nn.Sequential(*conf_vector)
reg_conf = None

return cls_convs, reg_convs, reg_conf

@@ -290,21 +297,27 @@ class GFocalHead_Tiny(nn.Module):
N, C, H, W = bbox_pred.size()
prob = F.softmax(
bbox_pred.reshape(N, 4, self.reg_max + 1, H, W), dim=2)
if not self.simlqe:
prob_topk, _ = prob.topk(self.reg_topk, dim=2)

if self.add_mean:
stat = torch.cat(
[prob_topk, prob_topk.mean(dim=2, keepdim=True)], dim=2)
if self.use_lqe:
if not self.simlqe:
prob_topk, _ = prob.topk(self.reg_topk, dim=2)

if self.add_mean:
stat = torch.cat(
[prob_topk,
prob_topk.mean(dim=2, keepdim=True)],
dim=2)
else:
stat = prob_topk

quality_score = reg_conf(
stat.reshape(N, 4 * self.total_dim, H, W))
else:
stat = prob_topk
quality_score = reg_conf(
bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W))

quality_score = reg_conf(stat.reshape(N, 4 * self.total_dim, H, W))
cls_score = gfl_cls(cls_feat).sigmoid() * quality_score
else:
quality_score = reg_conf(
bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W))

cls_score = gfl_cls(cls_feat).sigmoid() * quality_score
cls_score = gfl_cls(cls_feat).sigmoid()

flatten_cls_score = cls_score.flatten(start_dim=2).transpose(1, 2)
flatten_bbox_pred = bbox_pred.flatten(start_dim=2).transpose(1, 2)


modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py (+1, -4)

@@ -14,7 +14,6 @@ class GiraffeNeckV2(nn.Module):
self,
depth=1.0,
width=1.0,
in_features=[2, 3, 4],
in_channels=[256, 512, 1024],
out_channels=[256, 512, 1024],
depthwise=False,
@@ -24,7 +23,6 @@ class GiraffeNeckV2(nn.Module):
block_name='BasicBlock',
):
super().__init__()
self.in_features = in_features
self.in_channels = in_channels
Conv = DWConv if depthwise else BaseConv

@@ -169,8 +167,7 @@ class GiraffeNeckV2(nn.Module):
"""

# backbone
features = [out_features[f] for f in self.in_features]
[x2, x1, x0] = features
[x2, x1, x0] = out_features

# node x3
x13 = self.bu_conv13(x1)


modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py (+15, -0)

@@ -0,0 +1,15 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

from modelscope.metainfo import Models
from modelscope.models.builder import MODELS
from modelscope.utils.constant import Tasks
from .detector import SingleStageDetector


@MODELS.register_module(
Tasks.image_object_detection, module_name=Models.tinynas_damoyolo)
class DamoYolo(SingleStageDetector):

def __init__(self, model_dir, *args, **kwargs):
self.config_name = 'damoyolo_s.py'
super(DamoYolo, self).__init__(model_dir, *args, **kwargs)

modelscope/models/cv/tinynas_detection/tinynas_detector.py (+1, -1)

@@ -12,5 +12,5 @@ from .detector import SingleStageDetector
class TinynasDetector(SingleStageDetector):

def __init__(self, model_dir, *args, **kwargs):
self.config_name = 'airdet_s.py'
super(TinynasDetector, self).__init__(model_dir, *args, **kwargs)

modelscope/models/cv/video_summarization/summarizer.py (+1, -1)

@@ -161,7 +161,7 @@ def summary_format(summary, fps):
is_summary_frame = False

if is_summary_frame and summary[-1] == 1:
end_frame = len(frame_idxes) - 1
end_frame = len(summary) - 1
frames_list.append([start_frame, end_frame])

output = []


modelscope/models/multi_modal/diffusion/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .model import DiffusionForTextToImageSynthesis

modelscope/models/multi_modal/gemm/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .gemm_model import GEMMForMultiModalEmbedding

modelscope/models/multi_modal/multi_stage_diffusion/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .model import MultiStageDiffusionForTextToImageSynthesis

modelscope/models/multi_modal/ofa/utils/constant.py (+2, -1)

@@ -3,8 +3,9 @@ from modelscope.outputs import OutputKeys
from modelscope.utils.constant import Tasks

OFA_TASK_KEY_MAPPING = {
Tasks.ocr_recognition: OutputKeys.TEXT,
Tasks.image_captioning: OutputKeys.CAPTION,
Tasks.summarization: OutputKeys.TEXT,
Tasks.text_summarization: OutputKeys.TEXT,
Tasks.visual_question_answering: OutputKeys.TEXT,
Tasks.visual_grounding: OutputKeys.BOXES,
Tasks.text_classification: OutputKeys.LABELS,


modelscope/models/multi_modal/ofa_for_all_tasks.py (+4, -2)

@@ -28,12 +28,13 @@ __all__ = ['OfaForAllTasks']


@MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa)
@MODELS.register_module(Tasks.ocr_recognition, module_name=Models.ofa)
@MODELS.register_module(Tasks.visual_grounding, module_name=Models.ofa)
@MODELS.register_module(
Tasks.visual_question_answering, module_name=Models.ofa)
@MODELS.register_module(Tasks.visual_entailment, module_name=Models.ofa)
@MODELS.register_module(Tasks.image_classification, module_name=Models.ofa)
@MODELS.register_module(Tasks.summarization, module_name=Models.ofa)
@MODELS.register_module(Tasks.text_summarization, module_name=Models.ofa)
@MODELS.register_module(Tasks.text_classification, module_name=Models.ofa)
class OfaForAllTasks(TorchModel):

@@ -97,8 +98,9 @@ class OfaForAllTasks(TorchModel):
'traverse': self._traverse_inference,
}
self.task_inference_mapping = {
Tasks.ocr_recognition: self._text_gen_inference,
Tasks.image_captioning: self._text_gen_inference,
Tasks.summarization: self._text_gen_inference,
Tasks.text_summarization: self._text_gen_inference,
Tasks.visual_grounding: self._visual_grounding_inference,
Tasks.visual_entailment: inference_d[self.gen_type],
Tasks.visual_question_answering: inference_d[self.gen_type],


modelscope/models/multi_modal/team/__init__.py (+1, -0)

@@ -1 +1,2 @@
# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved.
from .team_model import TEAMForMultiModalSimilarity

modelscope/models/nlp/__init__.py (+3, -2)

@@ -34,8 +34,9 @@ if TYPE_CHECKING:
TaskModelForTextGeneration)
from .token_classification import SbertForTokenClassification
from .sentence_embedding import SentenceEmbedding
from .passage_ranking import PassageRanking
from .text_ranking import TextRanking
from .T5 import T5ForConditionalGeneration

else:
_import_structure = {
'backbones': ['SbertModel'],
@@ -75,7 +76,7 @@ else:
'token_classification': ['SbertForTokenClassification'],
'table_question_answering': ['TableQuestionAnswering'],
'sentence_embedding': ['SentenceEmbedding'],
'passage_ranking': ['PassageRanking'],
'text_ranking': ['TextRanking'],
'T5': ['T5ForConditionalGeneration'],
}



modelscope/models/nlp/bert/modeling_bert.py (+0, -78)

@@ -15,7 +15,6 @@
"""PyTorch BERT model. """

import math
import os
import warnings
from dataclasses import dataclass
from typing import Optional, Tuple
@@ -41,7 +40,6 @@ from transformers.modeling_utils import (PreTrainedModel,
find_pruneable_heads_and_indices,
prune_linear_layer)

from modelscope.models.base import TorchModel
from modelscope.utils.logger import get_logger
from .configuration_bert import BertConfig

@@ -50,81 +48,6 @@ logger = get_logger(__name__)
_CONFIG_FOR_DOC = 'BertConfig'


def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
"""Load tf checkpoints in a pytorch model."""
try:
import re

import numpy as np
import tensorflow as tf
except ImportError:
logger.error(
'Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see '
'https://www.tensorflow.org/install/ for installation instructions.'
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info(f'Converting TensorFlow checkpoint from {tf_path}')
# Load weights from TF model
init_vars = tf.train.list_variables(tf_path)
names = []
arrays = []
for name, shape in init_vars:
logger.info(f'Loading TF weight {name} with shape {shape}')
array = tf.train.load_variable(tf_path, name)
names.append(name)
arrays.append(array)

for name, array in zip(names, arrays):
name = name.split('/')
# adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
# which are not required for using pretrained model
if any(n in [
'adam_v', 'adam_m', 'AdamWeightDecayOptimizer',
'AdamWeightDecayOptimizer_1', 'global_step'
] for n in name):
logger.info(f"Skipping {'/'.join(name)}")
continue
pointer = model
for m_name in name:
if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
scope_names = re.split(r'_(\d+)', m_name)
else:
scope_names = [m_name]
if scope_names[0] == 'kernel' or scope_names[0] == 'gamma':
pointer = getattr(pointer, 'weight')
elif scope_names[0] == 'output_bias' or scope_names[0] == 'beta':
pointer = getattr(pointer, 'bias')
elif scope_names[0] == 'output_weights':
pointer = getattr(pointer, 'weight')
elif scope_names[0] == 'squad':
pointer = getattr(pointer, 'classifier')
else:
try:
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info(f"Skipping {'/'.join(name)}")
continue
if len(scope_names) >= 2:
num = int(scope_names[1])
pointer = pointer[num]
if m_name[-11:] == '_embeddings':
pointer = getattr(pointer, 'weight')
elif m_name == 'kernel':
array = np.transpose(array)
try:
if pointer.shape != array.shape:
raise ValueError(
f'Pointer shape {pointer.shape} and array shape {array.shape} mismatched'
)
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
logger.info(f'Initialize PyTorch weight {name}')
pointer.data = torch.from_numpy(array)
return model


class BertEmbeddings(nn.Module):
"""Construct the embeddings from word, position and token_type embeddings."""

@@ -750,7 +673,6 @@ class BertPreTrainedModel(PreTrainedModel):
"""

config_class = BertConfig
load_tf_weights = load_tf_weights_in_bert
base_model_prefix = 'bert'
supports_gradient_checkpointing = True
_keys_to_ignore_on_load_missing = [r'position_ids']


modelscope/models/nlp/heads/infromation_extraction_head.py (+2, -0)

@@ -10,6 +10,8 @@ from modelscope.utils.constant import Tasks

@HEADS.register_module(
Tasks.information_extraction, module_name=Heads.information_extraction)
@HEADS.register_module(
Tasks.relation_extraction, module_name=Heads.information_extraction)
class InformationExtractionHead(TorchHead):

def __init__(self, **kwargs):


modelscope/models/nlp/heads/token_classification_head.py (+2, -0)

@@ -14,6 +14,8 @@ from modelscope.utils.constant import Tasks

@HEADS.register_module(
Tasks.token_classification, module_name=Heads.token_classification)
@HEADS.register_module(
Tasks.part_of_speech, module_name=Heads.token_classification)
class TokenClassificationHead(TorchHead):

def __init__(self, **kwargs):


modelscope/models/nlp/task_models/information_extraction.py (+2, -0)

@@ -16,6 +16,8 @@ __all__ = ['InformationExtractionModel']
@MODELS.register_module(
Tasks.information_extraction,
module_name=TaskModels.information_extraction)
@MODELS.register_module(
Tasks.relation_extraction, module_name=TaskModels.information_extraction)
class InformationExtractionModel(SingleBackboneTaskModelBase):

def __init__(self, model_dir: str, *args, **kwargs):


modelscope/models/nlp/task_models/token_classification.py (+2, -0)

@@ -19,6 +19,8 @@ __all__ = ['TokenClassificationModel']

@MODELS.register_module(
Tasks.token_classification, module_name=TaskModels.token_classification)
@MODELS.register_module(
Tasks.part_of_speech, module_name=TaskModels.token_classification)
class TokenClassificationModel(SingleBackboneTaskModelBase):

def __init__(self, model_dir: str, *args, **kwargs):


modelscope/models/nlp/passage_ranking.py → modelscope/models/nlp/text_ranking.py (+5, -5)

@@ -13,18 +13,18 @@ from modelscope.models.nlp.structbert import SbertPreTrainedModel
from modelscope.outputs import OutputKeys
from modelscope.utils.constant import Tasks

__all__ = ['PassageRanking']
__all__ = ['TextRanking']


@MODELS.register_module(Tasks.passage_ranking, module_name=Models.bert)
class PassageRanking(SbertForSequenceClassification, SbertPreTrainedModel):
@MODELS.register_module(Tasks.text_ranking, module_name=Models.bert)
class TextRanking(SbertForSequenceClassification, SbertPreTrainedModel):
base_model_prefix: str = 'bert'
supports_gradient_checkpointing = True
_keys_to_ignore_on_load_missing = [r'position_ids']

def __init__(self, config, model_dir, *args, **kwargs):
if hasattr(config, 'base_model_prefix'):
PassageRanking.base_model_prefix = config.base_model_prefix
TextRanking.base_model_prefix = config.base_model_prefix
super().__init__(config, model_dir)
self.train_batch_size = kwargs.get('train_batch_size', 4)
self.register_buffer(
@@ -74,7 +74,7 @@ class PassageRanking(SbertForSequenceClassification, SbertPreTrainedModel):
num_labels = kwargs.get('num_labels', 1)
model_args = {} if num_labels is None else {'num_labels': num_labels}

return super(SbertPreTrainedModel, PassageRanking).from_pretrained(
return super(SbertPreTrainedModel, TextRanking).from_pretrained(
pretrained_model_name_or_path=kwargs.get('model_dir'),
model_dir=kwargs.get('model_dir'),
**model_args)

+ 9
- 4
modelscope/msdatasets/cv/easycv_base.py View File

@@ -26,11 +26,16 @@ class EasyCVBaseDataset(object):
if self.split_config is not None:
self._update_data_source(kwargs['data_source'])

def _update_data_root(self, input_dict, data_root):
for k, v in input_dict.items():
if isinstance(v, str) and self.DATA_ROOT_PATTERN in v:
input_dict.update(
{k: v.replace(self.DATA_ROOT_PATTERN, data_root)})
elif isinstance(v, dict):
self._update_data_root(v, data_root)

def _update_data_source(self, data_source):
data_root = next(iter(self.split_config.values()))
data_root = data_root.rstrip(osp.sep)

for k, v in data_source.items():
if isinstance(v, str) and self.DATA_ROOT_PATTERN in v:
data_source.update(
{k: v.replace(self.DATA_ROOT_PATTERN, data_root)})
self._update_data_root(data_source, data_root)
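A hedged sketch of the recursive replacement added above; the concrete value of DATA_ROOT_PATTERN is an assumption:

# Hedged sketch: placeholder occurrences are now rewritten at any nesting depth.
data_source = {
    'ann_file': '${data_root}/annotations/train.json',   # pattern value assumed
    'img_source': {'prefix': '${data_root}/images'},
}
# after self._update_data_root(data_source, '/cache/my_dataset'), both values
# point under /cache/my_dataset, including the nested dict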

+ 4
- 2
modelscope/msdatasets/task_datasets/__init__.py View File

@@ -12,14 +12,14 @@ if TYPE_CHECKING:
from .movie_scene_segmentation import MovieSceneSegmentationDataset
from .video_summarization_dataset import VideoSummarizationDataset
from .image_inpainting import ImageInpaintingDataset
from .passage_ranking_dataset import PassageRankingDataset
from .text_ranking_dataset import TextRankingDataset

else:
_import_structure = {
'base': ['TaskDataset'],
'builder': ['TASK_DATASETS', 'build_task_dataset'],
'torch_base_dataset': ['TorchTaskDataset'],
'passage_ranking_dataset': ['PassageRankingDataset'],
'text_ranking_dataset': ['TextRankingDataset'],
'veco_dataset': ['VecoDataset'],
'image_instance_segmentation_coco_dataset':
['ImageInstanceSegmentationCocoDataset'],
@@ -27,6 +27,8 @@ else:
'movie_scene_segmentation': ['MovieSceneSegmentationDataset'],
'image_inpainting': ['ImageInpaintingDataset'],
'sidd_image_denoising_dataset': ['SiddImageDenoisingDataset'],
'image_portrait_enhancement_dataset':
['ImagePortraitEnhancementDataset'],
}
import sys



+ 23
- 0
modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py View File

@@ -0,0 +1,23 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from modelscope.utils.import_utils import LazyImportModule

if TYPE_CHECKING:
from .image_portrait_enhancement_dataset import ImagePortraitEnhancementDataset

else:
_import_structure = {
'image_portrait_enhancement_dataset':
['ImagePortraitEnhancementDataset'],
}

import sys

sys.modules[__name__] = LazyImportModule(
__name__,
globals()['__file__'],
_import_structure,
module_spec=__spec__,
extra_objects={},
)

+ 32
- 0
modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py View File

@@ -0,0 +1,32 @@
# ------------------------------------------------------------------------
# Modified from BasicSR (https://github.com/xinntao/BasicSR)
# Copyright 2018-2020 BasicSR Authors
# ------------------------------------------------------------------------

import cv2
import torch


def img2tensor(imgs, bgr2rgb=True, float32=True):
"""Numpy array to tensor.
Args:
imgs (list[ndarray] | ndarray): Input images.
bgr2rgb (bool): Whether to change bgr to rgb.
float32 (bool): Whether to change to float32.
Returns:
list[tensor] | tensor: Tensor images. If the input is a single
ndarray, a single tensor is returned.
"""

def _totensor(img, bgr2rgb, float32):
if img.shape[2] == 3 and bgr2rgb:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = torch.from_numpy(img.transpose(2, 0, 1))
if float32:
img = img.float()
return img

if isinstance(imgs, list):
return [_totensor(img, bgr2rgb, float32) for img in imgs]
else:
return _totensor(imgs, bgr2rgb, float32)
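A usage sketch for img2tensor; the file name is hypothetical:

# Hedged sketch: cv2 loads HWC/BGR; img2tensor returns CHW/RGB float32 tensors.
import cv2
import numpy as np

img = cv2.imread('portrait.png', cv2.IMREAD_COLOR).astype(np.float32) / 255.0
t = img2tensor(img)           # torch.Tensor of shape (3, H, W), RGB
ts = img2tensor([img, img])   # a list in, a list of tensors out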

+ 51
- 0
modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py View File

@@ -0,0 +1,51 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import cv2
import numpy as np

from modelscope.metainfo import Datasets, Models
from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS
from modelscope.msdatasets.task_datasets.torch_base_dataset import \
TorchTaskDataset
from modelscope.utils.constant import Tasks
from .data_utils import img2tensor


def default_loader(path):
return cv2.imread(path, cv2.IMREAD_COLOR).astype(np.float32) / 255.0


@TASK_DATASETS.register_module(
Tasks.image_portrait_enhancement, module_name=Datasets.PairedDataset)
class ImagePortraitEnhancementDataset(TorchTaskDataset):
"""Paired image dataset for image portrait enhancement.
"""

def __init__(self, dataset, is_train):
self.dataset = dataset
self.gt_size = 256
self.is_train = is_train

def __len__(self):
return len(self.dataset)

def __getitem__(self, index):

# Load gt and lq images. Dimension order: HWC; channel order: BGR;
# image range: [0, 1], float32.
item_dict = self.dataset[index]
gt_path = item_dict['hq:FILE']
img_gt = default_loader(gt_path)
lq_path = item_dict['lq:FILE']
img_lq = default_loader(lq_path)

gt_size = self.gt_size
img_gt = cv2.resize(img_gt, (gt_size, gt_size))
img_lq = cv2.resize(img_lq, (gt_size, gt_size))

# BGR to RGB, HWC to CHW, numpy to tensor
img_gt, img_lq = img2tensor([img_gt, img_lq],
bgr2rgb=True,
float32=True)

return {'input': (img_lq - 0.5) / 0.5, 'target': (img_gt - 0.5) / 0.5}
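A hedged sketch of the record format this dataset consumes; the backing list and paths are assumptions:

# Hedged sketch: records carry 'hq:FILE'/'lq:FILE' image paths; both sides are
# resized to 256x256 and normalized to [-1, 1] via (x - 0.5) / 0.5.
records = [{'hq:FILE': 'hq/0001.png', 'lq:FILE': 'lq/0001.png'}]
ds = ImagePortraitEnhancementDataset(records, is_train=True)
sample = ds[0]  # {'input': (3, 256, 256) tensor, 'target': (3, 256, 256) tensor}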

modelscope/msdatasets/task_datasets/passage_ranking_dataset.py → modelscope/msdatasets/task_datasets/text_ranking_dataset.py View File

@@ -16,8 +16,8 @@ from .torch_base_dataset import TorchTaskDataset


@TASK_DATASETS.register_module(
group_key=Tasks.passage_ranking, module_name=Models.bert)
class PassageRankingDataset(TorchTaskDataset):
group_key=Tasks.text_ranking, module_name=Models.bert)
class TextRankingDataset(TorchTaskDataset):

def __init__(self,
datasets: Union[Any, List[Any]],
@@ -35,8 +35,8 @@ class PassageRankingDataset(TorchTaskDataset):
'positive_passages')
self.neg_sequence = self.dataset_config.get('neg_sequence',
'negative_passages')
self.passage_text_fileds = self.dataset_config.get(
'passage_text_fileds', ['title', 'text'])
self.text_fileds = self.dataset_config.get('text_fileds',
['title', 'text'])
self.qid_field = self.dataset_config.get('qid_field', 'query_id')
if mode == ModeKeys.TRAIN:
train_config = kwargs.get('train', {})
@@ -58,14 +58,14 @@ class PassageRankingDataset(TorchTaskDataset):

pos_sequences = group[self.pos_sequence]
pos_sequences = [
' '.join([ele[key] for key in self.passage_text_fileds])
' '.join([ele[key] for key in self.text_fileds])
for ele in pos_sequences
]
labels.extend([1] * len(pos_sequences))

neg_sequences = group[self.neg_sequence]
neg_sequences = [
' '.join([ele[key] for key in self.passage_text_fileds])
' '.join([ele[key] for key in self.text_fileds])
for ele in neg_sequences
]

@@ -88,13 +88,13 @@ class PassageRankingDataset(TorchTaskDataset):

pos_sequences = group[self.pos_sequence]
pos_sequences = [
' '.join([ele[key] for key in self.passage_text_fileds])
' '.join([ele[key] for key in self.text_fileds])
for ele in pos_sequences
]

neg_sequences = group[self.neg_sequence]
neg_sequences = [
' '.join([ele[key] for key in self.passage_text_fileds])
' '.join([ele[key] for key in self.text_fileds])
for ele in neg_sequences
]
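For reference, a hedged sketch of the dataset_config keys the renamed class reads (key names per this diff, values are the defaults shown above):

dataset_config = {
    'pos_sequence': 'positive_passages',
    'neg_sequence': 'negative_passages',
    'text_fileds': ['title', 'text'],  # note: 'fileds' is the spelling in code
    'qid_field': 'query_id',
}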


+ 3
- 5
modelscope/msdatasets/utils/dataset_utils.py View File

@@ -7,7 +7,7 @@ from typing import Any, Mapping, Optional, Sequence, Union
from datasets.builder import DatasetBuilder

from modelscope.hub.api import HubApi
from modelscope.utils.constant import DEFAULT_DATASET_REVISION, DownloadParams
from modelscope.utils.constant import DEFAULT_DATASET_REVISION
from modelscope.utils.logger import get_logger
from .dataset_builder import MsCsvDatasetBuilder, TaskSpecificDatasetBuilder

@@ -95,15 +95,13 @@ def list_dataset_objects(hub_api: HubApi, max_limit: int, is_recursive: bool,
res (list): List of objects, e.g., ['train/images/001.png', 'train/images/002.png', 'val/images/001.png', ...]
"""
res = []
cookies = hub_api.check_cookies_upload_data(use_cookies=True)
objects = hub_api.list_oss_dataset_objects(
dataset_name=dataset_name,
namespace=namespace,
max_limit=max_limit,
is_recursive=is_recursive,
is_filter_dir=True,
revision=version,
cookies=cookies)
revision=version)

for item in objects:
object_key = item.get('Key')
@@ -174,7 +172,7 @@ def get_dataset_files(subset_split_into: dict,
modelscope_api = HubApi()
objects = list_dataset_objects(
hub_api=modelscope_api,
max_limit=DownloadParams.MAX_LIST_OBJECTS_NUM.value,
max_limit=-1,
is_recursive=True,
dataset_name=dataset_name,
namespace=namespace,


+ 2
- 1
modelscope/outputs.py View File

@@ -506,7 +506,7 @@ TASK_OUTPUTS = {
# }
Tasks.text_error_correction: [OutputKeys.OUTPUT],
Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING, OutputKeys.SCORES],
Tasks.passage_ranking: [OutputKeys.SCORES],
Tasks.text_ranking: [OutputKeys.SCORES],

# text generation result for single sample
# {
@@ -661,6 +661,7 @@ TASK_OUTPUTS = {
# "caption": "this is an image caption text."
# }
Tasks.image_captioning: [OutputKeys.CAPTION],
Tasks.ocr_recognition: [OutputKeys.TEXT],

# visual grounding result for single sample
# {


+ 1
- 1
modelscope/pipeline_inputs.py View File

@@ -162,7 +162,7 @@ TASK_INPUTS = {
'source_sentence': InputType.LIST,
'sentences_to_compare': InputType.LIST,
},
Tasks.passage_ranking: (InputType.TEXT, InputType.TEXT),
Tasks.text_ranking: (InputType.TEXT, InputType.TEXT),
Tasks.text_generation:
InputType.TEXT,
Tasks.fill_mask:


+ 14
- 5
modelscope/pipelines/audio/asr_inference_pipeline.py View File

@@ -47,22 +47,28 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):

if isinstance(audio_in, str):
# load pcm data from url if audio_in is url str
self.audio_in = load_bytes_from_url(audio_in)
self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in)
elif isinstance(audio_in, bytes):
# load pcm data from wav data if audio_in is wave format
self.audio_in = extract_pcm_from_wav(audio_in)
self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in)
else:
self.audio_in = audio_in
checking_audio_fs = None

# set the sample_rate of audio_in if checking_audio_fs is valid
if checking_audio_fs is not None:
self.audio_fs = checking_audio_fs

if recog_type is None or audio_format is None:
self.recog_type, self.audio_format, self.audio_in = asr_utils.type_checking(
audio_in=self.audio_in,
recog_type=recog_type,
audio_format=audio_format)

if hasattr(asr_utils, 'sample_rate_checking') and audio_fs is None:
self.audio_fs = asr_utils.sample_rate_checking(
if hasattr(asr_utils, 'sample_rate_checking'):
checking_audio_fs = asr_utils.sample_rate_checking(
self.audio_in, self.audio_format)
if checking_audio_fs is not None:
self.audio_fs = checking_audio_fs

if self.preprocessor is None:
self.preprocessor = WavToScp()
@@ -80,7 +86,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):

logger.info(f"Decoding with {inputs['audio_format']} files ...")

data_cmd: Sequence[Tuple[str, str]]
data_cmd: Sequence[Tuple[str, str, str]]
if inputs['audio_format'] == 'wav' or inputs['audio_format'] == 'pcm':
data_cmd = ['speech', 'sound']
elif inputs['audio_format'] == 'kaldi_ark':
@@ -88,6 +94,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
elif inputs['audio_format'] == 'tfrecord':
data_cmd = ['speech', 'tfrecord']

if inputs.__contains__('mvn_file'):
data_cmd.append(inputs['mvn_file'])

# generate asr inference command
cmd = {
'model_type': inputs['model_type'],


+ 2
- 2
modelscope/pipelines/audio/kws_kwsbp_pipeline.py View File

@@ -51,10 +51,10 @@ class KeyWordSpottingKwsbpPipeline(Pipeline):

if isinstance(audio_in, str):
# load pcm data from url if audio_in is url str
audio_in = load_bytes_from_url(audio_in)
audio_in, audio_fs = load_bytes_from_url(audio_in)
elif isinstance(audio_in, bytes):
# load pcm data from wav data if audio_in is wave format
audio_in = extract_pcm_from_wav(audio_in)
audio_in, audio_fs = extract_pcm_from_wav(audio_in)

output = self.preprocessor.forward(self.model.forward(), audio_in)
output = self.forward(output)


+ 2
- 0
modelscope/pipelines/base.py View File

@@ -433,6 +433,8 @@ def collate_fn(data, device):
if isinstance(data, dict) or isinstance(data, Mapping):
return type(data)({k: collate_fn(v, device) for k, v in data.items()})
elif isinstance(data, (tuple, list)):
if 0 == len(data):
return torch.Tensor([])
if isinstance(data[0], (int, float)):
return default_collate(data).to(device)
else:
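A hedged sketch of the edge case the two added lines cover:

# Hedged sketch: an empty list/tuple now short-circuits to an empty tensor
# instead of reaching default_collate, which cannot infer a type from [].
collate_fn([], device='cpu')           # -> tensor([])
collate_fn([1.0, 2.0], device='cpu')   # -> tensor([1., 2.]) on the device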


+ 7
- 2
modelscope/pipelines/builder.py View File

@@ -20,17 +20,22 @@ DEFAULT_MODEL_FOR_PIPELINE = {
Tasks.sentence_embedding:
(Pipelines.sentence_embedding,
'damo/nlp_corom_sentence-embedding_english-base'),
Tasks.passage_ranking: (Pipelines.passage_ranking,
'damo/nlp_corom_passage-ranking_english-base'),
Tasks.text_ranking: (Pipelines.text_ranking,
'damo/nlp_corom_passage-ranking_english-base'),
Tasks.word_segmentation:
(Pipelines.word_segmentation,
'damo/nlp_structbert_word-segmentation_chinese-base'),
Tasks.part_of_speech: (Pipelines.part_of_speech,
'damo/nlp_structbert_part-of-speech_chinese-base'),
Tasks.token_classification:
(Pipelines.part_of_speech,
'damo/nlp_structbert_part-of-speech_chinese-base'),
Tasks.named_entity_recognition:
(Pipelines.named_entity_recognition,
'damo/nlp_raner_named-entity-recognition_chinese-base-news'),
Tasks.relation_extraction:
(Pipelines.relation_extraction,
'damo/nlp_bert_relation-extraction_chinese-base'),
Tasks.information_extraction:
(Pipelines.relation_extraction,
'damo/nlp_bert_relation-extraction_chinese-base'),
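With the renamed entries above, a task-only pipeline call resolves through this table; a hedged sketch:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Hedged sketch: no model id given, so DEFAULT_MODEL_FOR_PIPELINE supplies
# 'damo/nlp_corom_passage-ranking_english-base' for Tasks.text_ranking.
ranker = pipeline(Tasks.text_ranking)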


+ 16
- 5
modelscope/pipelines/cv/body_3d_keypoints_pipeline.py View File

@@ -143,6 +143,13 @@ class Body3DKeypointsPipeline(Pipeline):
max_frame = self.keypoint_model_3d.cfg.model.INPUT.MAX_FRAME # max number of video frames used for 3D joint prediction
for i, frame in enumerate(video_frames):
kps_2d = self.human_body_2d_kps_detector(frame)
if [] == kps_2d.get('boxes'):
res = {
'success': False,
'msg': f'fail to detect person at image frame {i}'
}
return res

box = kps_2d['boxes'][
0] # box: [[[x1, y1], [x2, y2]]], N human boxes per frame; [0] selects the first detected bbox
pose = kps_2d['keypoints'][0] # keypoints: [15, 2]
@@ -180,7 +187,15 @@ class Body3DKeypointsPipeline(Pipeline):
return res

def postprocess(self, input: Dict[str, Any], **kwargs) -> Dict[str, Any]:
res = {OutputKeys.KEYPOINTS: [], OutputKeys.TIMESTAMPS: []}
output_video_path = kwargs.get('output_video', None)
if output_video_path is None:
output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name

res = {
OutputKeys.KEYPOINTS: [],
OutputKeys.TIMESTAMPS: [],
OutputKeys.OUTPUT_VIDEO: output_video_path
}

if not input['success']:
pass
@@ -189,10 +204,6 @@ class Body3DKeypointsPipeline(Pipeline):
pred_3d_pose = poses.data.cpu().numpy()[
0] # [frame_num, joint_num, joint_dim]

output_video_path = kwargs.get('output_video', None)
if output_video_path is None:
output_video_path = tempfile.NamedTemporaryFile(
suffix='.mp4').name
if 'render' in self.keypoint_model_3d.cfg.keys():
self.render_prediction(pred_3d_pose, output_video_path)
res[OutputKeys.OUTPUT_VIDEO] = output_video_path


+ 2
- 0
modelscope/pipelines/cv/face_image_generation_pipeline.py View File

@@ -61,6 +61,8 @@ class FaceImageGenerationPipeline(Pipeline):
return input

def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
if isinstance(input, str):
input = int(input)
assert isinstance(input, int)
torch.manual_seed(input)
torch.cuda.manual_seed(input)


+ 1
- 0
modelscope/pipelines/cv/image_reid_person_pipeline.py View File

@@ -53,6 +53,7 @@ class ImageReidPersonPipeline(Pipeline):
def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
img = input['img']
img_embedding = self.model(img)
img_embedding = img_embedding.detach().cpu().numpy()
return {OutputKeys.IMG_EMBEDDING: img_embedding}

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:


+ 16
- 6
modelscope/pipelines/cv/tinynas_detection_pipeline.py View File

@@ -12,6 +12,8 @@ from modelscope.pipelines.base import Input, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import Tasks
from modelscope.utils.cv.image_utils import \
show_image_object_detection_auto_result
from modelscope.utils.logger import get_logger

logger = get_logger()
@@ -52,10 +54,18 @@ class TinynasDetectionPipeline(Pipeline):

bboxes, scores, labels = self.model.postprocess(inputs['data'])
if bboxes is None:
return None
outputs = {
OutputKeys.SCORES: scores,
OutputKeys.LABELS: labels,
OutputKeys.BOXES: bboxes
}
outputs = {
OutputKeys.SCORES: [],
OutputKeys.LABELS: [],
OutputKeys.BOXES: []
}
else:
outputs = {
OutputKeys.SCORES: scores,
OutputKeys.LABELS: labels,
OutputKeys.BOXES: bboxes
}
return outputs

def show_result(self, img_path, result, save_path=None):
show_image_object_detection_auto_result(img_path, result, save_path)

+ 2
- 0
modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py View File

@@ -11,6 +11,8 @@ from modelscope.utils.logger import get_logger
logger = get_logger()


@PIPELINES.register_module(
Tasks.image_text_retrieval, module_name=Pipelines.multi_modal_embedding)
@PIPELINES.register_module(
Tasks.multi_modal_embedding, module_name=Pipelines.multi_modal_embedding)
class MultiModalEmbeddingPipeline(Pipeline):


+ 52
- 0
modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py View File

@@ -0,0 +1,52 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, Optional, Union

import torch

from modelscope.metainfo import Pipelines
from modelscope.models.multi_modal import OfaForAllTasks
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Model, Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import OfaPreprocessor, Preprocessor
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger

logger = get_logger()


@PIPELINES.register_module(
Tasks.ocr_recognition, module_name=Pipelines.ofa_ocr_recognition)
class OcrRecognitionPipeline(Pipeline):

def __init__(self,
model: Union[Model, str],
preprocessor: Optional[Preprocessor] = None,
**kwargs):
"""
use `model` and `preprocessor` to create an OCR recognition pipeline for prediction
Args:
model: model id on modelscope hub.
"""
super().__init__(model=model)
assert isinstance(model, str) or isinstance(model, Model), \
'model must be a single str or OfaForAllTasks'
if isinstance(model, str):
pipe_model = Model.from_pretrained(model)
elif isinstance(model, Model):
pipe_model = model
else:
raise NotImplementedError
pipe_model.model.eval()
if preprocessor is None:
if isinstance(pipe_model, OfaForAllTasks):
preprocessor = OfaPreprocessor(pipe_model.model_dir)
super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs)

def forward(self, inputs: Dict[str, Any],
**forward_params) -> Dict[str, Any]:
with torch.no_grad():
return super().forward(inputs, **forward_params)

def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
return inputs
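A usage sketch for the new OFA OCR pipeline; the model id and image path are assumptions, not taken from this diff:

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Hedged sketch with a hypothetical model id and local image.
ocr = pipeline(Tasks.ocr_recognition, model='damo/ofa_ocr-recognition_scene_base_zh')
print(ocr('scene_text.jpg'))  # -> {'text': ...} per the ocr_recognition entry in TASK_OUTPUTS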

+ 2
- 2
modelscope/pipelines/nlp/__init__.py View File

@@ -17,7 +17,7 @@ if TYPE_CHECKING:
from .fill_mask_ponet_pipeline import FillMaskPonetPipeline
from .information_extraction_pipeline import InformationExtractionPipeline
from .named_entity_recognition_pipeline import NamedEntityRecognitionPipeline
from .passage_ranking_pipeline import PassageRankingPipeline
from .text_ranking_pipeline import TextRankingPipeline
from .sentence_embedding_pipeline import SentenceEmbeddingPipeline
from .sequence_classification_pipeline import SequenceClassificationPipeline
from .summarization_pipeline import SummarizationPipeline
@@ -51,7 +51,7 @@ else:
'information_extraction_pipeline': ['InformationExtractionPipeline'],
'named_entity_recognition_pipeline':
['NamedEntityRecognitionPipeline'],
'passage_ranking_pipeline': ['PassageRankingPipeline'],
'text_ranking_pipeline': ['TextRankingPipeline'],
'sentence_embedding_pipeline': ['SentenceEmbeddingPipeline'],
'sequence_classification_pipeline': ['SequenceClassificationPipeline'],
'summarization_pipeline': ['SummarizationPipeline'],


+ 2
- 0
modelscope/pipelines/nlp/information_extraction_pipeline.py View File

@@ -17,6 +17,8 @@ __all__ = ['InformationExtractionPipeline']

@PIPELINES.register_module(
Tasks.information_extraction, module_name=Pipelines.relation_extraction)
@PIPELINES.register_module(
Tasks.relation_extraction, module_name=Pipelines.relation_extraction)
class InformationExtractionPipeline(Pipeline):

def __init__(self,


+ 1
- 1
modelscope/pipelines/nlp/summarization_pipeline.py View File

@@ -13,7 +13,7 @@ logger = get_logger()


@PIPELINES.register_module(
Tasks.summarization, module_name=Pipelines.text_generation)
Tasks.text_summarization, module_name=Pipelines.text_generation)
class SummarizationPipeline(Pipeline):

def __init__(self,


+ 27
- 24
modelscope/pipelines/nlp/table_question_answering_pipeline.py View File

@@ -72,6 +72,7 @@ class TableQuestionAnsweringPipeline(Pipeline):
action = self.action_ops[result['action']]
headers = table['header_name']
current_sql = result['sql']
current_sql['from'] = [table['table_id']]

if history_sql is None:
return current_sql
@@ -216,10 +217,11 @@ class TableQuestionAnsweringPipeline(Pipeline):
else:
return current_sql

def sql_dict_to_str(self, result, table):
def sql_dict_to_str(self, result, tables):
"""
convert sql struct to string
"""
table = tables[result['sql']['from'][0]]
header_names = table['header_name'] + ['空列']
header_ids = table['header_id'] + ['null']
sql = result['sql']
@@ -279,42 +281,43 @@ class TableQuestionAnsweringPipeline(Pipeline):
"""
result = inputs['result']
history_sql = inputs['history_sql']
result['sql'] = self.post_process_multi_turn(
history_sql=history_sql,
result=result,
table=self.db.tables[result['table_id']])
result['sql']['from'] = [result['table_id']]
sql = self.sql_dict_to_str(
result=result, table=self.db.tables[result['table_id']])
try:
result['sql'] = self.post_process_multi_turn(
history_sql=history_sql,
result=result,
table=self.db.tables[result['table_id']])
except Exception:
result['sql'] = history_sql
sql = self.sql_dict_to_str(result=result, tables=self.db.tables)

# add sqlite
if self.db.is_use_sqlite:
try:
cursor = self.db.connection_obj.cursor().execute(sql.query)
names = [{
'name':
description[0],
'label':
self.db.tables[result['table_id']]['headerid2name'].get(
description[0], description[0])
} for description in cursor.description]
cells = []
header_ids, header_names = [], []
for description in cursor.description:
header_ids.append(self.db.tables[result['table_id']]
['headerid2name'].get(
description[0], description[0]))
header_names.append(description[0])
rows = []
for res in cursor.fetchall():
row = {}
for name, cell in zip(names, res):
row[name['name']] = cell
cells.append(row)
tabledata = {'headers': names, 'cells': cells}
rows.append(list(res))
tabledata = {
'header_id': header_ids,
'header_name': header_names,
'rows': rows
}
except Exception:
tabledata = {'headers': [], 'cells': []}
tabledata = {'header_id': [], 'header_name': [], 'rows': []}
else:
tabledata = {'headers': [], 'cells': []}
tabledata = {'header_id': [], 'header_name': [], 'rows': []}

output = {
OutputKeys.SQL_STRING: sql.string,
OutputKeys.SQL_QUERY: sql.query,
OutputKeys.HISTORY: result['sql'],
OutputKeys.QUERT_RESULT: json.dumps(tabledata, ensure_ascii=False),
OutputKeys.QUERT_RESULT: tabledata,
}

return output
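For clarity, a hedged sketch of the reshaped query-result payload (field names per this diff, values hypothetical):

tabledata = {
    'header_id': ['city_name'],
    'header_name': ['城市'],
    'rows': [['hangzhou'], ['beijing']],
}
# previously {'headers': [...], 'cells': [...]} with one dict per row, and the
# payload was json.dumps-ed; it is now returned as a plain dict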


modelscope/pipelines/nlp/passage_ranking_pipeline.py → modelscope/pipelines/nlp/text_ranking_pipeline.py View File

@@ -9,15 +9,15 @@ from modelscope.models import Model
from modelscope.outputs import OutputKeys
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import PassageRankingPreprocessor, Preprocessor
from modelscope.preprocessors import Preprocessor, TextRankingPreprocessor
from modelscope.utils.constant import Tasks

__all__ = ['PassageRankingPipeline']
__all__ = ['TextRankingPipeline']


@PIPELINES.register_module(
Tasks.passage_ranking, module_name=Pipelines.passage_ranking)
class PassageRankingPipeline(Pipeline):
Tasks.text_ranking, module_name=Pipelines.text_ranking)
class TextRankingPipeline(Pipeline):

def __init__(self,
model: Union[Model, str],
@@ -36,7 +36,7 @@ class PassageRankingPipeline(Pipeline):
Model) else Model.from_pretrained(model)

if preprocessor is None:
preprocessor = PassageRankingPreprocessor(
preprocessor = TextRankingPreprocessor(
model.model_dir if isinstance(model, Model) else model,
sequence_length=kwargs.pop('sequence_length', 128))
model.eval()

+ 2
- 0
modelscope/pipelines/nlp/token_classification_pipeline.py View File

@@ -18,6 +18,8 @@ __all__ = ['TokenClassificationPipeline']

@PIPELINES.register_module(
Tasks.token_classification, module_name=Pipelines.part_of_speech)
@PIPELINES.register_module(
Tasks.part_of_speech, module_name=Pipelines.part_of_speech)
class TokenClassificationPipeline(Pipeline):

def __init__(self,


+ 2
- 2
modelscope/preprocessors/__init__.py View File

@@ -21,7 +21,7 @@ if TYPE_CHECKING:
FillMaskPoNetPreprocessor,
NLPPreprocessor,
NLPTokenizerPreprocessorBase,
PassageRankingPreprocessor,
TextRankingPreprocessor,
RelationExtractionPreprocessor,
SentenceEmbeddingPreprocessor,
SequenceClassificationPreprocessor,
@@ -62,7 +62,7 @@ else:
'FillMaskPoNetPreprocessor',
'NLPPreprocessor',
'NLPTokenizerPreprocessorBase',
'PassageRankingPreprocessor',
'TextRankingPreprocessor',
'RelationExtractionPreprocessor',
'SentenceEmbeddingPreprocessor',
'SequenceClassificationPreprocessor',


+ 6
- 0
modelscope/preprocessors/asr.py View File

@@ -133,6 +133,12 @@ class WavToScp(Preprocessor):
else:
inputs['asr_model_config'] = asr_model_config

if inputs['model_config'].__contains__('mvn_file'):
mvn_file = os.path.join(inputs['model_workspace'],
inputs['model_config']['mvn_file'])
assert os.path.exists(mvn_file), 'mvn_file does not exist'
inputs['mvn_file'] = mvn_file

elif inputs['model_type'] == Frameworks.tf:
assert inputs['model_config'].__contains__(
'vocab_file'), 'vocab_file does not exist'
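A hedged sketch of the configuration the new mvn_file block reads; the key name is from this diff, the paths are hypothetical:

# Hedged sketch: 'mvn_file' is resolved against the model workspace and must exist.
inputs = {
    'model_workspace': '/models/asr',
    'model_config': {'mvn_file': 'am.mvn'},
}
# after the block above: inputs['mvn_file'] == '/models/asr/am.mvn'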


+ 4
- 12
modelscope/preprocessors/multi_modal.py View File

@@ -16,6 +16,7 @@ from .base import Preprocessor
from .builder import PREPROCESSORS
from .ofa import * # noqa
from .ofa.utils.collate import collate_fn
from .ofa.utils.constant import OFA_TASK_KEY_MAPPING

__all__ = [
'OfaPreprocessor',
@@ -40,6 +41,7 @@ class OfaPreprocessor(Preprocessor):
"""
super().__init__(*args, **kwargs)
preprocess_mapping = {
Tasks.ocr_recognition: OfaOcrRecognitionPreprocessor,
Tasks.image_captioning: OfaImageCaptioningPreprocessor,
Tasks.visual_grounding: OfaVisualGroundingPreprocessor,
Tasks.visual_question_answering:
@@ -47,26 +49,16 @@ class OfaPreprocessor(Preprocessor):
Tasks.visual_entailment: OfaVisualEntailmentPreprocessor,
Tasks.image_classification: OfaImageClassificationPreprocessor,
Tasks.text_classification: OfaTextClassificationPreprocessor,
Tasks.summarization: OfaSummarizationPreprocessor,
Tasks.text_summarization: OfaSummarizationPreprocessor,
Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor
}
input_key_mapping = {
Tasks.image_captioning: ['image'],
Tasks.image_classification: ['image'],
Tasks.summarization: ['text'],
Tasks.text_classification: ['text', 'text2'],
Tasks.visual_grounding: ['image', 'text'],
Tasks.visual_question_answering: ['image', 'text'],
Tasks.visual_entailment: ['image', 'text', 'text2'],
Tasks.text_to_image_synthesis: ['text']
}
model_dir = model_dir if osp.exists(model_dir) else snapshot_download(
model_dir)
self.cfg = Config.from_file(
osp.join(model_dir, ModelFile.CONFIGURATION))
self.preprocess = preprocess_mapping[self.cfg.task](
cfg=self.cfg, model_dir=model_dir, mode=mode)
self.keys = input_key_mapping[self.cfg.task]
self.keys = OFA_TASK_KEY_MAPPING[self.cfg.task]
self.tokenizer = self.preprocess.tokenizer
if kwargs.get('no_collate', None):
self.no_collate = True


+ 2
- 2
modelscope/preprocessors/nlp/__init__.py View File

@@ -11,7 +11,7 @@ if TYPE_CHECKING:
FillMaskPoNetPreprocessor,
NLPPreprocessor,
NLPTokenizerPreprocessorBase,
PassageRankingPreprocessor,
TextRankingPreprocessor,
RelationExtractionPreprocessor,
SentenceEmbeddingPreprocessor,
SequenceClassificationPreprocessor,
@@ -33,7 +33,7 @@ else:
'FillMaskPoNetPreprocessor',
'NLPPreprocessor',
'NLPTokenizerPreprocessorBase',
'PassageRankingPreprocessor',
'TextRankingPreprocessor',
'RelationExtractionPreprocessor',
'SentenceEmbeddingPreprocessor',
'SequenceClassificationPreprocessor',


+ 28
- 32
modelscope/preprocessors/nlp/nlp_base.py View File

@@ -1,9 +1,10 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import os.path as osp
import re
from typing import Any, Dict, Iterable, Optional, Tuple, Union
from typing import Any, Dict, Optional, Tuple, Union

import json
import numpy as np
import sentencepiece as spm
import torch
@@ -13,8 +14,7 @@ from modelscope.metainfo import Models, Preprocessors
from modelscope.outputs import OutputKeys
from modelscope.preprocessors.base import Preprocessor
from modelscope.preprocessors.builder import PREPROCESSORS
from modelscope.utils.config import (Config, ConfigFields,
use_task_specific_params)
from modelscope.utils.config import Config, ConfigFields
from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile
from modelscope.utils.hub import get_model_type, parse_label_mapping
from modelscope.utils.logger import get_logger
@@ -29,7 +29,7 @@ __all__ = [
'NLPPreprocessor',
'FillMaskPoNetPreprocessor',
'NLPTokenizerPreprocessorBase',
'PassageRankingPreprocessor',
'TextRankingPreprocessor',
'RelationExtractionPreprocessor',
'SentenceEmbeddingPreprocessor',
'SequenceClassificationPreprocessor',
@@ -83,6 +83,15 @@ class NLPTokenizerPreprocessorBase(Preprocessor):

self._mode = mode
self.label = kwargs.pop('label', OutputKeys.LABEL)
self.use_fast = kwargs.pop('use_fast', None)
if self.use_fast is None and os.path.isfile(
os.path.join(model_dir, 'tokenizer_config.json')):
with open(os.path.join(model_dir, 'tokenizer_config.json'),
'r') as f:
json_config = json.load(f)
self.use_fast = json_config.get('use_fast')
self.use_fast = False if self.use_fast is None else self.use_fast

self.label2id = None
if 'label2id' in kwargs:
self.label2id = kwargs.pop('label2id')
@@ -118,32 +127,23 @@ class NLPTokenizerPreprocessorBase(Preprocessor):
if model_type in (Models.structbert, Models.gpt3, Models.palm,
Models.plug):
from modelscope.models.nlp.structbert import SbertTokenizer, SbertTokenizerFast
return SbertTokenizer.from_pretrained(
model_dir
) if self._mode == ModeKeys.INFERENCE else SbertTokenizerFast.from_pretrained(
model_dir)
tokenizer = SbertTokenizerFast if self.use_fast else SbertTokenizer
return tokenizer.from_pretrained(model_dir)
elif model_type == Models.veco:
from modelscope.models.nlp.veco import VecoTokenizer, VecoTokenizerFast
return VecoTokenizer.from_pretrained(
model_dir
) if self._mode == ModeKeys.INFERENCE else VecoTokenizerFast.from_pretrained(
model_dir)
tokenizer = VecoTokenizerFast if self.use_fast else VecoTokenizer
return tokenizer.from_pretrained(model_dir)
elif model_type == Models.deberta_v2:
from modelscope.models.nlp.deberta_v2 import DebertaV2Tokenizer, DebertaV2TokenizerFast
return DebertaV2Tokenizer.from_pretrained(
model_dir
) if self._mode == ModeKeys.INFERENCE else DebertaV2TokenizerFast.from_pretrained(
model_dir)
tokenizer = DebertaV2TokenizerFast if self.use_fast else DebertaV2Tokenizer
return tokenizer.from_pretrained(model_dir)
elif not self.is_transformer_based_model:
from transformers import BertTokenizer, BertTokenizerFast
return BertTokenizer.from_pretrained(
model_dir
) if self._mode == ModeKeys.INFERENCE else BertTokenizerFast.from_pretrained(
model_dir)
tokenizer = BertTokenizerFast if self.use_fast else BertTokenizer
return tokenizer.from_pretrained(model_dir)
else:
return AutoTokenizer.from_pretrained(
model_dir,
use_fast=False if self._mode == ModeKeys.INFERENCE else True)
model_dir, use_fast=self.use_fast)

def __call__(self, data: Union[str, Tuple, Dict]) -> Dict[str, Any]:
"""process the raw input data
@@ -217,7 +217,7 @@ class NLPTokenizerPreprocessorBase(Preprocessor):
return isinstance(label, str) or isinstance(label, int)

if labels is not None:
if isinstance(labels, Iterable) and all([label_can_be_mapped(label) for label in labels]) \
if isinstance(labels, (tuple, list)) and all([label_can_be_mapped(label) for label in labels]) \
and self.label2id is not None:
output[OutputKeys.LABELS] = [
self.label2id[str(label)] for label in labels
@@ -245,9 +245,9 @@ class NLPPreprocessor(NLPTokenizerPreprocessorBase):


@PREPROCESSORS.register_module(
Fields.nlp, module_name=Preprocessors.passage_ranking)
class PassageRankingPreprocessor(NLPTokenizerPreprocessorBase):
"""The tokenizer preprocessor used in passage ranking model.
Fields.nlp, module_name=Preprocessors.text_ranking)
class TextRankingPreprocessor(NLPTokenizerPreprocessorBase):
"""The tokenizer preprocessor used in text-ranking model.
"""

def __init__(self,
@@ -314,8 +314,7 @@ class SequenceClassificationPreprocessor(NLPTokenizerPreprocessorBase):

def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs):
kwargs['truncation'] = kwargs.get('truncation', True)
kwargs['padding'] = kwargs.get(
'padding', False if mode == ModeKeys.INFERENCE else 'max_length')
kwargs['padding'] = kwargs.get('padding', 'max_length')
kwargs['max_length'] = kwargs.pop('sequence_length', 128)
super().__init__(model_dir, mode=mode, **kwargs)

@@ -594,9 +593,6 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase):
else:
self.is_split_into_words = self.tokenizer.init_kwargs.get(
'is_split_into_words', False)
if 'label2id' in kwargs:
kwargs.pop('label2id')
self.tokenize_kwargs = kwargs

@type_assert(object, str)
def __call__(self, data: str) -> Dict[str, Any]:
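Stepping back to the use_fast change above, a hedged sketch of the resolution order (explicit kwarg, then tokenizer_config.json, then False); the model path and file content are hypothetical:

# tokenizer_config.json in the model dir: {"use_fast": true}
pre = SequenceClassificationPreprocessor('/models/sbert')                  # fast tokenizer
pre = SequenceClassificationPreprocessor('/models/sbert', use_fast=False)  # kwarg wins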


+ 1
- 0
modelscope/preprocessors/ofa/__init__.py View File

@@ -1,6 +1,7 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .image_captioning import OfaImageCaptioningPreprocessor
from .image_classification import OfaImageClassificationPreprocessor
from .ocr_recognition import OfaOcrRecognitionPreprocessor
from .summarization import OfaSummarizationPreprocessor
from .text_classification import OfaTextClassificationPreprocessor
from .text_to_image_synthesis import OfaTextToImageSynthesisPreprocessor


+ 16
- 0
modelscope/preprocessors/ofa/base.py View File

@@ -6,9 +6,12 @@ from os import path as osp
import json
import numpy as np
import torch
from PIL import Image

from modelscope.models.multi_modal.ofa import OFATokenizer, OFATokenizerZH
from modelscope.preprocessors.image import load_image
from modelscope.utils.trie import Trie
from .utils.constant import OFA_TASK_KEY_MAPPING
from .utils.random_help import set_torch_seed


@@ -59,6 +62,14 @@ class OfaBasePreprocessor:
self.mean = [0.5, 0.5, 0.5]
self.std = [0.5, 0.5, 0.5]
self.patch_image_size = self.cfg.model.get('patch_image_size', 480)
self.column_map = {
key: key
for key in OFA_TASK_KEY_MAPPING[self.cfg.task]
}
if hasattr(self.cfg,
'dataset') and self.cfg.dataset.column_map is not None:
for k, v in self.cfg.dataset.column_map.items():
self.column_map[k] = v
self.transtab = str.maketrans(
{key: None
for key in string.punctuation})
@@ -147,3 +158,8 @@ class OfaBasePreprocessor:
constraint_prefix_token)
constraint_mask[i][constraint_nodes] = True
sample['constraint_mask'] = constraint_mask

def get_img_pil(self, path_or_url_or_pil):
image = path_or_url_or_pil if isinstance(path_or_url_or_pil, Image.Image) \
else load_image(path_or_url_or_pil)
return image

+ 5
- 9
modelscope/preprocessors/ofa/image_captioning.py View File

@@ -1,12 +1,9 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict, Union
from typing import Any, Dict

import torch
from PIL import Image
from torchvision import transforms

from modelscope.preprocessors.image import load_image
from modelscope.utils.constant import ModeKeys
from .base import OfaBasePreprocessor

@@ -46,7 +43,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):

def _build_train_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
sample = self._build_infer_sample(data)
target = data['text']
target = data[self.column_map['text']]
target = target.translate(self.transtab).strip()
target_token_list = target.strip().split()
target = ' '.join(target_token_list[:self.max_tgt_length])
@@ -56,8 +53,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):
return sample

def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
image = data['image'] if isinstance(
data['image'], Image.Image) else load_image(data['image'])
image = self.get_img_pil(data[self.column_map['image']])
patch_image = self.patch_resize_transform(image)
prompt = self.cfg.model.get('prompt', ' what does the image describe?')
inputs = self.tokenize_text(prompt)
@@ -66,6 +62,6 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor):
'patch_image': patch_image,
'patch_mask': torch.tensor([True])
}
if 'text' in data:
sample['label'] = data['text']
if self.column_map['text'] in data:
sample['label'] = data[self.column_map['text']]
return sample

+ 97
- 0
modelscope/preprocessors/ofa/ocr_recognition.py View File

@@ -0,0 +1,97 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict

import torch
from PIL import Image
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.transforms import functional as F

from modelscope.preprocessors.image import load_image
from .base import OfaBasePreprocessor

IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)


def ocr_resize(img, patch_image_size, is_document=False):
img = img.convert('RGB')
width, height = img.size

if is_document:
new_height, new_width = 64, 1920
else:
if width >= height:
new_width = max(64, patch_image_size)
new_height = max(64, int(patch_image_size * (height / width)))
top = (patch_image_size - new_height) // 2
bottom = patch_image_size - new_height - top
left, right = 0, 0
else:
new_height = max(64, patch_image_size)
new_width = max(64, int(patch_image_size * (width / height)))
left = (patch_image_size - new_width) // 2
right = patch_image_size - new_width - left
top, bottom = 0, 0

img_new = F.resize(
img,
(new_height, new_width),
interpolation=InterpolationMode.BICUBIC,
)

if is_document:
img_split = transforms.ToTensor()(img_new).chunk(4, dim=-1)
img_new = transforms.ToPILImage()(torch.cat(img_split, dim=-2))
new_width, new_height = img_new.size
top = (patch_image_size - new_height) // 2
bottom = patch_image_size - new_height - top
left, right = 0, 0

img_new = F.pad(
img_new, padding=[left, top, right, bottom], padding_mode='edge')
assert img_new.size == (patch_image_size, patch_image_size)

return img_new


class OfaOcrRecognitionPreprocessor(OfaBasePreprocessor):

def __init__(self, cfg, model_dir):
"""preprocess the data

Args:
cfg(modelscope.utils.config.ConfigDict) : model config
model_dir (str): model path
"""
super(OfaOcrRecognitionPreprocessor, self).__init__(cfg, model_dir)
# Initialize transform
if self.cfg.model.imagenet_default_mean_and_std:
mean = IMAGENET_DEFAULT_MEAN
std = IMAGENET_DEFAULT_STD
else:
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

self.patch_resize_transform = transforms.Compose([
lambda image: ocr_resize(
image,
self.cfg.model.patch_image_size,
is_document=self.cfg.model.is_document),
transforms.ToTensor(),
transforms.Normalize(mean=mean, std=std),
])

def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
image = data['image'] if isinstance(
data['image'], Image.Image) else load_image(data['image'])
patch_image = self.patch_resize_transform(image)
prompt = self.cfg.model.get('prompt', '图片上的文字是什么?')
inputs = self.get_inputs(prompt)

sample = {
'source': inputs,
'patch_image': patch_image,
'patch_mask': torch.tensor([True])
}
return sample
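A worked example of the ocr_resize geometry above (non-document branch):

# A 200x100 image with patch_image_size=480 resizes to 480x240, then gets
# 120 px of edge padding on top and bottom, giving exactly 480x480.
from PIL import Image
out = ocr_resize(Image.new('RGB', (200, 100)), 480, is_document=False)
assert out.size == (480, 480)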

+ 13
- 0
modelscope/preprocessors/ofa/utils/constant.py View File

@@ -0,0 +1,13 @@
from modelscope.utils.constant import Tasks

OFA_TASK_KEY_MAPPING = {
Tasks.ocr_recognition: ['image'],
Tasks.image_captioning: ['image'],
Tasks.image_classification: ['image'],
Tasks.text_summarization: ['text'],
Tasks.text_classification: ['text', 'text2'],
Tasks.visual_grounding: ['image', 'text'],
Tasks.visual_question_answering: ['image', 'text'],
Tasks.visual_entailment: ['image', 'text', 'text2'],
Tasks.text_to_image_synthesis: ['text']
}
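A hedged sketch of how this mapping interacts with the column_map built in ofa/base.py above; the dataset column name is hypothetical:

from modelscope.utils.constant import Tasks

column_map = {k: k for k in OFA_TASK_KEY_MAPPING[Tasks.image_captioning]}
column_map.update({'text': 'caption'})  # a cfg.dataset.column_map override
# preprocessors then read data[column_map['text']], i.e. data['caption']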

+ 1
- 1
modelscope/preprocessors/star3/fields/database.py View File

@@ -13,7 +13,7 @@ class Database:
tokenizer,
table_file_path,
syn_dict_file_path,
is_use_sqlite=False):
is_use_sqlite=True):
self.tokenizer = tokenizer
self.is_use_sqlite = is_use_sqlite
if self.is_use_sqlite:


Some files were not shown because too many files changed in this diff
