diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6290ff4..48fe7547 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://gitlab.com/pycqa/flake8.git - rev: 3.8.3 + rev: 4.0.0 hooks: - id: flake8 exclude: thirdparty/|examples/ diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml index 138561e3..0b2e2f39 100644 --- a/.pre-commit-config_local.yaml +++ b/.pre-commit-config_local.yaml @@ -1,6 +1,6 @@ repos: - repo: /home/admin/pre-commit/flake8 - rev: 3.8.3 + rev: 4.0.0 hooks: - id: flake8 exclude: thirdparty/|examples/ diff --git a/data/test/audios/asr_example_8K.wav b/data/test/audios/asr_example_8K.wav new file mode 100644 index 00000000..956aad27 --- /dev/null +++ b/data/test/audios/asr_example_8K.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e999c247bfebb03d556a31722f0ce7145cac20a67fac9da813ad336e1f549f9f +size 38954 diff --git a/data/test/audios/asr_example_cn_dialect.wav b/data/test/audios/asr_example_cn_dialect.wav new file mode 100644 index 00000000..e18fb05d --- /dev/null +++ b/data/test/audios/asr_example_cn_dialect.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32eb8d4d537941bf0edea69cd6723e8ba489fa3df64e13e29f96e4fae0b856f4 +size 93676 diff --git a/data/test/audios/asr_example_cn_en.wav b/data/test/audios/asr_example_cn_en.wav new file mode 100644 index 00000000..8baf3193 --- /dev/null +++ b/data/test/audios/asr_example_cn_en.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57aee13ade70be6b2c6e4f5e5c7404bdb03057b63828baefbaadcf23855a4cb +size 472012 diff --git a/data/test/audios/asr_example_en.wav b/data/test/audios/asr_example_en.wav new file mode 100644 index 00000000..fa996eec --- /dev/null +++ b/data/test/audios/asr_example_en.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee8e0460ca707f108782be0d93c555bf34fb6b1cb297e5fceed70192cc65f9b +size 71244 diff --git a/data/test/audios/asr_example_es.wav b/data/test/audios/asr_example_es.wav new file mode 100644 index 00000000..95b22dc3 --- /dev/null +++ b/data/test/audios/asr_example_es.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450e31f9df8c5b48c617900625f01cb64c484f079a9843179fe9feaa7d163e61 +size 181964 diff --git a/data/test/audios/asr_example_id.wav b/data/test/audios/asr_example_id.wav new file mode 100644 index 00000000..54c30614 --- /dev/null +++ b/data/test/audios/asr_example_id.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255494c41bc1dfb0c954d827ec6ce775900e4f7a55fb0a7881bdf9d66a03b425 +size 112078 diff --git a/data/test/audios/asr_example_ja.wav b/data/test/audios/asr_example_ja.wav new file mode 100644 index 00000000..e953fee2 --- /dev/null +++ b/data/test/audios/asr_example_ja.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a55277908bbc3ef60a0cf56b230eb507b9e837574e8f493e93644b1d21c281 +size 200556 diff --git a/data/test/audios/asr_example_ko.wav b/data/test/audios/asr_example_ko.wav new file mode 100644 index 00000000..0dad1be3 --- /dev/null +++ b/data/test/audios/asr_example_ko.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee92191836c76412463d8b282a7ab4e1aa57386ba699ec011a3e2c4d64f32f4b +size 162636 diff --git a/data/test/audios/asr_example_ru.wav b/data/test/audios/asr_example_ru.wav new file mode 100644 index 00000000..b0cb8f2f --- /dev/null +++ b/data/test/audios/asr_example_ru.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d1537fc584c1505d8aa10ec8c86af57ab661199e4f28fd7ffee3c22d1e4e61 +size 160204 diff --git a/data/test/images/image_ocr_recognition.jpg b/data/test/images/image_ocr_recognition.jpg new file mode 100644 index 00000000..b41287cd --- /dev/null +++ b/data/test/images/image_ocr_recognition.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772b19f76c98044e39330853928624f10e085106a4292b4dd19f865531080747 +size 959 diff --git a/data/test/regression/sbert-base-tnews.bin b/data/test/regression/sbert-base-tnews.bin new file mode 100644 index 00000000..1546860f --- /dev/null +++ b/data/test/regression/sbert-base-tnews.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bce1341f4b55d536771dad6e2b280458579f46c3216474ceb8a926022ab53d0 +size 151572 diff --git a/data/test/regression/sbert_nli.bin b/data/test/regression/sbert_nli.bin index a5f680bb..68efb778 100644 --- a/data/test/regression/sbert_nli.bin +++ b/data/test/regression/sbert_nli.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44e3925c15d86d8596baeb6bd1d153d86f57b7489798b2cf988a1248e110fd62 -size 62231 +oid sha256:6af5024a26337a440c7ea2935fce84af558dd982ee97a2f027bb922cc874292b +size 61741 diff --git a/data/test/regression/sbert_sen_sim.bin b/data/test/regression/sbert_sen_sim.bin index a59cbe0b..362f762c 100644 --- a/data/test/regression/sbert_sen_sim.bin +++ b/data/test/regression/sbert_sen_sim.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ff17a0272752de4c88d4254b2e881f97f8ef022f03609d03ee1de0ae964368a -size 62235 +oid sha256:bbce084781342ca7274c2e4d02ed5c5de43ba213a3b76328d5994404d6544c41 +size 61745 diff --git a/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py b/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py index dc1e2b92..52dab4bc 100644 --- a/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py +++ b/modelscope/exporters/nlp/sbert_for_sequence_classification_exporter.py @@ -23,12 +23,14 @@ class SbertForSequenceClassificationExporter(TorchModelExporter): def generate_dummy_inputs(self, shape: Tuple = None, + pair: bool = False, **kwargs) -> Dict[str, Any]: """Generate dummy inputs for model exportation to onnx or other formats by tracing. @param shape: A tuple of input shape which should have at most two dimensions. shape = (1, ) batch_size=1, sequence_length will be taken from the preprocessor. shape = (8, 128) batch_size=1, sequence_length=128, which will cover the config of the preprocessor. + @param pair: Generate sentence pairs or single sentences for dummy inputs. @return: Dummy inputs. """ @@ -55,7 +57,7 @@ class SbertForSequenceClassificationExporter(TorchModelExporter): **sequence_length }) preprocessor: Preprocessor = build_preprocessor(cfg, field_name) - if preprocessor.pair: + if pair: first_sequence = preprocessor.tokenizer.unk_token second_sequence = preprocessor.tokenizer.unk_token else: diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index dc4d0ab2..f8ca683a 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -1,8 +1,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +# yapf: disable +import datetime import os import pickle import shutil +import tempfile from collections import defaultdict from http import HTTPStatus from http.cookiejar import CookieJar @@ -16,17 +19,25 @@ from modelscope.hub.constants import (API_RESPONSE_FIELD_DATA, API_RESPONSE_FIELD_GIT_ACCESS_TOKEN, API_RESPONSE_FIELD_MESSAGE, API_RESPONSE_FIELD_USERNAME, - DEFAULT_CREDENTIALS_PATH) + DEFAULT_CREDENTIALS_PATH, Licenses, + ModelVisibility) +from modelscope.hub.errors import (InvalidParameter, NotExistError, + NotLoginException, RequestError, + datahub_raise_on_error, + handle_http_post_error, + handle_http_response, is_ok, raise_on_error) +from modelscope.hub.git import GitCommandWrapper +from modelscope.hub.repository import Repository +from modelscope.hub.utils.utils import (get_endpoint, + model_id_to_group_owner_name) from modelscope.utils.config_ds import DOWNLOADED_DATASETS_PATH from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION, DatasetFormations, DatasetMetaFormats, - DownloadMode) + DownloadMode, ModelFile) from modelscope.utils.logger import get_logger -from .errors import (InvalidParameter, NotExistError, RequestError, - datahub_raise_on_error, handle_http_post_error, - handle_http_response, is_ok, raise_on_error) -from .utils.utils import get_endpoint, model_id_to_group_owner_name + +# yapf: enable logger = get_logger() @@ -169,11 +180,106 @@ class HubApi: else: r.raise_for_status() - def list_model(self, - owner_or_group: str, - page_number=1, - page_size=10) -> dict: - """List model in owner or group. + def push_model(self, + model_id: str, + model_dir: str, + visibility: int = ModelVisibility.PUBLIC, + license: str = Licenses.APACHE_V2, + chinese_name: Optional[str] = None, + commit_message: Optional[str] = 'upload model', + revision: Optional[str] = DEFAULT_MODEL_REVISION): + """ + Upload model from a given directory to given repository. A valid model directory + must contain a configuration.json file. + + This function upload the files in given directory to given repository. If the + given repository is not exists in remote, it will automatically create it with + given visibility, license and chinese_name parameters. If the revision is also + not exists in remote repository, it will create a new branch for it. + + This function must be called before calling HubApi's login with a valid token + which can be obtained from ModelScope's website. + + Args: + model_id (`str`): + The model id to be uploaded, caller must have write permission for it. + model_dir(`str`): + The Absolute Path of the finetune result. + visibility(`int`, defaults to `0`): + Visibility of the new created model(1-private, 5-public). If the model is + not exists in ModelScope, this function will create a new model with this + visibility and this parameter is required. You can ignore this parameter + if you make sure the model's existence. + license(`str`, defaults to `None`): + License of the new created model(see License). If the model is not exists + in ModelScope, this function will create a new model with this license + and this parameter is required. You can ignore this parameter if you + make sure the model's existence. + chinese_name(`str`, *optional*, defaults to `None`): + chinese name of the new created model. + commit_message(`str`, *optional*, defaults to `None`): + commit message of the push request. + revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION): + which branch to push. If the branch is not exists, It will create a new + branch and push to it. + """ + if model_id is None: + raise InvalidParameter('model_id cannot be empty!') + if model_dir is None: + raise InvalidParameter('model_dir cannot be empty!') + if not os.path.exists(model_dir) or os.path.isfile(model_dir): + raise InvalidParameter('model_dir must be a valid directory.') + cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) + if not os.path.exists(cfg_file): + raise ValueError(f'{model_dir} must contain a configuration.json.') + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise NotLoginException('Must login before upload!') + files_to_save = os.listdir(model_dir) + try: + self.get_model(model_id=model_id) + except Exception: + if visibility is None or license is None: + raise InvalidParameter( + 'visibility and license cannot be empty if want to create new repo' + ) + logger.info('Create new model %s' % model_id) + self.create_model( + model_id=model_id, + visibility=visibility, + license=license, + chinese_name=chinese_name) + tmp_dir = tempfile.mkdtemp() + git_wrapper = GitCommandWrapper() + try: + repo = Repository(model_dir=tmp_dir, clone_from=model_id) + branches = git_wrapper.get_remote_branches(tmp_dir) + if revision not in branches: + logger.info('Create new branch %s' % revision) + git_wrapper.new_branch(tmp_dir, revision) + git_wrapper.checkout(tmp_dir, revision) + for f in files_to_save: + if f[0] != '.': + src = os.path.join(model_dir, f) + if os.path.isdir(src): + shutil.copytree(src, os.path.join(tmp_dir, f)) + else: + shutil.copy(src, tmp_dir) + if not commit_message: + date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + commit_message = '[automsg] push model %s to hub at %s' % ( + model_id, date) + repo.push(commit_message=commit_message, branch=revision) + except Exception: + raise + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + def list_models(self, + owner_or_group: str, + page_number=1, + page_size=10) -> dict: + """List models in owner or group. Args: owner_or_group(`str`): owner or group. @@ -390,11 +496,13 @@ class HubApi: return resp['Data'] def list_oss_dataset_objects(self, dataset_name, namespace, max_limit, - is_recursive, is_filter_dir, revision, - cookies): + is_recursive, is_filter_dir, revision): url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/oss/tree/?' \ f'MaxLimit={max_limit}&Revision={revision}&Recursive={is_recursive}&FilterDir={is_filter_dir}' - cookies = requests.utils.dict_from_cookiejar(cookies) + + cookies = ModelScopeConfig.get_cookies() + if cookies: + cookies = requests.utils.dict_from_cookiejar(cookies) resp = requests.get(url=url, cookies=cookies) resp = resp.json() diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py index 1cc5645b..8ffc60bc 100644 --- a/modelscope/hub/file_download.py +++ b/modelscope/hub/file_download.py @@ -11,13 +11,12 @@ from typing import Dict, Optional, Union from uuid import uuid4 import requests -from filelock import FileLock from tqdm import tqdm from modelscope import __version__ +from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.utils.constant import DEFAULT_MODEL_REVISION from modelscope.utils.logger import get_logger -from .api import HubApi, ModelScopeConfig from .constants import FILE_HASH from .errors import FileDownloadError, NotExistError from .utils.caching import ModelFileSystemCache diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index db76506e..fe1d1554 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -1,13 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import os -import re import subprocess from typing import List -from xmlrpc.client import Boolean from modelscope.utils.logger import get_logger -from .api import ModelScopeConfig from .errors import GitError logger = get_logger() @@ -132,6 +129,7 @@ class GitCommandWrapper(metaclass=Singleton): return response def add_user_info(self, repo_base_dir, repo_name): + from modelscope.hub.api import ModelScopeConfig user_name, user_email = ModelScopeConfig.get_user_info() if user_name and user_email: # config user.name and user.email if exist @@ -184,8 +182,11 @@ class GitCommandWrapper(metaclass=Singleton): info = [ line.strip() for line in rsp.stdout.decode('utf8').strip().split(os.linesep) - ][1:] - return ['/'.join(line.split('/')[1:]) for line in info] + ] + if len(info) == 1: + return ['/'.join(info[0].split('/')[1:])] + else: + return ['/'.join(line.split('/')[1:]) for line in info[1:]] def pull(self, repo_dir: str): cmds = ['-C', repo_dir, 'pull'] diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py index d92089ed..35c831a9 100644 --- a/modelscope/hub/repository.py +++ b/modelscope/hub/repository.py @@ -7,7 +7,6 @@ from modelscope.hub.errors import GitError, InvalidParameter, NotLoginException from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION) from modelscope.utils.logger import get_logger -from .api import ModelScopeConfig from .git import GitCommandWrapper from .utils.utils import get_endpoint @@ -47,6 +46,7 @@ class Repository: err_msg = 'a non-default value of revision cannot be empty.' raise InvalidParameter(err_msg) + from modelscope.hub.api import ModelScopeConfig if auth_token: self.auth_token = auth_token else: @@ -166,7 +166,7 @@ class DatasetRepository: err_msg = 'a non-default value of revision cannot be empty.' raise InvalidParameter(err_msg) self.revision = revision - + from modelscope.hub.api import ModelScopeConfig if auth_token: self.auth_token = auth_token else: diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index cde6ad34..ac57d1b1 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -5,9 +5,9 @@ import tempfile from pathlib import Path from typing import Dict, Optional, Union +from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.utils.constant import DEFAULT_MODEL_REVISION from modelscope.utils.logger import get_logger -from .api import HubApi, ModelScopeConfig from .constants import FILE_HASH from .errors import NotExistError from .file_download import (get_file_download_url, http_get_file, diff --git a/modelscope/hub/upload.py b/modelscope/hub/upload.py deleted file mode 100644 index 9dffc60e..00000000 --- a/modelscope/hub/upload.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. - -import datetime -import os -import shutil -import tempfile -import uuid -from typing import Dict, Optional -from uuid import uuid4 - -from filelock import FileLock - -from modelscope import __version__ -from modelscope.hub.api import HubApi, ModelScopeConfig -from modelscope.hub.errors import InvalidParameter, NotLoginException -from modelscope.hub.git import GitCommandWrapper -from modelscope.hub.repository import Repository -from modelscope.utils.constant import DEFAULT_MODEL_REVISION, ModelFile -from modelscope.utils.logger import get_logger - -logger = get_logger() - - -def upload_folder(model_id: str, - model_dir: str, - visibility: int = 0, - license: str = None, - chinese_name: Optional[str] = None, - commit_message: Optional[str] = None, - revision: Optional[str] = DEFAULT_MODEL_REVISION): - """ - Upload model from a given directory to given repository. A valid model directory - must contain a configuration.json file. - - This function upload the files in given directory to given repository. If the - given repository is not exists in remote, it will automatically create it with - given visibility, license and chinese_name parameters. If the revision is also - not exists in remote repository, it will create a new branch for it. - - This function must be called before calling HubApi's login with a valid token - which can be obtained from ModelScope's website. - - Args: - model_id (`str`): - The model id to be uploaded, caller must have write permission for it. - model_dir(`str`): - The Absolute Path of the finetune result. - visibility(`int`, defaults to `0`): - Visibility of the new created model(1-private, 5-public). If the model is - not exists in ModelScope, this function will create a new model with this - visibility and this parameter is required. You can ignore this parameter - if you make sure the model's existence. - license(`str`, defaults to `None`): - License of the new created model(see License). If the model is not exists - in ModelScope, this function will create a new model with this license - and this parameter is required. You can ignore this parameter if you - make sure the model's existence. - chinese_name(`str`, *optional*, defaults to `None`): - chinese name of the new created model. - commit_message(`str`, *optional*, defaults to `None`): - commit message of the push request. - revision (`str`, *optional*, default to DEFAULT_MODEL_REVISION): - which branch to push. If the branch is not exists, It will create a new - branch and push to it. - """ - if model_id is None: - raise InvalidParameter('model_id cannot be empty!') - if model_dir is None: - raise InvalidParameter('model_dir cannot be empty!') - if not os.path.exists(model_dir) or os.path.isfile(model_dir): - raise InvalidParameter('model_dir must be a valid directory.') - cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) - if not os.path.exists(cfg_file): - raise ValueError(f'{model_dir} must contain a configuration.json.') - cookies = ModelScopeConfig.get_cookies() - if cookies is None: - raise NotLoginException('Must login before upload!') - files_to_save = os.listdir(model_dir) - api = HubApi() - try: - api.get_model(model_id=model_id) - except Exception: - if visibility is None or license is None: - raise InvalidParameter( - 'visibility and license cannot be empty if want to create new repo' - ) - logger.info('Create new model %s' % model_id) - api.create_model( - model_id=model_id, - visibility=visibility, - license=license, - chinese_name=chinese_name) - tmp_dir = tempfile.mkdtemp() - git_wrapper = GitCommandWrapper() - try: - repo = Repository(model_dir=tmp_dir, clone_from=model_id) - branches = git_wrapper.get_remote_branches(tmp_dir) - if revision not in branches: - logger.info('Create new branch %s' % revision) - git_wrapper.new_branch(tmp_dir, revision) - git_wrapper.checkout(tmp_dir, revision) - for f in files_to_save: - if f[0] != '.': - src = os.path.join(model_dir, f) - if os.path.isdir(src): - shutil.copytree(src, os.path.join(tmp_dir, f)) - else: - shutil.copy(src, tmp_dir) - if not commit_message: - date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') - commit_message = '[automsg] push model %s to hub at %s' % ( - model_id, date) - repo.push(commit_message=commit_message, branch=revision) - except Exception: - raise - finally: - shutil.rmtree(tmp_dir, ignore_errors=True) diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index c3fe5594..b559f5c0 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -9,7 +9,9 @@ class Models(object): Model name should only contain model info but not task info. """ + # tinynas models tinynas_detection = 'tinynas-detection' + tinynas_damoyolo = 'tinynas-damoyolo' # vision models detection = 'detection' @@ -234,7 +236,7 @@ class Pipelines(object): conversational_text_to_sql = 'conversational-text-to-sql' table_question_answering_pipeline = 'table-question-answering-pipeline' sentence_embedding = 'sentence-embedding' - passage_ranking = 'passage-ranking' + text_ranking = 'text-ranking' relation_extraction = 'relation-extraction' document_segmentation = 'document-segmentation' feature_extraction = 'feature-extraction' @@ -261,6 +263,7 @@ class Pipelines(object): text_to_image_synthesis = 'text-to-image-synthesis' video_multi_modal_embedding = 'video-multi-modal-embedding' image_text_retrieval = 'image-text-retrieval' + ofa_ocr_recognition = 'ofa-ocr-recognition' class Trainers(object): @@ -295,7 +298,7 @@ class Trainers(object): dialog_intent_trainer = 'dialog-intent-trainer' nlp_base_trainer = 'nlp-base-trainer' nlp_veco_trainer = 'nlp-veco-trainer' - nlp_passage_ranking_trainer = 'nlp-passage-ranking-trainer' + nlp_text_ranking_trainer = 'nlp-text-ranking-trainer' # audio trainers speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' @@ -341,7 +344,7 @@ class Preprocessors(object): zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' text_error_correction = 'text-error-correction' sentence_embedding = 'sentence-embedding' - passage_ranking = 'passage-ranking' + text_ranking = 'text-ranking' sequence_labeling_tokenizer = 'sequence-labeling-tokenizer' word_segment_text_to_label_preprocessor = 'word-segment-text-to-label-preprocessor' fill_mask = 'fill-mask' @@ -374,7 +377,7 @@ class Metrics(object): audio_noise_metric = 'audio-noise-metric' # text gen - bleu = 'bleu' + BLEU = 'bleu' # metrics for image denoise task image_denoise_metric = 'image-denoise-metric' @@ -396,6 +399,8 @@ class Metrics(object): movie_scene_segmentation_metric = 'movie-scene-segmentation-metric' # metric for inpainting task image_inpainting_metric = 'image-inpainting-metric' + # metric for ocr + NED = 'ned' class Optimizers(object): @@ -454,9 +459,10 @@ class Datasets(object): """ Names for different datasets. """ ClsDataset = 'ClsDataset' - Face2dKeypointsDataset = 'Face2dKeypointsDataset' + Face2dKeypointsDataset = 'FaceKeypointDataset' HandCocoWholeBodyDataset = 'HandCocoWholeBodyDataset' - HumanWholeBodyKeypointDataset = 'HumanWholeBodyKeypointDataset' + HumanWholeBodyKeypointDataset = 'WholeBodyCocoTopDownDataset' SegDataset = 'SegDataset' DetDataset = 'DetDataset' DetImagesMixDataset = 'DetImagesMixDataset' + PairedDataset = 'PairedDataset' diff --git a/modelscope/metrics/bleu_metric.py b/modelscope/metrics/bleu_metric.py index 43d1b105..7c134b6a 100644 --- a/modelscope/metrics/bleu_metric.py +++ b/modelscope/metrics/bleu_metric.py @@ -11,7 +11,7 @@ from .builder import METRICS, MetricKeys EVAL_BLEU_ORDER = 4 -@METRICS.register_module(group_key=default_group, module_name=Metrics.bleu) +@METRICS.register_module(group_key=default_group, module_name=Metrics.BLEU) class BleuMetric(Metric): """The metric computation bleu for text generation classes. diff --git a/modelscope/metrics/builder.py b/modelscope/metrics/builder.py index ee4d2840..da3b64c7 100644 --- a/modelscope/metrics/builder.py +++ b/modelscope/metrics/builder.py @@ -23,6 +23,7 @@ class MetricKeys(object): BLEU_4 = 'bleu-4' ROUGE_1 = 'rouge-1' ROUGE_L = 'rouge-l' + NED = 'ned' # ocr metric task_default_metrics = { @@ -32,6 +33,7 @@ task_default_metrics = { Tasks.sentiment_classification: [Metrics.seq_cls_metric], Tasks.token_classification: [Metrics.token_cls_metric], Tasks.text_generation: [Metrics.text_gen_metric], + Tasks.text_classification: [Metrics.seq_cls_metric], Tasks.image_denoising: [Metrics.image_denoise_metric], Tasks.image_color_enhancement: [Metrics.image_color_enhance_metric], Tasks.image_portrait_enhancement: diff --git a/modelscope/metrics/image_portrait_enhancement_metric.py b/modelscope/metrics/image_portrait_enhancement_metric.py index 5a81e956..7d94aade 100644 --- a/modelscope/metrics/image_portrait_enhancement_metric.py +++ b/modelscope/metrics/image_portrait_enhancement_metric.py @@ -2,6 +2,7 @@ # https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/metrics/psnr_ssim.py from typing import Dict +import cv2 import numpy as np from modelscope.metainfo import Metrics @@ -37,6 +38,7 @@ class ImagePortraitEnhancementMetric(Metric): def add(self, outputs: Dict, inputs: Dict): ground_truths = outputs['target'] eval_results = outputs['pred'] + self.preds.extend(eval_results) self.targets.extend(ground_truths) diff --git a/modelscope/models/audio/tts/voice.py b/modelscope/models/audio/tts/voice.py index dc830db5..b7240088 100644 --- a/modelscope/models/audio/tts/voice.py +++ b/modelscope/models/audio/tts/voice.py @@ -2,6 +2,7 @@ import os import pickle as pkl +from threading import Lock import json import numpy as np @@ -27,6 +28,7 @@ class Voice: self.__am_config = AttrDict(**am_config) self.__voc_config = AttrDict(**voc_config) self.__model_loaded = False + self.__lock = Lock() if 'am' not in self.__am_config: raise TtsModelConfigurationException( 'modelscope error: am configuration invalid') @@ -71,34 +73,35 @@ class Voice: self.__generator.remove_weight_norm() def __am_forward(self, symbol_seq): - with torch.no_grad(): - inputs_feat_lst = self.__ling_unit.encode_symbol_sequence( - symbol_seq) - inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to( - self.__device) - inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to( - self.__device) - inputs_syllable = torch.from_numpy(inputs_feat_lst[2]).long().to( - self.__device) - inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to( - self.__device) - inputs_ling = torch.stack( - [inputs_sy, inputs_tone, inputs_syllable, inputs_ws], - dim=-1).unsqueeze(0) - inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to( - self.__device).unsqueeze(0) - inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to( - self.__device).unsqueeze(0) - inputs_len = torch.zeros(1).to(self.__device).long( - ) + inputs_emo.size(1) - 1 # minus 1 for "~" - res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1], - inputs_spk[:, :-1], inputs_len) - postnet_outputs = res['postnet_outputs'] - LR_length_rounded = res['LR_length_rounded'] - valid_length = int(LR_length_rounded[0].item()) - postnet_outputs = postnet_outputs[ - 0, :valid_length, :].cpu().numpy() - return postnet_outputs + with self.__lock: + with torch.no_grad(): + inputs_feat_lst = self.__ling_unit.encode_symbol_sequence( + symbol_seq) + inputs_sy = torch.from_numpy(inputs_feat_lst[0]).long().to( + self.__device) + inputs_tone = torch.from_numpy(inputs_feat_lst[1]).long().to( + self.__device) + inputs_syllable = torch.from_numpy( + inputs_feat_lst[2]).long().to(self.__device) + inputs_ws = torch.from_numpy(inputs_feat_lst[3]).long().to( + self.__device) + inputs_ling = torch.stack( + [inputs_sy, inputs_tone, inputs_syllable, inputs_ws], + dim=-1).unsqueeze(0) + inputs_emo = torch.from_numpy(inputs_feat_lst[4]).long().to( + self.__device).unsqueeze(0) + inputs_spk = torch.from_numpy(inputs_feat_lst[5]).long().to( + self.__device).unsqueeze(0) + inputs_len = torch.zeros(1).to(self.__device).long( + ) + inputs_emo.size(1) - 1 # minus 1 for "~" + res = self.__am_net(inputs_ling[:, :-1, :], inputs_emo[:, :-1], + inputs_spk[:, :-1], inputs_len) + postnet_outputs = res['postnet_outputs'] + LR_length_rounded = res['LR_length_rounded'] + valid_length = int(LR_length_rounded[0].item()) + postnet_outputs = postnet_outputs[ + 0, :valid_length, :].cpu().numpy() + return postnet_outputs def __vocoder_forward(self, melspec): dim0 = list(melspec.shape)[-1] @@ -118,14 +121,15 @@ class Voice: return audio def forward(self, symbol_seq): - if not self.__model_loaded: - torch.manual_seed(self.__am_config.seed) - if torch.cuda.is_available(): + with self.__lock: + if not self.__model_loaded: torch.manual_seed(self.__am_config.seed) - self.__device = torch.device('cuda') - else: - self.__device = torch.device('cpu') - self.__load_am() - self.__load_vocoder() - self.__model_loaded = True + if torch.cuda.is_available(): + torch.manual_seed(self.__am_config.seed) + self.__device = torch.device('cuda') + else: + self.__device = torch.device('cpu') + self.__load_am() + self.__load_vocoder() + self.__model_loaded = True return self.__vocoder_forward(self.__am_forward(symbol_seq)) diff --git a/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py b/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py index c1d0a52f..0d1cd15d 100644 --- a/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py +++ b/modelscope/models/cv/face_human_hand_detection/one_stage_detector.py @@ -56,9 +56,6 @@ class OneStageDetector(nn.Module): def inference(self, meta): with torch.no_grad(): - torch.cuda.synchronize() preds = self(meta['img']) - torch.cuda.synchronize() results = self.head.post_process(preds, meta) - torch.cuda.synchronize() return results diff --git a/modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py b/modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py index 3650ac7b..26e9e532 100644 --- a/modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py +++ b/modelscope/models/cv/image_portrait_enhancement/image_portrait_enhancement.py @@ -35,7 +35,7 @@ class ImagePortraitEnhancement(TorchModel): """ super().__init__(model_dir, *args, **kwargs) - self.size = 512 + self.size = 256 self.style_dim = 512 self.n_mlp = 8 self.mean_path_length = 0 @@ -131,9 +131,9 @@ class ImagePortraitEnhancement(TorchModel): return path_penalty, path_mean.detach(), path_lengths @torch.no_grad() - def _evaluate_postprocess(self, src: Tensor, + def _evaluate_postprocess(self, input: Tensor, target: Tensor) -> Dict[str, list]: - preds, _ = self.generator(src) + preds, _ = self.generator(input) preds = list(torch.split(preds, 1, 0)) targets = list(torch.split(target, 1, 0)) @@ -144,11 +144,11 @@ class ImagePortraitEnhancement(TorchModel): return {'pred': preds, 'target': targets} - def _train_forward_d(self, src: Tensor, target: Tensor) -> Tensor: + def _train_forward_d(self, input: Tensor, target: Tensor) -> Tensor: self.requires_grad(self.generator, False) self.requires_grad(self.discriminator, True) - preds, _ = self.generator(src) + preds, _ = self.generator(input) fake_pred = self.discriminator(preds) real_pred = self.discriminator(target) @@ -156,27 +156,27 @@ class ImagePortraitEnhancement(TorchModel): return d_loss - def _train_forward_d_r1(self, src: Tensor, target: Tensor) -> Tensor: - src.requires_grad = True + def _train_forward_d_r1(self, input: Tensor, target: Tensor) -> Tensor: + input.requires_grad = True target.requires_grad = True real_pred = self.discriminator(target) r1_loss = self.d_r1_loss(real_pred, target) return r1_loss - def _train_forward_g(self, src: Tensor, target: Tensor) -> Tensor: + def _train_forward_g(self, input: Tensor, target: Tensor) -> Tensor: self.requires_grad(self.generator, True) self.requires_grad(self.discriminator, False) - preds, _ = self.generator(src) + preds, _ = self.generator(input) fake_pred = self.discriminator(preds) - g_loss = self.g_nonsaturating_loss(fake_pred, preds, target, src) + g_loss = self.g_nonsaturating_loss(fake_pred, preds, target, input) return g_loss - def _train_forward_g_path(self, src: Tensor, target: Tensor) -> Tensor: - fake_img, latents = self.generator(src, return_latents=True) + def _train_forward_g_path(self, input: Tensor, target: Tensor) -> Tensor: + fake_img, latents = self.generator(input, return_latents=True) path_loss, self.mean_path_length, path_lengths = self.g_path_regularize( fake_img, latents, self.mean_path_length) @@ -184,8 +184,8 @@ class ImagePortraitEnhancement(TorchModel): return path_loss @torch.no_grad() - def _inference_forward(self, src: Tensor) -> Dict[str, Tensor]: - return {'outputs': (self.generator(src)[0] * 0.5 + 0.5).clamp(0, 1)} + def _inference_forward(self, input: Tensor) -> Dict[str, Tensor]: + return {'outputs': (self.generator(input)[0] * 0.5 + 0.5).clamp(0, 1)} def forward(self, input: Dict[str, Tensor]) -> Dict[str, Union[list, Tensor]]: diff --git a/modelscope/models/cv/image_to_image_generation/__init__.py b/modelscope/models/cv/image_to_image_generation/__init__.py index fb408086..1af3e55f 100644 --- a/modelscope/models/cv/image_to_image_generation/__init__.py +++ b/modelscope/models/cv/image_to_image_generation/__init__.py @@ -1,2 +1,2 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from . import data, models, ops diff --git a/modelscope/models/cv/image_to_image_generation/data/__init__.py b/modelscope/models/cv/image_to_image_generation/data/__init__.py index 33c8cf44..22b9d22c 100644 --- a/modelscope/models/cv/image_to_image_generation/data/__init__.py +++ b/modelscope/models/cv/image_to_image_generation/data/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule diff --git a/modelscope/models/cv/image_to_image_generation/data/transforms.py b/modelscope/models/cv/image_to_image_generation/data/transforms.py index 5376d813..29a25b4b 100644 --- a/modelscope/models/cv/image_to_image_generation/data/transforms.py +++ b/modelscope/models/cv/image_to_image_generation/data/transforms.py @@ -1,3 +1,4 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import math import random diff --git a/modelscope/models/cv/image_to_image_generation/models/__init__.py b/modelscope/models/cv/image_to_image_generation/models/__init__.py index ec6a46fd..e98421f2 100644 --- a/modelscope/models/cv/image_to_image_generation/models/__init__.py +++ b/modelscope/models/cv/image_to_image_generation/models/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule diff --git a/modelscope/models/cv/image_to_image_generation/ops/__init__.py b/modelscope/models/cv/image_to_image_generation/ops/__init__.py index 49674b49..e3dac584 100644 --- a/modelscope/models/cv/image_to_image_generation/ops/__init__.py +++ b/modelscope/models/cv/image_to_image_generation/ops/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule diff --git a/modelscope/models/cv/image_to_image_translation/__init__.py b/modelscope/models/cv/image_to_image_translation/__init__.py index e69de29b..35aab6be 100644 --- a/modelscope/models/cv/image_to_image_translation/__init__.py +++ b/modelscope/models/cv/image_to_image_translation/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. + +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + + from .model_translation import UNet + +else: + _import_structure = { + 'image_to_image_translation_model': ['UNet'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/cv/image_to_image_translation/data/__init__.py b/modelscope/models/cv/image_to_image_translation/data/__init__.py index 72450016..724bca04 100644 --- a/modelscope/models/cv/image_to_image_translation/data/__init__.py +++ b/modelscope/models/cv/image_to_image_translation/data/__init__.py @@ -1 +1,2 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .transforms import * # noqa F403 diff --git a/modelscope/models/cv/image_to_image_translation/models/__init__.py b/modelscope/models/cv/image_to_image_translation/models/__init__.py index 322d78f2..7fdd8189 100644 --- a/modelscope/models/cv/image_to_image_translation/models/__init__.py +++ b/modelscope/models/cv/image_to_image_translation/models/__init__.py @@ -1,2 +1,3 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .autoencoder import * # noqa F403 from .clip import * # noqa F403 diff --git a/modelscope/models/cv/image_to_image_translation/ops/__init__.py b/modelscope/models/cv/image_to_image_translation/ops/__init__.py index 59082d72..474c811b 100644 --- a/modelscope/models/cv/image_to_image_translation/ops/__init__.py +++ b/modelscope/models/cv/image_to_image_translation/ops/__init__.py @@ -1,3 +1,4 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .degradation import * # noqa F403 from .diffusion import * # noqa F403 from .losses import * # noqa F403 diff --git a/modelscope/models/cv/product_retrieval_embedding/__init__.py b/modelscope/models/cv/product_retrieval_embedding/__init__.py index 7a02a60f..2cbc9099 100644 --- a/modelscope/models/cv/product_retrieval_embedding/__init__.py +++ b/modelscope/models/cv/product_retrieval_embedding/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from typing import TYPE_CHECKING from modelscope.utils.import_utils import LazyImportModule diff --git a/modelscope/models/cv/product_retrieval_embedding/item_detection.py b/modelscope/models/cv/product_retrieval_embedding/item_detection.py index d5589969..2002c6cb 100644 --- a/modelscope/models/cv/product_retrieval_embedding/item_detection.py +++ b/modelscope/models/cv/product_retrieval_embedding/item_detection.py @@ -1,3 +1,4 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import cv2 import numpy as np diff --git a/modelscope/models/cv/product_retrieval_embedding/item_embedding.py b/modelscope/models/cv/product_retrieval_embedding/item_embedding.py index 0444596c..ea9ec846 100644 --- a/modelscope/models/cv/product_retrieval_embedding/item_embedding.py +++ b/modelscope/models/cv/product_retrieval_embedding/item_embedding.py @@ -1,3 +1,4 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. import cv2 import numpy as np import torch.nn as nn diff --git a/modelscope/models/cv/product_retrieval_embedding/item_model.py b/modelscope/models/cv/product_retrieval_embedding/item_model.py index 85a636c0..3964efbe 100644 --- a/modelscope/models/cv/product_retrieval_embedding/item_model.py +++ b/modelscope/models/cv/product_retrieval_embedding/item_model.py @@ -1,3 +1,5 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. + import os.path as osp from typing import Any, Dict diff --git a/modelscope/models/cv/text_driven_segmentation/lseg_model.py b/modelscope/models/cv/text_driven_segmentation/lseg_model.py index 9a5754c6..ec381356 100644 --- a/modelscope/models/cv/text_driven_segmentation/lseg_model.py +++ b/modelscope/models/cv/text_driven_segmentation/lseg_model.py @@ -93,7 +93,7 @@ class TextDrivenSeg(TorchModel): """ with torch.no_grad(): if self.device_id == -1: - output = self.model(image) + output = self.model(image, [text]) else: device = torch.device('cuda', self.device_id) output = self.model(image.to(device), [text]) diff --git a/modelscope/models/cv/tinynas_detection/__init__.py b/modelscope/models/cv/tinynas_detection/__init__.py index 13532d10..6d696ac4 100644 --- a/modelscope/models/cv/tinynas_detection/__init__.py +++ b/modelscope/models/cv/tinynas_detection/__init__.py @@ -7,10 +7,12 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .tinynas_detector import Tinynas_detector + from .tinynas_damoyolo import DamoYolo else: _import_structure = { 'tinynas_detector': ['TinynasDetector'], + 'tinynas_damoyolo': ['DamoYolo'], } import sys diff --git a/modelscope/models/cv/tinynas_detection/backbone/tinynas.py b/modelscope/models/cv/tinynas_detection/backbone/tinynas.py index 814ee550..87a28a2f 100755 --- a/modelscope/models/cv/tinynas_detection/backbone/tinynas.py +++ b/modelscope/models/cv/tinynas_detection/backbone/tinynas.py @@ -4,6 +4,7 @@ import torch import torch.nn as nn +from modelscope.utils.file_utils import read_file from ..core.base_ops import Focus, SPPBottleneck, get_activation from ..core.repvgg_block import RepVggBlock @@ -49,12 +50,16 @@ class ResConvK1KX(nn.Module): kernel_size, stride, force_resproj=False, - act='silu'): + act='silu', + reparam=False): super(ResConvK1KX, self).__init__() self.stride = stride self.conv1 = ConvKXBN(in_c, btn_c, 1, 1) - self.conv2 = RepVggBlock( - btn_c, out_c, kernel_size, stride, act='identity') + if not reparam: + self.conv2 = ConvKXBN(btn_c, out_c, 3, stride) + else: + self.conv2 = RepVggBlock( + btn_c, out_c, kernel_size, stride, act='identity') if act is None: self.activation_function = torch.relu @@ -97,7 +102,8 @@ class SuperResConvK1KX(nn.Module): stride, num_blocks, with_spp=False, - act='silu'): + act='silu', + reparam=False): super(SuperResConvK1KX, self).__init__() if act is None: self.act = torch.relu @@ -124,7 +130,8 @@ class SuperResConvK1KX(nn.Module): this_kernel_size, this_stride, force_resproj, - act=act) + act=act, + reparam=reparam) self.block_list.append(the_block) if block_id == 0 and with_spp: self.block_list.append( @@ -248,7 +255,8 @@ class TinyNAS(nn.Module): with_spp=False, use_focus=False, need_conv1=True, - act='silu'): + act='silu', + reparam=False): super(TinyNAS, self).__init__() assert len(out_indices) == len(out_channels) self.out_indices = out_indices @@ -281,7 +289,8 @@ class TinyNAS(nn.Module): block_info['s'], block_info['L'], spp, - act=act) + act=act, + reparam=reparam) self.block_list.append(the_block) elif the_block_class == 'SuperResConvKXKX': spp = with_spp if idx == len(structure_info) - 1 else False @@ -325,8 +334,8 @@ class TinyNAS(nn.Module): def load_tinynas_net(backbone_cfg): # load masternet model to path import ast - - struct_str = ''.join([x.strip() for x in backbone_cfg.net_structure_str]) + net_structure_str = read_file(backbone_cfg.structure_file) + struct_str = ''.join([x.strip() for x in net_structure_str]) struct_info = ast.literal_eval(struct_str) for layer in struct_info: if 'nbitsA' in layer: @@ -342,6 +351,6 @@ def load_tinynas_net(backbone_cfg): use_focus=backbone_cfg.use_focus, act=backbone_cfg.act, need_conv1=backbone_cfg.need_conv1, - ) + reparam=backbone_cfg.reparam) return model diff --git a/modelscope/models/cv/tinynas_detection/detector.py b/modelscope/models/cv/tinynas_detection/detector.py index 615b13a8..42a71381 100644 --- a/modelscope/models/cv/tinynas_detection/detector.py +++ b/modelscope/models/cv/tinynas_detection/detector.py @@ -30,7 +30,7 @@ class SingleStageDetector(TorchModel): """ super().__init__(model_dir, *args, **kwargs) - config_path = osp.join(model_dir, 'airdet_s.py') + config_path = osp.join(model_dir, self.config_name) config = parse_config(config_path) self.cfg = config model_path = osp.join(model_dir, config.model.name) @@ -41,6 +41,9 @@ class SingleStageDetector(TorchModel): self.conf_thre = config.model.head.nms_conf_thre self.nms_thre = config.model.head.nms_iou_thre + if self.cfg.model.backbone.name == 'TinyNAS': + self.cfg.model.backbone.structure_file = osp.join( + model_dir, self.cfg.model.backbone.structure_file) self.backbone = build_backbone(self.cfg.model.backbone) self.neck = build_neck(self.cfg.model.neck) self.head = build_head(self.cfg.model.head) diff --git a/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py b/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py index 41f35968..66904ed1 100644 --- a/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py +++ b/modelscope/models/cv/tinynas_detection/head/gfocal_v2_tiny.py @@ -124,11 +124,13 @@ class GFocalHead_Tiny(nn.Module): simOTA_iou_weight=3.0, octbase=8, simlqe=False, + use_lqe=True, **kwargs): self.simlqe = simlqe self.num_classes = num_classes self.in_channels = in_channels self.strides = strides + self.use_lqe = use_lqe self.feat_channels = feat_channels if isinstance(feat_channels, list) \ else [feat_channels] * len(self.strides) @@ -181,15 +183,20 @@ class GFocalHead_Tiny(nn.Module): groups=self.conv_groups, norm=self.norm, act=self.act)) - if not self.simlqe: - conf_vector = [nn.Conv2d(4 * self.total_dim, self.reg_channels, 1)] + if self.use_lqe: + if not self.simlqe: + conf_vector = [ + nn.Conv2d(4 * self.total_dim, self.reg_channels, 1) + ] + else: + conf_vector = [ + nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1) + ] + conf_vector += [self.relu] + conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()] + reg_conf = nn.Sequential(*conf_vector) else: - conf_vector = [ - nn.Conv2d(4 * (self.reg_max + 1), self.reg_channels, 1) - ] - conf_vector += [self.relu] - conf_vector += [nn.Conv2d(self.reg_channels, 1, 1), nn.Sigmoid()] - reg_conf = nn.Sequential(*conf_vector) + reg_conf = None return cls_convs, reg_convs, reg_conf @@ -290,21 +297,27 @@ class GFocalHead_Tiny(nn.Module): N, C, H, W = bbox_pred.size() prob = F.softmax( bbox_pred.reshape(N, 4, self.reg_max + 1, H, W), dim=2) - if not self.simlqe: - prob_topk, _ = prob.topk(self.reg_topk, dim=2) - - if self.add_mean: - stat = torch.cat( - [prob_topk, prob_topk.mean(dim=2, keepdim=True)], dim=2) + if self.use_lqe: + if not self.simlqe: + prob_topk, _ = prob.topk(self.reg_topk, dim=2) + + if self.add_mean: + stat = torch.cat( + [prob_topk, + prob_topk.mean(dim=2, keepdim=True)], + dim=2) + else: + stat = prob_topk + + quality_score = reg_conf( + stat.reshape(N, 4 * self.total_dim, H, W)) else: - stat = prob_topk + quality_score = reg_conf( + bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W)) - quality_score = reg_conf(stat.reshape(N, 4 * self.total_dim, H, W)) + cls_score = gfl_cls(cls_feat).sigmoid() * quality_score else: - quality_score = reg_conf( - bbox_pred.reshape(N, 4 * (self.reg_max + 1), H, W)) - - cls_score = gfl_cls(cls_feat).sigmoid() * quality_score + cls_score = gfl_cls(cls_feat).sigmoid() flatten_cls_score = cls_score.flatten(start_dim=2).transpose(1, 2) flatten_bbox_pred = bbox_pred.flatten(start_dim=2).transpose(1, 2) diff --git a/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py b/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py index b710572f..b88c39f2 100644 --- a/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py +++ b/modelscope/models/cv/tinynas_detection/neck/giraffe_fpn_v2.py @@ -14,7 +14,6 @@ class GiraffeNeckV2(nn.Module): self, depth=1.0, width=1.0, - in_features=[2, 3, 4], in_channels=[256, 512, 1024], out_channels=[256, 512, 1024], depthwise=False, @@ -24,7 +23,6 @@ class GiraffeNeckV2(nn.Module): block_name='BasicBlock', ): super().__init__() - self.in_features = in_features self.in_channels = in_channels Conv = DWConv if depthwise else BaseConv @@ -169,8 +167,7 @@ class GiraffeNeckV2(nn.Module): """ # backbone - features = [out_features[f] for f in self.in_features] - [x2, x1, x0] = features + [x2, x1, x0] = out_features # node x3 x13 = self.bu_conv13(x1) diff --git a/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py b/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py new file mode 100644 index 00000000..9effad3a --- /dev/null +++ b/modelscope/models/cv/tinynas_detection/tinynas_damoyolo.py @@ -0,0 +1,15 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +from modelscope.metainfo import Models +from modelscope.models.builder import MODELS +from modelscope.utils.constant import Tasks +from .detector import SingleStageDetector + + +@MODELS.register_module( + Tasks.image_object_detection, module_name=Models.tinynas_damoyolo) +class DamoYolo(SingleStageDetector): + + def __init__(self, model_dir, *args, **kwargs): + self.config_name = 'damoyolo_s.py' + super(DamoYolo, self).__init__(model_dir, *args, **kwargs) diff --git a/modelscope/models/cv/tinynas_detection/tinynas_detector.py b/modelscope/models/cv/tinynas_detection/tinynas_detector.py index e6f144df..92acf3fa 100644 --- a/modelscope/models/cv/tinynas_detection/tinynas_detector.py +++ b/modelscope/models/cv/tinynas_detection/tinynas_detector.py @@ -12,5 +12,5 @@ from .detector import SingleStageDetector class TinynasDetector(SingleStageDetector): def __init__(self, model_dir, *args, **kwargs): - + self.config_name = 'airdet_s.py' super(TinynasDetector, self).__init__(model_dir, *args, **kwargs) diff --git a/modelscope/models/cv/video_summarization/summarizer.py b/modelscope/models/cv/video_summarization/summarizer.py index 75251989..c9987670 100644 --- a/modelscope/models/cv/video_summarization/summarizer.py +++ b/modelscope/models/cv/video_summarization/summarizer.py @@ -161,7 +161,7 @@ def summary_format(summary, fps): is_summary_frame = False if is_summary_frame and summary[-1] == 1: - end_frame = len(frame_idxes) - 1 + end_frame = len(summary) - 1 frames_list.append([start_frame, end_frame]) output = [] diff --git a/modelscope/models/multi_modal/diffusion/__init__.py b/modelscope/models/multi_modal/diffusion/__init__.py index 28813cc9..e7e374b6 100644 --- a/modelscope/models/multi_modal/diffusion/__init__.py +++ b/modelscope/models/multi_modal/diffusion/__init__.py @@ -1 +1,2 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .model import DiffusionForTextToImageSynthesis diff --git a/modelscope/models/multi_modal/gemm/__init__.py b/modelscope/models/multi_modal/gemm/__init__.py index b920628e..fe5df1fe 100644 --- a/modelscope/models/multi_modal/gemm/__init__.py +++ b/modelscope/models/multi_modal/gemm/__init__.py @@ -1 +1,2 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .gemm_model import GEMMForMultiModalEmbedding diff --git a/modelscope/models/multi_modal/multi_stage_diffusion/__init__.py b/modelscope/models/multi_modal/multi_stage_diffusion/__init__.py index accbb56e..1b3f445b 100644 --- a/modelscope/models/multi_modal/multi_stage_diffusion/__init__.py +++ b/modelscope/models/multi_modal/multi_stage_diffusion/__init__.py @@ -1 +1,2 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .model import MultiStageDiffusionForTextToImageSynthesis diff --git a/modelscope/models/multi_modal/ofa/utils/constant.py b/modelscope/models/multi_modal/ofa/utils/constant.py index 124afefa..b3776f8f 100644 --- a/modelscope/models/multi_modal/ofa/utils/constant.py +++ b/modelscope/models/multi_modal/ofa/utils/constant.py @@ -3,8 +3,9 @@ from modelscope.outputs import OutputKeys from modelscope.utils.constant import Tasks OFA_TASK_KEY_MAPPING = { + Tasks.ocr_recognition: OutputKeys.TEXT, Tasks.image_captioning: OutputKeys.CAPTION, - Tasks.summarization: OutputKeys.TEXT, + Tasks.text_summarization: OutputKeys.TEXT, Tasks.visual_question_answering: OutputKeys.TEXT, Tasks.visual_grounding: OutputKeys.BOXES, Tasks.text_classification: OutputKeys.LABELS, diff --git a/modelscope/models/multi_modal/ofa_for_all_tasks.py b/modelscope/models/multi_modal/ofa_for_all_tasks.py index 41ca1f0b..56d19ad8 100644 --- a/modelscope/models/multi_modal/ofa_for_all_tasks.py +++ b/modelscope/models/multi_modal/ofa_for_all_tasks.py @@ -28,12 +28,13 @@ __all__ = ['OfaForAllTasks'] @MODELS.register_module(Tasks.image_captioning, module_name=Models.ofa) +@MODELS.register_module(Tasks.ocr_recognition, module_name=Models.ofa) @MODELS.register_module(Tasks.visual_grounding, module_name=Models.ofa) @MODELS.register_module( Tasks.visual_question_answering, module_name=Models.ofa) @MODELS.register_module(Tasks.visual_entailment, module_name=Models.ofa) @MODELS.register_module(Tasks.image_classification, module_name=Models.ofa) -@MODELS.register_module(Tasks.summarization, module_name=Models.ofa) +@MODELS.register_module(Tasks.text_summarization, module_name=Models.ofa) @MODELS.register_module(Tasks.text_classification, module_name=Models.ofa) class OfaForAllTasks(TorchModel): @@ -97,8 +98,9 @@ class OfaForAllTasks(TorchModel): 'traverse': self._traverse_inference, } self.task_inference_mapping = { + Tasks.ocr_recognition: self._text_gen_inference, Tasks.image_captioning: self._text_gen_inference, - Tasks.summarization: self._text_gen_inference, + Tasks.text_summarization: self._text_gen_inference, Tasks.visual_grounding: self._visual_grounding_inference, Tasks.visual_entailment: inference_d[self.gen_type], Tasks.visual_question_answering: inference_d[self.gen_type], diff --git a/modelscope/models/multi_modal/team/__init__.py b/modelscope/models/multi_modal/team/__init__.py index 0597040c..58bbdca5 100644 --- a/modelscope/models/multi_modal/team/__init__.py +++ b/modelscope/models/multi_modal/team/__init__.py @@ -1 +1,2 @@ +# Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. from .team_model import TEAMForMultiModalSimilarity diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index 9e830d17..57222698 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -34,8 +34,9 @@ if TYPE_CHECKING: TaskModelForTextGeneration) from .token_classification import SbertForTokenClassification from .sentence_embedding import SentenceEmbedding - from .passage_ranking import PassageRanking + from .text_ranking import TextRanking from .T5 import T5ForConditionalGeneration + else: _import_structure = { 'backbones': ['SbertModel'], @@ -75,7 +76,7 @@ else: 'token_classification': ['SbertForTokenClassification'], 'table_question_answering': ['TableQuestionAnswering'], 'sentence_embedding': ['SentenceEmbedding'], - 'passage_ranking': ['PassageRanking'], + 'text_ranking': ['TextRanking'], 'T5': ['T5ForConditionalGeneration'], } diff --git a/modelscope/models/nlp/bert/modeling_bert.py b/modelscope/models/nlp/bert/modeling_bert.py index e91a6433..7c1dfcf5 100755 --- a/modelscope/models/nlp/bert/modeling_bert.py +++ b/modelscope/models/nlp/bert/modeling_bert.py @@ -15,7 +15,6 @@ """PyTorch BERT model. """ import math -import os import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -41,7 +40,6 @@ from transformers.modeling_utils import (PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer) -from modelscope.models.base import TorchModel from modelscope.utils.logger import get_logger from .configuration_bert import BertConfig @@ -50,81 +48,6 @@ logger = get_logger(__name__) _CONFIG_FOR_DOC = 'BertConfig' -def load_tf_weights_in_bert(model, config, tf_checkpoint_path): - """Load tf checkpoints in a pytorch model.""" - try: - import re - - import numpy as np - import tensorflow as tf - except ImportError: - logger.error( - 'Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see ' - 'https://www.tensorflow.org/install/ for installation instructions.' - ) - raise - tf_path = os.path.abspath(tf_checkpoint_path) - logger.info(f'Converting TensorFlow checkpoint from {tf_path}') - # Load weights from TF model - init_vars = tf.train.list_variables(tf_path) - names = [] - arrays = [] - for name, shape in init_vars: - logger.info(f'Loading TF weight {name} with shape {shape}') - array = tf.train.load_variable(tf_path, name) - names.append(name) - arrays.append(array) - - for name, array in zip(names, arrays): - name = name.split('/') - # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v - # which are not required for using pretrained model - if any(n in [ - 'adam_v', 'adam_m', 'AdamWeightDecayOptimizer', - 'AdamWeightDecayOptimizer_1', 'global_step' - ] for n in name): - logger.info(f"Skipping {'/'.join(name)}") - continue - pointer = model - for m_name in name: - if re.fullmatch(r'[A-Za-z]+_\d+', m_name): - scope_names = re.split(r'_(\d+)', m_name) - else: - scope_names = [m_name] - if scope_names[0] == 'kernel' or scope_names[0] == 'gamma': - pointer = getattr(pointer, 'weight') - elif scope_names[0] == 'output_bias' or scope_names[0] == 'beta': - pointer = getattr(pointer, 'bias') - elif scope_names[0] == 'output_weights': - pointer = getattr(pointer, 'weight') - elif scope_names[0] == 'squad': - pointer = getattr(pointer, 'classifier') - else: - try: - pointer = getattr(pointer, scope_names[0]) - except AttributeError: - logger.info(f"Skipping {'/'.join(name)}") - continue - if len(scope_names) >= 2: - num = int(scope_names[1]) - pointer = pointer[num] - if m_name[-11:] == '_embeddings': - pointer = getattr(pointer, 'weight') - elif m_name == 'kernel': - array = np.transpose(array) - try: - if pointer.shape != array.shape: - raise ValueError( - f'Pointer shape {pointer.shape} and array shape {array.shape} mismatched' - ) - except AssertionError as e: - e.args += (pointer.shape, array.shape) - raise - logger.info(f'Initialize PyTorch weight {name}') - pointer.data = torch.from_numpy(array) - return model - - class BertEmbeddings(nn.Module): """Construct the embeddings from word, position and token_type embeddings.""" @@ -750,7 +673,6 @@ class BertPreTrainedModel(PreTrainedModel): """ config_class = BertConfig - load_tf_weights = load_tf_weights_in_bert base_model_prefix = 'bert' supports_gradient_checkpointing = True _keys_to_ignore_on_load_missing = [r'position_ids'] diff --git a/modelscope/models/nlp/heads/infromation_extraction_head.py b/modelscope/models/nlp/heads/infromation_extraction_head.py index 6c3388f0..626f1b59 100644 --- a/modelscope/models/nlp/heads/infromation_extraction_head.py +++ b/modelscope/models/nlp/heads/infromation_extraction_head.py @@ -10,6 +10,8 @@ from modelscope.utils.constant import Tasks @HEADS.register_module( Tasks.information_extraction, module_name=Heads.information_extraction) +@HEADS.register_module( + Tasks.relation_extraction, module_name=Heads.information_extraction) class InformationExtractionHead(TorchHead): def __init__(self, **kwargs): diff --git a/modelscope/models/nlp/heads/token_classification_head.py b/modelscope/models/nlp/heads/token_classification_head.py index ace3deac..3f19ca67 100644 --- a/modelscope/models/nlp/heads/token_classification_head.py +++ b/modelscope/models/nlp/heads/token_classification_head.py @@ -14,6 +14,8 @@ from modelscope.utils.constant import Tasks @HEADS.register_module( Tasks.token_classification, module_name=Heads.token_classification) +@HEADS.register_module( + Tasks.part_of_speech, module_name=Heads.token_classification) class TokenClassificationHead(TorchHead): def __init__(self, **kwargs): diff --git a/modelscope/models/nlp/task_models/information_extraction.py b/modelscope/models/nlp/task_models/information_extraction.py index 0a7d5a47..a206c2fc 100644 --- a/modelscope/models/nlp/task_models/information_extraction.py +++ b/modelscope/models/nlp/task_models/information_extraction.py @@ -16,6 +16,8 @@ __all__ = ['InformationExtractionModel'] @MODELS.register_module( Tasks.information_extraction, module_name=TaskModels.information_extraction) +@MODELS.register_module( + Tasks.relation_extraction, module_name=TaskModels.information_extraction) class InformationExtractionModel(SingleBackboneTaskModelBase): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/task_models/token_classification.py b/modelscope/models/nlp/task_models/token_classification.py index f3930182..a39f58bf 100644 --- a/modelscope/models/nlp/task_models/token_classification.py +++ b/modelscope/models/nlp/task_models/token_classification.py @@ -19,6 +19,8 @@ __all__ = ['TokenClassificationModel'] @MODELS.register_module( Tasks.token_classification, module_name=TaskModels.token_classification) +@MODELS.register_module( + Tasks.part_of_speech, module_name=TaskModels.token_classification) class TokenClassificationModel(SingleBackboneTaskModelBase): def __init__(self, model_dir: str, *args, **kwargs): diff --git a/modelscope/models/nlp/passage_ranking.py b/modelscope/models/nlp/text_ranking.py similarity index 90% rename from modelscope/models/nlp/passage_ranking.py rename to modelscope/models/nlp/text_ranking.py index 2a06ce45..5bc0635a 100644 --- a/modelscope/models/nlp/passage_ranking.py +++ b/modelscope/models/nlp/text_ranking.py @@ -13,18 +13,18 @@ from modelscope.models.nlp.structbert import SbertPreTrainedModel from modelscope.outputs import OutputKeys from modelscope.utils.constant import Tasks -__all__ = ['PassageRanking'] +__all__ = ['TextRanking'] -@MODELS.register_module(Tasks.passage_ranking, module_name=Models.bert) -class PassageRanking(SbertForSequenceClassification, SbertPreTrainedModel): +@MODELS.register_module(Tasks.text_ranking, module_name=Models.bert) +class TextRanking(SbertForSequenceClassification, SbertPreTrainedModel): base_model_prefix: str = 'bert' supports_gradient_checkpointing = True _keys_to_ignore_on_load_missing = [r'position_ids'] def __init__(self, config, model_dir, *args, **kwargs): if hasattr(config, 'base_model_prefix'): - PassageRanking.base_model_prefix = config.base_model_prefix + TextRanking.base_model_prefix = config.base_model_prefix super().__init__(config, model_dir) self.train_batch_size = kwargs.get('train_batch_size', 4) self.register_buffer( @@ -74,7 +74,7 @@ class PassageRanking(SbertForSequenceClassification, SbertPreTrainedModel): num_labels = kwargs.get('num_labels', 1) model_args = {} if num_labels is None else {'num_labels': num_labels} - return super(SbertPreTrainedModel, PassageRanking).from_pretrained( + return super(SbertPreTrainedModel, TextRanking).from_pretrained( pretrained_model_name_or_path=kwargs.get('model_dir'), model_dir=kwargs.get('model_dir'), **model_args) diff --git a/modelscope/msdatasets/cv/easycv_base.py b/modelscope/msdatasets/cv/easycv_base.py index a45827a3..7b6df6e0 100644 --- a/modelscope/msdatasets/cv/easycv_base.py +++ b/modelscope/msdatasets/cv/easycv_base.py @@ -26,11 +26,16 @@ class EasyCVBaseDataset(object): if self.split_config is not None: self._update_data_source(kwargs['data_source']) + def _update_data_root(self, input_dict, data_root): + for k, v in input_dict.items(): + if isinstance(v, str) and self.DATA_ROOT_PATTERN in v: + input_dict.update( + {k: v.replace(self.DATA_ROOT_PATTERN, data_root)}) + elif isinstance(v, dict): + self._update_data_root(v, data_root) + def _update_data_source(self, data_source): data_root = next(iter(self.split_config.values())) data_root = data_root.rstrip(osp.sep) - for k, v in data_source.items(): - if isinstance(v, str) and self.DATA_ROOT_PATTERN in v: - data_source.update( - {k: v.replace(self.DATA_ROOT_PATTERN, data_root)}) + self._update_data_root(data_source, data_root) diff --git a/modelscope/msdatasets/task_datasets/__init__.py b/modelscope/msdatasets/task_datasets/__init__.py index 7c31969a..92764155 100644 --- a/modelscope/msdatasets/task_datasets/__init__.py +++ b/modelscope/msdatasets/task_datasets/__init__.py @@ -12,14 +12,14 @@ if TYPE_CHECKING: from .movie_scene_segmentation import MovieSceneSegmentationDataset from .video_summarization_dataset import VideoSummarizationDataset from .image_inpainting import ImageInpaintingDataset - from .passage_ranking_dataset import PassageRankingDataset + from .text_ranking_dataset import TextRankingDataset else: _import_structure = { 'base': ['TaskDataset'], 'builder': ['TASK_DATASETS', 'build_task_dataset'], 'torch_base_dataset': ['TorchTaskDataset'], - 'passage_ranking_dataset': ['PassageRankingDataset'], + 'text_ranking_dataset': ['TextRankingDataset'], 'veco_dataset': ['VecoDataset'], 'image_instance_segmentation_coco_dataset': ['ImageInstanceSegmentationCocoDataset'], @@ -27,6 +27,8 @@ else: 'movie_scene_segmentation': ['MovieSceneSegmentationDataset'], 'image_inpainting': ['ImageInpaintingDataset'], 'sidd_image_denoising_dataset': ['SiddImageDenoisingDataset'], + 'image_portrait_enhancement_dataset': + ['ImagePortraitEnhancementDataset'], } import sys diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py new file mode 100644 index 00000000..4df24fae --- /dev/null +++ b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .image_portrait_enhancement_dataset import ImagePortraitEnhancementDataset + +else: + _import_structure = { + 'image_portrait_enhancement_dataset': + ['ImagePortraitEnhancementDataset'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py new file mode 100644 index 00000000..1133d3c2 --- /dev/null +++ b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/data_utils.py @@ -0,0 +1,32 @@ +# ------------------------------------------------------------------------ +# Modified from BasicSR (https://github.com/xinntao/BasicSR) +# Copyright 2018-2020 BasicSR Authors +# ------------------------------------------------------------------------ + +import cv2 +import torch + + +def img2tensor(imgs, bgr2rgb=True, float32=True): + """Numpy array to tensor. + Args: + imgs (list[ndarray] | ndarray): Input images. + bgr2rgb (bool): Whether to change bgr to rgb. + float32 (bool): Whether to change to float32. + Returns: + list[tensor] | tensor: Tensor images. If returned results only have + one element, just return tensor. + """ + + def _totensor(img, bgr2rgb, float32): + if img.shape[2] == 3 and bgr2rgb: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = torch.from_numpy(img.transpose(2, 0, 1)) + if float32: + img = img.float() + return img + + if isinstance(imgs, list): + return [_totensor(img, bgr2rgb, float32) for img in imgs] + else: + return _totensor(imgs, bgr2rgb, float32) diff --git a/modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py new file mode 100644 index 00000000..58d40778 --- /dev/null +++ b/modelscope/msdatasets/task_datasets/image_portrait_enhancement/image_portrait_enhancement_dataset.py @@ -0,0 +1,51 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import cv2 +import numpy as np + +from modelscope.metainfo import Datasets, Models +from modelscope.msdatasets.task_datasets.builder import TASK_DATASETS +from modelscope.msdatasets.task_datasets.torch_base_dataset import \ + TorchTaskDataset +from modelscope.utils.constant import Tasks +from .data_utils import img2tensor + + +def default_loader(path): + return cv2.imread(path, cv2.IMREAD_COLOR).astype(np.float32) / 255.0 + + +@TASK_DATASETS.register_module( + Tasks.image_portrait_enhancement, module_name=Datasets.PairedDataset) +class ImagePortraitEnhancementDataset(TorchTaskDataset): + """Paired image dataset for image portrait enhancement. + """ + + def __init__(self, dataset, is_train): + self.dataset = dataset + self.gt_size = 256 + self.is_train = is_train + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + + # Load gt and lq images. Dimension order: HWC; channel order: BGR; + # image range: [0, 1], float32. + item_dict = self.dataset[index] + gt_path = item_dict['hq:FILE'] + img_gt = default_loader(gt_path) + lq_path = item_dict['lq:FILE'] + img_lq = default_loader(lq_path) + + gt_size = self.gt_size + img_gt = cv2.resize(img_gt, (gt_size, gt_size)) + img_lq = cv2.resize(img_lq, (gt_size, gt_size)) + + # BGR to RGB, HWC to CHW, numpy to tensor + img_gt, img_lq = img2tensor([img_gt, img_lq], + bgr2rgb=True, + float32=True) + + return {'input': (img_lq - 0.5) / 0.5, 'target': (img_gt - 0.5) / 0.5} diff --git a/modelscope/msdatasets/task_datasets/passage_ranking_dataset.py b/modelscope/msdatasets/task_datasets/text_ranking_dataset.py similarity index 90% rename from modelscope/msdatasets/task_datasets/passage_ranking_dataset.py rename to modelscope/msdatasets/task_datasets/text_ranking_dataset.py index 517e0d36..dd44f7c2 100644 --- a/modelscope/msdatasets/task_datasets/passage_ranking_dataset.py +++ b/modelscope/msdatasets/task_datasets/text_ranking_dataset.py @@ -16,8 +16,8 @@ from .torch_base_dataset import TorchTaskDataset @TASK_DATASETS.register_module( - group_key=Tasks.passage_ranking, module_name=Models.bert) -class PassageRankingDataset(TorchTaskDataset): + group_key=Tasks.text_ranking, module_name=Models.bert) +class TextRankingDataset(TorchTaskDataset): def __init__(self, datasets: Union[Any, List[Any]], @@ -35,8 +35,8 @@ class PassageRankingDataset(TorchTaskDataset): 'positive_passages') self.neg_sequence = self.dataset_config.get('neg_sequence', 'negative_passages') - self.passage_text_fileds = self.dataset_config.get( - 'passage_text_fileds', ['title', 'text']) + self.text_fileds = self.dataset_config.get('text_fileds', + ['title', 'text']) self.qid_field = self.dataset_config.get('qid_field', 'query_id') if mode == ModeKeys.TRAIN: train_config = kwargs.get('train', {}) @@ -58,14 +58,14 @@ class PassageRankingDataset(TorchTaskDataset): pos_sequences = group[self.pos_sequence] pos_sequences = [ - ' '.join([ele[key] for key in self.passage_text_fileds]) + ' '.join([ele[key] for key in self.text_fileds]) for ele in pos_sequences ] labels.extend([1] * len(pos_sequences)) neg_sequences = group[self.neg_sequence] neg_sequences = [ - ' '.join([ele[key] for key in self.passage_text_fileds]) + ' '.join([ele[key] for key in self.text_fileds]) for ele in neg_sequences ] @@ -88,13 +88,13 @@ class PassageRankingDataset(TorchTaskDataset): pos_sequences = group[self.pos_sequence] pos_sequences = [ - ' '.join([ele[key] for key in self.passage_text_fileds]) + ' '.join([ele[key] for key in self.text_fileds]) for ele in pos_sequences ] neg_sequences = group[self.neg_sequence] neg_sequences = [ - ' '.join([ele[key] for key in self.passage_text_fileds]) + ' '.join([ele[key] for key in self.text_fileds]) for ele in neg_sequences ] diff --git a/modelscope/msdatasets/utils/dataset_utils.py b/modelscope/msdatasets/utils/dataset_utils.py index db9d1fee..c7aa7682 100644 --- a/modelscope/msdatasets/utils/dataset_utils.py +++ b/modelscope/msdatasets/utils/dataset_utils.py @@ -7,7 +7,7 @@ from typing import Any, Mapping, Optional, Sequence, Union from datasets.builder import DatasetBuilder from modelscope.hub.api import HubApi -from modelscope.utils.constant import DEFAULT_DATASET_REVISION, DownloadParams +from modelscope.utils.constant import DEFAULT_DATASET_REVISION from modelscope.utils.logger import get_logger from .dataset_builder import MsCsvDatasetBuilder, TaskSpecificDatasetBuilder @@ -95,15 +95,13 @@ def list_dataset_objects(hub_api: HubApi, max_limit: int, is_recursive: bool, res (list): List of objects, i.e., ['train/images/001.png', 'train/images/002.png', 'val/images/001.png', ...] """ res = [] - cookies = hub_api.check_cookies_upload_data(use_cookies=True) objects = hub_api.list_oss_dataset_objects( dataset_name=dataset_name, namespace=namespace, max_limit=max_limit, is_recursive=is_recursive, is_filter_dir=True, - revision=version, - cookies=cookies) + revision=version) for item in objects: object_key = item.get('Key') @@ -174,7 +172,7 @@ def get_dataset_files(subset_split_into: dict, modelscope_api = HubApi() objects = list_dataset_objects( hub_api=modelscope_api, - max_limit=DownloadParams.MAX_LIST_OBJECTS_NUM.value, + max_limit=-1, is_recursive=True, dataset_name=dataset_name, namespace=namespace, diff --git a/modelscope/outputs.py b/modelscope/outputs.py index fbe15646..13d440ca 100644 --- a/modelscope/outputs.py +++ b/modelscope/outputs.py @@ -506,7 +506,7 @@ TASK_OUTPUTS = { # } Tasks.text_error_correction: [OutputKeys.OUTPUT], Tasks.sentence_embedding: [OutputKeys.TEXT_EMBEDDING, OutputKeys.SCORES], - Tasks.passage_ranking: [OutputKeys.SCORES], + Tasks.text_ranking: [OutputKeys.SCORES], # text generation result for single sample # { @@ -661,6 +661,7 @@ TASK_OUTPUTS = { # "caption": "this is an image caption text." # } Tasks.image_captioning: [OutputKeys.CAPTION], + Tasks.ocr_recognition: [OutputKeys.TEXT], # visual grounding result for single sample # { diff --git a/modelscope/pipeline_inputs.py b/modelscope/pipeline_inputs.py index 34b731c6..77940c3c 100644 --- a/modelscope/pipeline_inputs.py +++ b/modelscope/pipeline_inputs.py @@ -162,7 +162,7 @@ TASK_INPUTS = { 'source_sentence': InputType.LIST, 'sentences_to_compare': InputType.LIST, }, - Tasks.passage_ranking: (InputType.TEXT, InputType.TEXT), + Tasks.text_ranking: (InputType.TEXT, InputType.TEXT), Tasks.text_generation: InputType.TEXT, Tasks.fill_mask: diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py index 4e8b658d..6a4864bf 100644 --- a/modelscope/pipelines/audio/asr_inference_pipeline.py +++ b/modelscope/pipelines/audio/asr_inference_pipeline.py @@ -47,22 +47,28 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): if isinstance(audio_in, str): # load pcm data from url if audio_in is url str - self.audio_in = load_bytes_from_url(audio_in) + self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in) elif isinstance(audio_in, bytes): # load pcm data from wav data if audio_in is wave format - self.audio_in = extract_pcm_from_wav(audio_in) + self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in) else: self.audio_in = audio_in + # set the sample_rate of audio_in if checking_audio_fs is valid + if checking_audio_fs is not None: + self.audio_fs = checking_audio_fs + if recog_type is None or audio_format is None: self.recog_type, self.audio_format, self.audio_in = asr_utils.type_checking( audio_in=self.audio_in, recog_type=recog_type, audio_format=audio_format) - if hasattr(asr_utils, 'sample_rate_checking') and audio_fs is None: - self.audio_fs = asr_utils.sample_rate_checking( + if hasattr(asr_utils, 'sample_rate_checking'): + checking_audio_fs = asr_utils.sample_rate_checking( self.audio_in, self.audio_format) + if checking_audio_fs is not None: + self.audio_fs = checking_audio_fs if self.preprocessor is None: self.preprocessor = WavToScp() @@ -80,7 +86,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): logger.info(f"Decoding with {inputs['audio_format']} files ...") - data_cmd: Sequence[Tuple[str, str]] + data_cmd: Sequence[Tuple[str, str, str]] if inputs['audio_format'] == 'wav' or inputs['audio_format'] == 'pcm': data_cmd = ['speech', 'sound'] elif inputs['audio_format'] == 'kaldi_ark': @@ -88,6 +94,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): elif inputs['audio_format'] == 'tfrecord': data_cmd = ['speech', 'tfrecord'] + if inputs.__contains__('mvn_file'): + data_cmd.append(inputs['mvn_file']) + # generate asr inference command cmd = { 'model_type': inputs['model_type'], diff --git a/modelscope/pipelines/audio/kws_kwsbp_pipeline.py b/modelscope/pipelines/audio/kws_kwsbp_pipeline.py index 5555c9e6..db6fc65d 100644 --- a/modelscope/pipelines/audio/kws_kwsbp_pipeline.py +++ b/modelscope/pipelines/audio/kws_kwsbp_pipeline.py @@ -51,10 +51,10 @@ class KeyWordSpottingKwsbpPipeline(Pipeline): if isinstance(audio_in, str): # load pcm data from url if audio_in is url str - audio_in = load_bytes_from_url(audio_in) + audio_in, audio_fs = load_bytes_from_url(audio_in) elif isinstance(audio_in, bytes): # load pcm data from wav data if audio_in is wave format - audio_in = extract_pcm_from_wav(audio_in) + audio_in, audio_fs = extract_pcm_from_wav(audio_in) output = self.preprocessor.forward(self.model.forward(), audio_in) output = self.forward(output) diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index ea329be4..644749fc 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -433,6 +433,8 @@ def collate_fn(data, device): if isinstance(data, dict) or isinstance(data, Mapping): return type(data)({k: collate_fn(v, device) for k, v in data.items()}) elif isinstance(data, (tuple, list)): + if 0 == len(data): + return torch.Tensor([]) if isinstance(data[0], (int, float)): return default_collate(data).to(device) else: diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 8098bdec..8c81118c 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -20,17 +20,22 @@ DEFAULT_MODEL_FOR_PIPELINE = { Tasks.sentence_embedding: (Pipelines.sentence_embedding, 'damo/nlp_corom_sentence-embedding_english-base'), - Tasks.passage_ranking: (Pipelines.passage_ranking, - 'damo/nlp_corom_passage-ranking_english-base'), + Tasks.text_ranking: (Pipelines.text_ranking, + 'damo/nlp_corom_passage-ranking_english-base'), Tasks.word_segmentation: (Pipelines.word_segmentation, 'damo/nlp_structbert_word-segmentation_chinese-base'), + Tasks.part_of_speech: (Pipelines.part_of_speech, + 'damo/nlp_structbert_part-of-speech_chinese-base'), Tasks.token_classification: (Pipelines.part_of_speech, 'damo/nlp_structbert_part-of-speech_chinese-base'), Tasks.named_entity_recognition: (Pipelines.named_entity_recognition, 'damo/nlp_raner_named-entity-recognition_chinese-base-news'), + Tasks.relation_extraction: + (Pipelines.relation_extraction, + 'damo/nlp_bert_relation-extraction_chinese-base'), Tasks.information_extraction: (Pipelines.relation_extraction, 'damo/nlp_bert_relation-extraction_chinese-base'), diff --git a/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py b/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py index 3502915c..8522ceff 100644 --- a/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py +++ b/modelscope/pipelines/cv/body_3d_keypoints_pipeline.py @@ -143,6 +143,13 @@ class Body3DKeypointsPipeline(Pipeline): max_frame = self.keypoint_model_3d.cfg.model.INPUT.MAX_FRAME # max video frame number to be predicted 3D joints for i, frame in enumerate(video_frames): kps_2d = self.human_body_2d_kps_detector(frame) + if [] == kps_2d.get('boxes'): + res = { + 'success': False, + 'msg': f'fail to detect person at image frame {i}' + } + return res + box = kps_2d['boxes'][ 0] # box: [[[x1, y1], [x2, y2]]], N human boxes per frame, [0] represent using first detected bbox pose = kps_2d['keypoints'][0] # keypoints: [15, 2] @@ -180,7 +187,15 @@ class Body3DKeypointsPipeline(Pipeline): return res def postprocess(self, input: Dict[str, Any], **kwargs) -> Dict[str, Any]: - res = {OutputKeys.KEYPOINTS: [], OutputKeys.TIMESTAMPS: []} + output_video_path = kwargs.get('output_video', None) + if output_video_path is None: + output_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name + + res = { + OutputKeys.KEYPOINTS: [], + OutputKeys.TIMESTAMPS: [], + OutputKeys.OUTPUT_VIDEO: output_video_path + } if not input['success']: pass @@ -189,10 +204,6 @@ class Body3DKeypointsPipeline(Pipeline): pred_3d_pose = poses.data.cpu().numpy()[ 0] # [frame_num, joint_num, joint_dim] - output_video_path = kwargs.get('output_video', None) - if output_video_path is None: - output_video_path = tempfile.NamedTemporaryFile( - suffix='.mp4').name if 'render' in self.keypoint_model_3d.cfg.keys(): self.render_prediction(pred_3d_pose, output_video_path) res[OutputKeys.OUTPUT_VIDEO] = output_video_path diff --git a/modelscope/pipelines/cv/face_image_generation_pipeline.py b/modelscope/pipelines/cv/face_image_generation_pipeline.py index f00d639e..1b4e2e8a 100644 --- a/modelscope/pipelines/cv/face_image_generation_pipeline.py +++ b/modelscope/pipelines/cv/face_image_generation_pipeline.py @@ -61,6 +61,8 @@ class FaceImageGenerationPipeline(Pipeline): return input def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: + if isinstance(input, str): + input = int(input) assert isinstance(input, int) torch.manual_seed(input) torch.cuda.manual_seed(input) diff --git a/modelscope/pipelines/cv/image_reid_person_pipeline.py b/modelscope/pipelines/cv/image_reid_person_pipeline.py index 64674a65..9f60142a 100644 --- a/modelscope/pipelines/cv/image_reid_person_pipeline.py +++ b/modelscope/pipelines/cv/image_reid_person_pipeline.py @@ -53,6 +53,7 @@ class ImageReidPersonPipeline(Pipeline): def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: img = input['img'] img_embedding = self.model(img) + img_embedding = img_embedding.detach().cpu().numpy() return {OutputKeys.IMG_EMBEDDING: img_embedding} def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: diff --git a/modelscope/pipelines/cv/tinynas_detection_pipeline.py b/modelscope/pipelines/cv/tinynas_detection_pipeline.py index b2063629..d35d4d36 100644 --- a/modelscope/pipelines/cv/tinynas_detection_pipeline.py +++ b/modelscope/pipelines/cv/tinynas_detection_pipeline.py @@ -12,6 +12,8 @@ from modelscope.pipelines.base import Input, Pipeline from modelscope.pipelines.builder import PIPELINES from modelscope.preprocessors import LoadImage from modelscope.utils.constant import Tasks +from modelscope.utils.cv.image_utils import \ + show_image_object_detection_auto_result from modelscope.utils.logger import get_logger logger = get_logger() @@ -52,10 +54,18 @@ class TinynasDetectionPipeline(Pipeline): bboxes, scores, labels = self.model.postprocess(inputs['data']) if bboxes is None: - return None - outputs = { - OutputKeys.SCORES: scores, - OutputKeys.LABELS: labels, - OutputKeys.BOXES: bboxes - } + outputs = { + OutputKeys.SCORES: [], + OutputKeys.LABELS: [], + OutputKeys.BOXES: [] + } + else: + outputs = { + OutputKeys.SCORES: scores, + OutputKeys.LABELS: labels, + OutputKeys.BOXES: bboxes + } return outputs + + def show_result(self, img_path, result, save_path=None): + show_image_object_detection_auto_result(img_path, result, save_path) diff --git a/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py b/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py index 76011be0..d3f15c23 100644 --- a/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py +++ b/modelscope/pipelines/multi_modal/multi_modal_embedding_pipeline.py @@ -11,6 +11,8 @@ from modelscope.utils.logger import get_logger logger = get_logger() +@PIPELINES.register_module( + Tasks.image_text_retrieval, module_name=Pipelines.multi_modal_embedding) @PIPELINES.register_module( Tasks.multi_modal_embedding, module_name=Pipelines.multi_modal_embedding) class MultiModalEmbeddingPipeline(Pipeline): diff --git a/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py b/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py new file mode 100644 index 00000000..c61b38f3 --- /dev/null +++ b/modelscope/pipelines/multi_modal/ocr_recognition_pipeline.py @@ -0,0 +1,52 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict, Optional, Union + +import torch + +from modelscope.metainfo import Pipelines +from modelscope.models.multi_modal import OfaForAllTasks +from modelscope.outputs import OutputKeys +from modelscope.pipelines.base import Model, Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import OfaPreprocessor, Preprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + + +@PIPELINES.register_module( + Tasks.ocr_recognition, module_name=Pipelines.ofa_ocr_recognition) +class OcrRecognitionPipeline(Pipeline): + + def __init__(self, + model: Union[Model, str], + preprocessor: Optional[Preprocessor] = None, + **kwargs): + """ + use `model` and `preprocessor` to create a ocr recognition pipeline for prediction + Args: + model: model id on modelscope hub. + """ + super().__init__(model=model) + assert isinstance(model, str) or isinstance(model, Model), \ + 'model must be a single str or OfaForAllTasks' + if isinstance(model, str): + pipe_model = Model.from_pretrained(model) + elif isinstance(model, Model): + pipe_model = model + else: + raise NotImplementedError + pipe_model.model.eval() + if preprocessor is None: + if isinstance(pipe_model, OfaForAllTasks): + preprocessor = OfaPreprocessor(pipe_model.model_dir) + super().__init__(model=pipe_model, preprocessor=preprocessor, **kwargs) + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + return inputs diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py index be854593..677151c0 100644 --- a/modelscope/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -17,7 +17,7 @@ if TYPE_CHECKING: from .fill_mask_ponet_pipeline import FillMaskPonetPipeline from .information_extraction_pipeline import InformationExtractionPipeline from .named_entity_recognition_pipeline import NamedEntityRecognitionPipeline - from .passage_ranking_pipeline import PassageRankingPipeline + from .text_ranking_pipeline import TextRankingPipeline from .sentence_embedding_pipeline import SentenceEmbeddingPipeline from .sequence_classification_pipeline import SequenceClassificationPipeline from .summarization_pipeline import SummarizationPipeline @@ -51,7 +51,7 @@ else: 'information_extraction_pipeline': ['InformationExtractionPipeline'], 'named_entity_recognition_pipeline': ['NamedEntityRecognitionPipeline'], - 'passage_ranking_pipeline': ['PassageRankingPipeline'], + 'text_ranking_pipeline': ['TextRankingPipeline'], 'sentence_embedding_pipeline': ['SentenceEmbeddingPipeline'], 'sequence_classification_pipeline': ['SequenceClassificationPipeline'], 'summarization_pipeline': ['SummarizationPipeline'], diff --git a/modelscope/pipelines/nlp/information_extraction_pipeline.py b/modelscope/pipelines/nlp/information_extraction_pipeline.py index 763e941c..8ac85f43 100644 --- a/modelscope/pipelines/nlp/information_extraction_pipeline.py +++ b/modelscope/pipelines/nlp/information_extraction_pipeline.py @@ -17,6 +17,8 @@ __all__ = ['InformationExtractionPipeline'] @PIPELINES.register_module( Tasks.information_extraction, module_name=Pipelines.relation_extraction) +@PIPELINES.register_module( + Tasks.relation_extraction, module_name=Pipelines.relation_extraction) class InformationExtractionPipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/nlp/summarization_pipeline.py b/modelscope/pipelines/nlp/summarization_pipeline.py index 7a91eff1..30dd4b30 100644 --- a/modelscope/pipelines/nlp/summarization_pipeline.py +++ b/modelscope/pipelines/nlp/summarization_pipeline.py @@ -13,7 +13,7 @@ logger = get_logger() @PIPELINES.register_module( - Tasks.summarization, module_name=Pipelines.text_generation) + Tasks.text_summarization, module_name=Pipelines.text_generation) class SummarizationPipeline(Pipeline): def __init__(self, diff --git a/modelscope/pipelines/nlp/table_question_answering_pipeline.py b/modelscope/pipelines/nlp/table_question_answering_pipeline.py index ca17c9b1..08501953 100644 --- a/modelscope/pipelines/nlp/table_question_answering_pipeline.py +++ b/modelscope/pipelines/nlp/table_question_answering_pipeline.py @@ -72,6 +72,7 @@ class TableQuestionAnsweringPipeline(Pipeline): action = self.action_ops[result['action']] headers = table['header_name'] current_sql = result['sql'] + current_sql['from'] = [table['table_id']] if history_sql is None: return current_sql @@ -216,10 +217,11 @@ class TableQuestionAnsweringPipeline(Pipeline): else: return current_sql - def sql_dict_to_str(self, result, table): + def sql_dict_to_str(self, result, tables): """ convert sql struct to string """ + table = tables[result['sql']['from'][0]] header_names = table['header_name'] + ['空列'] header_ids = table['header_id'] + ['null'] sql = result['sql'] @@ -279,42 +281,43 @@ class TableQuestionAnsweringPipeline(Pipeline): """ result = inputs['result'] history_sql = inputs['history_sql'] - result['sql'] = self.post_process_multi_turn( - history_sql=history_sql, - result=result, - table=self.db.tables[result['table_id']]) - result['sql']['from'] = [result['table_id']] - sql = self.sql_dict_to_str( - result=result, table=self.db.tables[result['table_id']]) + try: + result['sql'] = self.post_process_multi_turn( + history_sql=history_sql, + result=result, + table=self.db.tables[result['table_id']]) + except Exception: + result['sql'] = history_sql + sql = self.sql_dict_to_str(result=result, tables=self.db.tables) # add sqlite if self.db.is_use_sqlite: try: cursor = self.db.connection_obj.cursor().execute(sql.query) - names = [{ - 'name': - description[0], - 'label': - self.db.tables[result['table_id']]['headerid2name'].get( - description[0], description[0]) - } for description in cursor.description] - cells = [] + header_ids, header_names = [], [] + for description in cursor.description: + header_ids.append(self.db.tables[result['table_id']] + ['headerid2name'].get( + description[0], description[0])) + header_names.append(description[0]) + rows = [] for res in cursor.fetchall(): - row = {} - for name, cell in zip(names, res): - row[name['name']] = cell - cells.append(row) - tabledata = {'headers': names, 'cells': cells} + rows.append(list(res)) + tabledata = { + 'header_id': header_ids, + 'header_name': header_names, + 'rows': rows + } except Exception: - tabledata = {'headers': [], 'cells': []} + tabledata = {'header_id': [], 'header_name': [], 'rows': []} else: - tabledata = {'headers': [], 'cells': []} + tabledata = {'header_id': [], 'header_name': [], 'rows': []} output = { OutputKeys.SQL_STRING: sql.string, OutputKeys.SQL_QUERY: sql.query, OutputKeys.HISTORY: result['sql'], - OutputKeys.QUERT_RESULT: json.dumps(tabledata, ensure_ascii=False), + OutputKeys.QUERT_RESULT: tabledata, } return output diff --git a/modelscope/pipelines/nlp/passage_ranking_pipeline.py b/modelscope/pipelines/nlp/text_ranking_pipeline.py similarity index 88% rename from modelscope/pipelines/nlp/passage_ranking_pipeline.py rename to modelscope/pipelines/nlp/text_ranking_pipeline.py index 1d818ac0..4aa57238 100644 --- a/modelscope/pipelines/nlp/passage_ranking_pipeline.py +++ b/modelscope/pipelines/nlp/text_ranking_pipeline.py @@ -9,15 +9,15 @@ from modelscope.models import Model from modelscope.outputs import OutputKeys from modelscope.pipelines.base import Pipeline from modelscope.pipelines.builder import PIPELINES -from modelscope.preprocessors import PassageRankingPreprocessor, Preprocessor +from modelscope.preprocessors import Preprocessor, TextRankingPreprocessor from modelscope.utils.constant import Tasks -__all__ = ['PassageRankingPipeline'] +__all__ = ['TextRankingPipeline'] @PIPELINES.register_module( - Tasks.passage_ranking, module_name=Pipelines.passage_ranking) -class PassageRankingPipeline(Pipeline): + Tasks.text_ranking, module_name=Pipelines.text_ranking) +class TextRankingPipeline(Pipeline): def __init__(self, model: Union[Model, str], @@ -36,7 +36,7 @@ class PassageRankingPipeline(Pipeline): Model) else Model.from_pretrained(model) if preprocessor is None: - preprocessor = PassageRankingPreprocessor( + preprocessor = TextRankingPreprocessor( model.model_dir if isinstance(model, Model) else model, sequence_length=kwargs.pop('sequence_length', 128)) model.eval() diff --git a/modelscope/pipelines/nlp/token_classification_pipeline.py b/modelscope/pipelines/nlp/token_classification_pipeline.py index c57dbf20..055a4b8a 100644 --- a/modelscope/pipelines/nlp/token_classification_pipeline.py +++ b/modelscope/pipelines/nlp/token_classification_pipeline.py @@ -18,6 +18,8 @@ __all__ = ['TokenClassificationPipeline'] @PIPELINES.register_module( Tasks.token_classification, module_name=Pipelines.part_of_speech) +@PIPELINES.register_module( + Tasks.part_of_speech, module_name=Pipelines.part_of_speech) class TokenClassificationPipeline(Pipeline): def __init__(self, diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index f7defd92..63302aa7 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: FillMaskPoNetPreprocessor, NLPPreprocessor, NLPTokenizerPreprocessorBase, - PassageRankingPreprocessor, + TextRankingPreprocessor, RelationExtractionPreprocessor, SentenceEmbeddingPreprocessor, SequenceClassificationPreprocessor, @@ -62,7 +62,7 @@ else: 'FillMaskPoNetPreprocessor', 'NLPPreprocessor', 'NLPTokenizerPreprocessorBase', - 'PassageRankingPreprocessor', + 'TextRankingPreprocessor', 'RelationExtractionPreprocessor', 'SentenceEmbeddingPreprocessor', 'SequenceClassificationPreprocessor', diff --git a/modelscope/preprocessors/asr.py b/modelscope/preprocessors/asr.py index facaa132..91bf5860 100644 --- a/modelscope/preprocessors/asr.py +++ b/modelscope/preprocessors/asr.py @@ -133,6 +133,12 @@ class WavToScp(Preprocessor): else: inputs['asr_model_config'] = asr_model_config + if inputs['model_config'].__contains__('mvn_file'): + mvn_file = os.path.join(inputs['model_workspace'], + inputs['model_config']['mvn_file']) + assert os.path.exists(mvn_file), 'mvn_file does not exist' + inputs['mvn_file'] = mvn_file + elif inputs['model_type'] == Frameworks.tf: assert inputs['model_config'].__contains__( 'vocab_file'), 'vocab_file does not exist' diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py index 73742c47..3c4ac58a 100644 --- a/modelscope/preprocessors/multi_modal.py +++ b/modelscope/preprocessors/multi_modal.py @@ -16,6 +16,7 @@ from .base import Preprocessor from .builder import PREPROCESSORS from .ofa import * # noqa from .ofa.utils.collate import collate_fn +from .ofa.utils.constant import OFA_TASK_KEY_MAPPING __all__ = [ 'OfaPreprocessor', @@ -40,6 +41,7 @@ class OfaPreprocessor(Preprocessor): """ super().__init__(*args, **kwargs) preprocess_mapping = { + Tasks.ocr_recognition: OfaOcrRecognitionPreprocessor, Tasks.image_captioning: OfaImageCaptioningPreprocessor, Tasks.visual_grounding: OfaVisualGroundingPreprocessor, Tasks.visual_question_answering: @@ -47,26 +49,16 @@ class OfaPreprocessor(Preprocessor): Tasks.visual_entailment: OfaVisualEntailmentPreprocessor, Tasks.image_classification: OfaImageClassificationPreprocessor, Tasks.text_classification: OfaTextClassificationPreprocessor, - Tasks.summarization: OfaSummarizationPreprocessor, + Tasks.text_summarization: OfaSummarizationPreprocessor, Tasks.text_to_image_synthesis: OfaTextToImageSynthesisPreprocessor } - input_key_mapping = { - Tasks.image_captioning: ['image'], - Tasks.image_classification: ['image'], - Tasks.summarization: ['text'], - Tasks.text_classification: ['text', 'text2'], - Tasks.visual_grounding: ['image', 'text'], - Tasks.visual_question_answering: ['image', 'text'], - Tasks.visual_entailment: ['image', 'text', 'text2'], - Tasks.text_to_image_synthesis: ['text'] - } model_dir = model_dir if osp.exists(model_dir) else snapshot_download( model_dir) self.cfg = Config.from_file( osp.join(model_dir, ModelFile.CONFIGURATION)) self.preprocess = preprocess_mapping[self.cfg.task]( cfg=self.cfg, model_dir=model_dir, mode=mode) - self.keys = input_key_mapping[self.cfg.task] + self.keys = OFA_TASK_KEY_MAPPING[self.cfg.task] self.tokenizer = self.preprocess.tokenizer if kwargs.get('no_collate', None): self.no_collate = True diff --git a/modelscope/preprocessors/nlp/__init__.py b/modelscope/preprocessors/nlp/__init__.py index f7478329..b95048ba 100644 --- a/modelscope/preprocessors/nlp/__init__.py +++ b/modelscope/preprocessors/nlp/__init__.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: FillMaskPoNetPreprocessor, NLPPreprocessor, NLPTokenizerPreprocessorBase, - PassageRankingPreprocessor, + TextRankingPreprocessor, RelationExtractionPreprocessor, SentenceEmbeddingPreprocessor, SequenceClassificationPreprocessor, @@ -33,7 +33,7 @@ else: 'FillMaskPoNetPreprocessor', 'NLPPreprocessor', 'NLPTokenizerPreprocessorBase', - 'PassageRankingPreprocessor', + 'TextRankingPreprocessor', 'RelationExtractionPreprocessor', 'SentenceEmbeddingPreprocessor', 'SequenceClassificationPreprocessor', diff --git a/modelscope/preprocessors/nlp/nlp_base.py b/modelscope/preprocessors/nlp/nlp_base.py index 267dbb8c..6075a4b3 100644 --- a/modelscope/preprocessors/nlp/nlp_base.py +++ b/modelscope/preprocessors/nlp/nlp_base.py @@ -1,9 +1,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. - +import os import os.path as osp import re -from typing import Any, Dict, Iterable, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union +import json import numpy as np import sentencepiece as spm import torch @@ -13,8 +14,7 @@ from modelscope.metainfo import Models, Preprocessors from modelscope.outputs import OutputKeys from modelscope.preprocessors.base import Preprocessor from modelscope.preprocessors.builder import PREPROCESSORS -from modelscope.utils.config import (Config, ConfigFields, - use_task_specific_params) +from modelscope.utils.config import Config, ConfigFields from modelscope.utils.constant import Fields, InputFields, ModeKeys, ModelFile from modelscope.utils.hub import get_model_type, parse_label_mapping from modelscope.utils.logger import get_logger @@ -29,7 +29,7 @@ __all__ = [ 'NLPPreprocessor', 'FillMaskPoNetPreprocessor', 'NLPTokenizerPreprocessorBase', - 'PassageRankingPreprocessor', + 'TextRankingPreprocessor', 'RelationExtractionPreprocessor', 'SentenceEmbeddingPreprocessor', 'SequenceClassificationPreprocessor', @@ -83,6 +83,15 @@ class NLPTokenizerPreprocessorBase(Preprocessor): self._mode = mode self.label = kwargs.pop('label', OutputKeys.LABEL) + self.use_fast = kwargs.pop('use_fast', None) + if self.use_fast is None and os.path.isfile( + os.path.join(model_dir, 'tokenizer_config.json')): + with open(os.path.join(model_dir, 'tokenizer_config.json'), + 'r') as f: + json_config = json.load(f) + self.use_fast = json_config.get('use_fast') + self.use_fast = False if self.use_fast is None else self.use_fast + self.label2id = None if 'label2id' in kwargs: self.label2id = kwargs.pop('label2id') @@ -118,32 +127,23 @@ class NLPTokenizerPreprocessorBase(Preprocessor): if model_type in (Models.structbert, Models.gpt3, Models.palm, Models.plug): from modelscope.models.nlp.structbert import SbertTokenizer, SbertTokenizerFast - return SbertTokenizer.from_pretrained( - model_dir - ) if self._mode == ModeKeys.INFERENCE else SbertTokenizerFast.from_pretrained( - model_dir) + tokenizer = SbertTokenizerFast if self.use_fast else SbertTokenizer + return tokenizer.from_pretrained(model_dir) elif model_type == Models.veco: from modelscope.models.nlp.veco import VecoTokenizer, VecoTokenizerFast - return VecoTokenizer.from_pretrained( - model_dir - ) if self._mode == ModeKeys.INFERENCE else VecoTokenizerFast.from_pretrained( - model_dir) + tokenizer = VecoTokenizerFast if self.use_fast else VecoTokenizer + return tokenizer.from_pretrained(model_dir) elif model_type == Models.deberta_v2: from modelscope.models.nlp.deberta_v2 import DebertaV2Tokenizer, DebertaV2TokenizerFast - return DebertaV2Tokenizer.from_pretrained( - model_dir - ) if self._mode == ModeKeys.INFERENCE else DebertaV2TokenizerFast.from_pretrained( - model_dir) + tokenizer = DebertaV2TokenizerFast if self.use_fast else DebertaV2Tokenizer + return tokenizer.from_pretrained(model_dir) elif not self.is_transformer_based_model: from transformers import BertTokenizer, BertTokenizerFast - return BertTokenizer.from_pretrained( - model_dir - ) if self._mode == ModeKeys.INFERENCE else BertTokenizerFast.from_pretrained( - model_dir) + tokenizer = BertTokenizerFast if self.use_fast else BertTokenizer + return tokenizer.from_pretrained(model_dir) else: return AutoTokenizer.from_pretrained( - model_dir, - use_fast=False if self._mode == ModeKeys.INFERENCE else True) + model_dir, use_fast=self.use_fast) def __call__(self, data: Union[str, Tuple, Dict]) -> Dict[str, Any]: """process the raw input data @@ -217,7 +217,7 @@ class NLPTokenizerPreprocessorBase(Preprocessor): return isinstance(label, str) or isinstance(label, int) if labels is not None: - if isinstance(labels, Iterable) and all([label_can_be_mapped(label) for label in labels]) \ + if isinstance(labels, (tuple, list)) and all([label_can_be_mapped(label) for label in labels]) \ and self.label2id is not None: output[OutputKeys.LABELS] = [ self.label2id[str(label)] for label in labels @@ -245,9 +245,9 @@ class NLPPreprocessor(NLPTokenizerPreprocessorBase): @PREPROCESSORS.register_module( - Fields.nlp, module_name=Preprocessors.passage_ranking) -class PassageRankingPreprocessor(NLPTokenizerPreprocessorBase): - """The tokenizer preprocessor used in passage ranking model. + Fields.nlp, module_name=Preprocessors.text_ranking) +class TextRankingPreprocessor(NLPTokenizerPreprocessorBase): + """The tokenizer preprocessor used in text-ranking model. """ def __init__(self, @@ -314,8 +314,7 @@ class SequenceClassificationPreprocessor(NLPTokenizerPreprocessorBase): def __init__(self, model_dir: str, mode=ModeKeys.INFERENCE, **kwargs): kwargs['truncation'] = kwargs.get('truncation', True) - kwargs['padding'] = kwargs.get( - 'padding', False if mode == ModeKeys.INFERENCE else 'max_length') + kwargs['padding'] = kwargs.get('padding', 'max_length') kwargs['max_length'] = kwargs.pop('sequence_length', 128) super().__init__(model_dir, mode=mode, **kwargs) @@ -594,9 +593,6 @@ class TokenClassificationPreprocessor(NLPTokenizerPreprocessorBase): else: self.is_split_into_words = self.tokenizer.init_kwargs.get( 'is_split_into_words', False) - if 'label2id' in kwargs: - kwargs.pop('label2id') - self.tokenize_kwargs = kwargs @type_assert(object, str) def __call__(self, data: str) -> Dict[str, Any]: diff --git a/modelscope/preprocessors/ofa/__init__.py b/modelscope/preprocessors/ofa/__init__.py index 95d72fe1..59b94b2b 100644 --- a/modelscope/preprocessors/ofa/__init__.py +++ b/modelscope/preprocessors/ofa/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .image_captioning import OfaImageCaptioningPreprocessor from .image_classification import OfaImageClassificationPreprocessor +from .ocr_recognition import OfaOcrRecognitionPreprocessor from .summarization import OfaSummarizationPreprocessor from .text_classification import OfaTextClassificationPreprocessor from .text_to_image_synthesis import OfaTextToImageSynthesisPreprocessor diff --git a/modelscope/preprocessors/ofa/base.py b/modelscope/preprocessors/ofa/base.py index 47d70f6d..55b3895d 100644 --- a/modelscope/preprocessors/ofa/base.py +++ b/modelscope/preprocessors/ofa/base.py @@ -6,9 +6,12 @@ from os import path as osp import json import numpy as np import torch +from PIL import Image from modelscope.models.multi_modal.ofa import OFATokenizer, OFATokenizerZH +from modelscope.preprocessors.image import load_image from modelscope.utils.trie import Trie +from .utils.constant import OFA_TASK_KEY_MAPPING from .utils.random_help import set_torch_seed @@ -59,6 +62,14 @@ class OfaBasePreprocessor: self.mean = [0.5, 0.5, 0.5] self.std = [0.5, 0.5, 0.5] self.patch_image_size = self.cfg.model.get('patch_image_size', 480) + self.column_map = { + key: key + for key in OFA_TASK_KEY_MAPPING[self.cfg.task] + } + if hasattr(self.cfg, + 'dataset') and self.cfg.dataset.column_map is not None: + for k, v in self.cfg.dataset.column_map.items(): + self.column_map[k] = v self.transtab = str.maketrans( {key: None for key in string.punctuation}) @@ -147,3 +158,8 @@ class OfaBasePreprocessor: constraint_prefix_token) constraint_mask[i][constraint_nodes] = True sample['constraint_mask'] = constraint_mask + + def get_img_pil(self, path_or_url_or_pil): + image = path_or_url_or_pil if isinstance(path_or_url_or_pil, Image.Image) \ + else load_image(path_or_url_or_pil) + return image diff --git a/modelscope/preprocessors/ofa/image_captioning.py b/modelscope/preprocessors/ofa/image_captioning.py index 6c842aa9..99eda15d 100644 --- a/modelscope/preprocessors/ofa/image_captioning.py +++ b/modelscope/preprocessors/ofa/image_captioning.py @@ -1,12 +1,9 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -import os -from typing import Any, Dict, Union +from typing import Any, Dict import torch -from PIL import Image from torchvision import transforms -from modelscope.preprocessors.image import load_image from modelscope.utils.constant import ModeKeys from .base import OfaBasePreprocessor @@ -46,7 +43,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): def _build_train_sample(self, data: Dict[str, Any]) -> Dict[str, Any]: sample = self._build_infer_sample(data) - target = data['text'] + target = data[self.column_map['text']] target = target.translate(self.transtab).strip() target_token_list = target.strip().split() target = ' '.join(target_token_list[:self.max_tgt_length]) @@ -56,8 +53,7 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): return sample def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]: - image = data['image'] if isinstance( - data['image'], Image.Image) else load_image(data['image']) + image = self.get_img_pil(data[self.column_map['image']]) patch_image = self.patch_resize_transform(image) prompt = self.cfg.model.get('prompt', ' what does the image describe?') inputs = self.tokenize_text(prompt) @@ -66,6 +62,6 @@ class OfaImageCaptioningPreprocessor(OfaBasePreprocessor): 'patch_image': patch_image, 'patch_mask': torch.tensor([True]) } - if 'text' in data: - sample['label'] = data['text'] + if self.column_map['text'] in data: + sample['label'] = data[self.column_map['text']] return sample diff --git a/modelscope/preprocessors/ofa/ocr_recognition.py b/modelscope/preprocessors/ofa/ocr_recognition.py new file mode 100644 index 00000000..4c8c245a --- /dev/null +++ b/modelscope/preprocessors/ofa/ocr_recognition.py @@ -0,0 +1,97 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from typing import Any, Dict + +import torch +from PIL import Image +from torchvision import transforms +from torchvision.transforms import InterpolationMode +from torchvision.transforms import functional as F + +from modelscope.preprocessors.image import load_image +from .base import OfaBasePreprocessor + +IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) +IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) + + +def ocr_resize(img, patch_image_size, is_document=False): + img = img.convert('RGB') + width, height = img.size + + if is_document: + new_height, new_width = 64, 1920 + else: + if width >= height: + new_width = max(64, patch_image_size) + new_height = max(64, int(patch_image_size * (height / width))) + top = (patch_image_size - new_height) // 2 + bottom = patch_image_size - new_height - top + left, right = 0, 0 + else: + new_height = max(64, patch_image_size) + new_width = max(64, int(patch_image_size * (width / height))) + left = (patch_image_size - new_width) // 2 + right = patch_image_size - new_width - left + top, bottom = 0, 0 + + img_new = F.resize( + img, + (new_height, new_width), + interpolation=InterpolationMode.BICUBIC, + ) + + if is_document: + img_split = transforms.ToTensor()(img_new).chunk(4, dim=-1) + img_new = transforms.ToPILImage()(torch.cat(img_split, dim=-2)) + new_width, new_height = img_new.size + top = (patch_image_size - new_height) // 2 + bottom = patch_image_size - new_height - top + left, right = 0, 0 + + img_new = F.pad( + img_new, padding=[left, top, right, bottom], padding_mode='edge') + assert img_new.size == (patch_image_size, patch_image_size) + + return img_new + + +class OfaOcrRecognitionPreprocessor(OfaBasePreprocessor): + + def __init__(self, cfg, model_dir): + """preprocess the data + + Args: + cfg(modelscope.utils.config.ConfigDict) : model config + model_dir (str): model path + """ + super(OfaOcrRecognitionPreprocessor, self).__init__(cfg, model_dir) + # Initialize transform + if self.cfg.model.imagenet_default_mean_and_std: + mean = IMAGENET_DEFAULT_MEAN + std = IMAGENET_DEFAULT_STD + else: + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + + self.patch_resize_transform = transforms.Compose([ + lambda image: ocr_resize( + image, + self.cfg.model.patch_image_size, + is_document=self.cfg.model.is_document), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: + image = data['image'] if isinstance( + data['image'], Image.Image) else load_image(data['image']) + patch_image = self.patch_resize_transform(image) + prompt = self.cfg.model.get('prompt', '图片上的文字是什么?') + inputs = self.get_inputs(prompt) + + sample = { + 'source': inputs, + 'patch_image': patch_image, + 'patch_mask': torch.tensor([True]) + } + return sample diff --git a/modelscope/preprocessors/ofa/utils/constant.py b/modelscope/preprocessors/ofa/utils/constant.py new file mode 100644 index 00000000..102d27c0 --- /dev/null +++ b/modelscope/preprocessors/ofa/utils/constant.py @@ -0,0 +1,13 @@ +from modelscope.utils.constant import Tasks + +OFA_TASK_KEY_MAPPING = { + Tasks.ocr_recognition: ['image'], + Tasks.image_captioning: ['image'], + Tasks.image_classification: ['image'], + Tasks.text_summarization: ['text'], + Tasks.text_classification: ['text', 'text2'], + Tasks.visual_grounding: ['image', 'text'], + Tasks.visual_question_answering: ['image', 'text'], + Tasks.visual_entailment: ['image', 'text', 'text2'], + Tasks.text_to_image_synthesis: ['text'] +} diff --git a/modelscope/preprocessors/star3/fields/database.py b/modelscope/preprocessors/star3/fields/database.py index 3d3a1f8d..5debfe2c 100644 --- a/modelscope/preprocessors/star3/fields/database.py +++ b/modelscope/preprocessors/star3/fields/database.py @@ -13,7 +13,7 @@ class Database: tokenizer, table_file_path, syn_dict_file_path, - is_use_sqlite=False): + is_use_sqlite=True): self.tokenizer = tokenizer self.is_use_sqlite = is_use_sqlite if self.is_use_sqlite: diff --git a/modelscope/preprocessors/star3/fields/schema_link.py b/modelscope/preprocessors/star3/fields/schema_link.py index 7f483a1f..220a71d8 100644 --- a/modelscope/preprocessors/star3/fields/schema_link.py +++ b/modelscope/preprocessors/star3/fields/schema_link.py @@ -293,6 +293,7 @@ class SchemaLinker: nlu_t, tables, col_syn_dict, + table_id=None, history_sql=None): """ get linking between question and schema column @@ -300,6 +301,9 @@ class SchemaLinker: typeinfos = [] numbers = re.findall(r'[-]?\d*\.\d+|[-]?\d+|\d+', nlu) + if table_id is not None and table_id in tables: + tables = {table_id: tables[table_id]} + # search schema link in every table search_result_list = [] for tablename in tables: @@ -411,26 +415,25 @@ class SchemaLinker: # get the match score of each table match_score = self.get_table_match_score(nlu_t, schema_link) + # cal table_score + if history_sql is not None and 'from' in history_sql: + table_score = int(table['table_id'] == history_sql['from'][0]) + else: + table_score = 0 + search_result = { - 'table_id': - table['table_id'], - 'question_knowledge': - final_question, - 'header_knowledge': - final_header, - 'schema_link': - schema_link, - 'match_score': - match_score, - 'table_score': - int(table['table_id'] == history_sql['from'][0]) - if history_sql is not None else 0 + 'table_id': table['table_id'], + 'question_knowledge': final_question, + 'header_knowledge': final_header, + 'schema_link': schema_link, + 'match_score': match_score, + 'table_score': table_score } search_result_list.append(search_result) search_result_list = sorted( search_result_list, key=lambda x: (x['match_score'], x['table_score']), - reverse=True)[0:4] + reverse=True)[0:1] return search_result_list diff --git a/modelscope/preprocessors/star3/table_question_answering_preprocessor.py b/modelscope/preprocessors/star3/table_question_answering_preprocessor.py index f98aa6d0..ed2911f6 100644 --- a/modelscope/preprocessors/star3/table_question_answering_preprocessor.py +++ b/modelscope/preprocessors/star3/table_question_answering_preprocessor.py @@ -95,6 +95,7 @@ class TableQuestionAnsweringPreprocessor(Preprocessor): # tokenize question question = data['question'] + table_id = data.get('table_id', None) history_sql = data.get('history_sql', None) nlu = question.lower() nlu_t = self.tokenizer.tokenize(nlu) @@ -106,6 +107,7 @@ class TableQuestionAnsweringPreprocessor(Preprocessor): nlu_t=nlu_t, tables=self.db.tables, col_syn_dict=self.db.syn_dict, + table_id=table_id, history_sql=history_sql) # collect data diff --git a/modelscope/preprocessors/video.py b/modelscope/preprocessors/video.py index f693cd9e..794033b5 100644 --- a/modelscope/preprocessors/video.py +++ b/modelscope/preprocessors/video.py @@ -1,5 +1,10 @@ import math +import os import random +import uuid +from os.path import exists +from tempfile import TemporaryDirectory +from urllib.parse import urlparse import numpy as np import torch @@ -9,6 +14,7 @@ import torchvision.transforms._transforms_video as transforms from decord import VideoReader from torchvision.transforms import Compose +from modelscope.hub.file_download import http_get_file from modelscope.metainfo import Preprocessors from modelscope.utils.constant import Fields, ModeKeys from modelscope.utils.type_assert import type_assert @@ -30,7 +36,22 @@ def ReadVideoData(cfg, Returns: data (Tensor): the normalized video clips for model inputs """ - data = _decode_video(cfg, video_path, num_temporal_views_override) + url_parsed = urlparse(video_path) + if url_parsed.scheme in ('file', '') and exists( + url_parsed.path): # Possibly a local file + data = _decode_video(cfg, video_path, num_temporal_views_override) + else: + with TemporaryDirectory() as temporary_cache_dir: + random_str = uuid.uuid4().hex + http_get_file( + url=video_path, + local_dir=temporary_cache_dir, + file_name=random_str, + cookies=None) + temp_file_path = os.path.join(temporary_cache_dir, random_str) + data = _decode_video(cfg, temp_file_path, + num_temporal_views_override) + if num_spatial_crops_override is not None: num_spatial_crops = num_spatial_crops_override transform = kinetics400_tranform(cfg, num_spatial_crops_override) diff --git a/modelscope/trainers/__init__.py b/modelscope/trainers/__init__.py index 86917261..dbfe5ba7 100644 --- a/modelscope/trainers/__init__.py +++ b/modelscope/trainers/__init__.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: ImagePortraitEnhancementTrainer, MovieSceneSegmentationTrainer, ImageInpaintingTrainer) from .multi_modal import CLIPTrainer - from .nlp import SequenceClassificationTrainer, PassageRankingTrainer + from .nlp import SequenceClassificationTrainer, TextRankingTrainer from .nlp_trainer import NlpEpochBasedTrainer, VecoTrainer from .trainer import EpochBasedTrainer @@ -26,7 +26,7 @@ else: 'ImageInpaintingTrainer' ], 'multi_modal': ['CLIPTrainer'], - 'nlp': ['SequenceClassificationTrainer', 'PassageRankingTrainer'], + 'nlp': ['SequenceClassificationTrainer', 'TextRankingTrainer'], 'nlp_trainer': ['NlpEpochBasedTrainer', 'VecoTrainer'], 'trainer': ['EpochBasedTrainer'] } diff --git a/modelscope/trainers/hooks/logger/text_logger_hook.py b/modelscope/trainers/hooks/logger/text_logger_hook.py index 6629a0c9..8552ab4e 100644 --- a/modelscope/trainers/hooks/logger/text_logger_hook.py +++ b/modelscope/trainers/hooks/logger/text_logger_hook.py @@ -51,7 +51,7 @@ class TextLoggerHook(LoggerHook): if self.out_dir is None: self.out_dir = trainer.work_dir - if not osp.exists(self.out_dir): + if not osp.exists(self.out_dir) and is_master(): os.makedirs(self.out_dir) trainer.logger.info('Text logs will be saved to {}'.format( diff --git a/modelscope/trainers/hooks/lr_scheduler_hook.py b/modelscope/trainers/hooks/lr_scheduler_hook.py index ca0ec01b..32fb0250 100644 --- a/modelscope/trainers/hooks/lr_scheduler_hook.py +++ b/modelscope/trainers/hooks/lr_scheduler_hook.py @@ -47,7 +47,7 @@ class LrSchedulerHook(Hook): return lr def before_train_iter(self, trainer): - if not self.by_epoch: + if not self.by_epoch and trainer.iter > 0: if self.warmup_lr_scheduler is not None: self.warmup_lr_scheduler.step() else: diff --git a/modelscope/trainers/multi_modal/ofa/ofa_trainer.py b/modelscope/trainers/multi_modal/ofa/ofa_trainer.py index 474a6772..96863c77 100644 --- a/modelscope/trainers/multi_modal/ofa/ofa_trainer.py +++ b/modelscope/trainers/multi_modal/ofa/ofa_trainer.py @@ -2,21 +2,27 @@ import math import os +import shutil from functools import partial +from typing import Callable, Dict, Optional, Tuple, Union -from datasets import load_dataset +import torch from torch import distributed as dist +from torch import nn +from torch.utils.data import Dataset from modelscope.metainfo import Trainers -from modelscope.models.base import Model +from modelscope.models.base import Model, TorchModel from modelscope.msdatasets.ms_dataset import MsDataset +from modelscope.preprocessors.base import Preprocessor from modelscope.preprocessors.multi_modal import OfaPreprocessor from modelscope.preprocessors.ofa.utils.collate import collate_fn from modelscope.trainers import EpochBasedTrainer from modelscope.trainers.builder import TRAINERS from modelscope.trainers.optimizer.builder import build_optimizer from modelscope.utils.config import Config -from modelscope.utils.constant import ConfigKeys, ModeKeys, ModelFile +from modelscope.utils.constant import (DEFAULT_MODEL_REVISION, ConfigKeys, + ModeKeys) from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion, get_schedule) @@ -24,21 +30,55 @@ from .ofa_trainer_utils import (AdjustLabelSmoothedCrossEntropyCriterion, @TRAINERS.register_module(module_name=Trainers.ofa_tasks) class OFATrainer(EpochBasedTrainer): - def __init__(self, model: str, cfg_file, work_dir, train_dataset, - eval_dataset, *args, **kwargs): - model = Model.from_pretrained(model) + def __init__( + self, + model: Optional[Union[TorchModel, nn.Module, str]] = None, + cfg_file: Optional[str] = None, + arg_parse_fn: Optional[Callable] = None, + data_collator: Optional[Union[Callable, Dict[str, + Callable]]] = None, + train_dataset: Optional[Union[MsDataset, Dataset]] = None, + eval_dataset: Optional[Union[MsDataset, Dataset]] = None, + preprocessor: Optional[Union[Preprocessor, + Dict[str, Preprocessor]]] = None, + optimizers: Tuple[torch.optim.Optimizer, + torch.optim.lr_scheduler._LRScheduler] = (None, + None), + model_revision: Optional[str] = DEFAULT_MODEL_REVISION, + seed: int = 42, + **kwargs): + model = Model.from_pretrained(model, revision=model_revision) model_dir = model.model_dir - # cfg_file = os.path.join(model_dir, ModelFile.CONFIGURATION) cfg = Config.from_file(cfg_file) - # dataset = self._build_dataset_with_config(cfg) - preprocessor = { - ConfigKeys.train: - OfaPreprocessor( - model_dir=model_dir, mode=ModeKeys.TRAIN, no_collate=True), - ConfigKeys.val: - OfaPreprocessor( - model_dir=model_dir, mode=ModeKeys.EVAL, no_collate=True), + if 'work_dir' not in kwargs or len(kwargs['work_dir']) == 0: + work_dir = cfg.train.work_dir + else: + work_dir = kwargs['work_dir'] + tokenizer_files = { + 'zh': [ + 'tokenizer.json', 'tokenizer_config.json', 'vocab.txt', + 'config.json' + ], + 'en': + ['tokenizer.json', 'vocab.json', 'merges.txt', 'config.json'], } + for filename in tokenizer_files[cfg.model.get('language', 'en')]: + finetune_file = os.path.join(work_dir, filename) + pretrain_file = os.path.join(model_dir, filename) + if os.path.exists(finetune_file): + continue + if os.path.exists(pretrain_file): + shutil.copy(pretrain_file, finetune_file) + + if preprocessor is None: + preprocessor = { + ConfigKeys.train: + OfaPreprocessor( + model_dir=work_dir, mode=ModeKeys.TRAIN, no_collate=True), + ConfigKeys.val: + OfaPreprocessor( + model_dir=work_dir, mode=ModeKeys.EVAL, no_collate=True), + } # use torchrun launch world_size = int(os.environ.get('WORLD_SIZE', 1)) epoch_steps = math.ceil( @@ -48,42 +88,50 @@ class OFATrainer(EpochBasedTrainer): cfg.train.criterion.tokenizer = model.tokenizer self.criterion = AdjustLabelSmoothedCrossEntropyCriterion( cfg.train.criterion) - optimizer = build_optimizer(model, cfg=cfg.train.optimizer) - scheduler_class, scheduler_args = get_schedule(cfg.train.lr_scheduler) - if scheduler_class is not None: - lr_scheduler = scheduler_class(**{'optimizer': optimizer}, - **scheduler_args) + if optimizers[0] is None: + optimizer = build_optimizer(model, cfg=cfg.train.optimizer) else: - lr_scheduler = None - collator = partial( - collate_fn, - pad_idx=model.tokenizer.pad_token_id, - eos_idx=model.tokenizer.eos_token_id, - ) + optimizer = optimizers[0] + if optimizers[1] is None: + scheduler_class, scheduler_args = get_schedule( + cfg.train.lr_scheduler) + if scheduler_class is not None: + lr_scheduler = scheduler_class(**{'optimizer': optimizer}, + **scheduler_args) + else: + lr_scheduler = None + else: + lr_scheduler = optimizers[1] + optimizers = (optimizer, lr_scheduler) + if data_collator is None: + data_collator = partial( + collate_fn, + pad_idx=model.tokenizer.pad_token_id, + eos_idx=model.tokenizer.eos_token_id, + ) if 'launcher' not in kwargs and cfg.train.get('launcher', None): kwargs['launcher'] = cfg.train.launcher if 'use_fp16' not in kwargs and cfg.train.get('use_fp16', False): kwargs['use_fp16'] = cfg.train.use_fp16 kwargs['to_tensor'] = False super().__init__( - cfg_file=cfg_file, model=model, - data_collator=collator, + cfg_file=cfg_file, + arg_parse_fn=arg_parse_fn, + data_collator=data_collator, train_dataset=train_dataset, eval_dataset=eval_dataset, preprocessor=preprocessor, - optimizers=(optimizer, lr_scheduler), - work_dir=work_dir, - *args, + optimizers=optimizers, + seed=seed, **kwargs, ) def train_step(self, model, inputs): model.train() - model_outputs = model.forward(inputs) - loss, sample_size, logging_output = self.criterion( - model_outputs, inputs) - train_outputs = {'loss': loss} + # model_outputs = model.forward(inputs) + loss, sample_size, logging_output = self.criterion(model, inputs) + train_outputs = {'loss': loss / 100} # add model output info to log if 'log_vars' not in train_outputs: default_keys_pattern = ['loss'] @@ -103,24 +151,3 @@ class OFATrainer(EpochBasedTrainer): else: self.log_buffer.update(train_outputs['log_vars']) self.train_outputs = train_outputs - - def _build_dataset_with_config(self, cfg): - if hasattr(cfg.dataset, 'hf_dataset'): - dataset = load_dataset( - cfg.dataset.script, - data_files=cfg.dataset.hf_dataset, - sep=cfg.dataset.sep, - ) - dataset = MsDataset.from_hf_dataset( - dataset.rename_columns(cfg.dataset.column_map)) - return dataset - elif hasattr(cfg.dataset, 'ms_dataset'): - dataset_d = dict() - for key in cfg.dataset.ms_dataset.keys(): - dataset_d[key] = MsDataset.load(**cfg.dataset.ms_dataset[key]) - dataset_d[key] = MsDataset.from_hf_dataset( - dataset_d[key]._hf_ds.rename_columns( - cfg.dataset.column_map)) - return dataset_d - else: - raise NotImplementedError diff --git a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py index b2e54ec6..4805f3bc 100644 --- a/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py +++ b/modelscope/trainers/multi_modal/ofa/ofa_trainer_utils.py @@ -123,7 +123,7 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss): self.padding_idx = args.tokenizer.pad_token_id self.args = args - def forward(self, output, sample, update_num=0, reduce=True): + def forward(self, model, sample, update_num=0, reduce=True): """Compute the loss for the given sample. Returns a tuple with three elements: @@ -131,15 +131,20 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss): 2) the sample size, which is used as the denominator for the gradient 3) logging outputs to display while training """ + if 'labels' in sample: + del sample['labels'] + if 'samples' in sample: + del sample['samples'] + if self.use_rdrop: construct_rdrop_sample(sample) - + output = model.model(**sample['net_input']) loss, nll_loss, ntokens = self.compute_loss( - output, sample, update_num, reduce=reduce) + output.logits, sample, update_num, reduce=reduce) sample_size = ( sample['target'].size(0) if self.sentence_avg else ntokens) logging_output = { - 'loss': loss.data, + 'loss': loss.data / 100, 'nll_loss': nll_loss.data, 'ntokens': sample['ntokens'], 'nsentences': sample['nsentences'], @@ -147,19 +152,18 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss): } return loss, sample_size, logging_output - def get_lprobs_and_target(self, net_output, sample): + def get_lprobs_and_target(self, logits, sample): conf = sample['conf'][:, None, None] if 'conf' in sample and sample[ 'conf'] is not None else 1 constraint_masks = None if 'constraint_masks' in sample and sample[ 'constraint_masks'] is not None: constraint_masks = sample['constraint_masks'] - net_output[0].masked_fill_(~constraint_masks, -math.inf) + logits.masked_fill_(~constraint_masks, -math.inf) if self.constraint_start is not None and self.constraint_end is not None: - net_output[0][:, :, 4:self.constraint_start] = -math.inf - net_output[0][:, :, self.constraint_end:] = -math.inf - lprobs = F.log_softmax( - net_output[0], dim=-1, dtype=torch.float32) * conf + logits[:, :, 4:self.constraint_start] = -math.inf + logits[:, :, self.constraint_end:] = -math.inf + lprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32) * conf target = sample['target'] if self.ignore_prefix_size > 0: lprobs = lprobs[:, self.ignore_prefix_size:, :].contiguous() @@ -180,9 +184,9 @@ class AdjustLabelSmoothedCrossEntropyCriterion(_Loss): return lprobs.view(-1, lprobs.size(-1)), target.view(-1), constraint_masks - def compute_loss(self, net_output, sample, update_num, reduce=True): + def compute_loss(self, logits, sample, update_num, reduce=True): lprobs, target, constraint_masks = self.get_lprobs_and_target( - net_output, sample) + logits, sample) if constraint_masks is not None: constraint_masks = constraint_masks[target != self.padding_idx] lprobs = lprobs[target != self.padding_idx] diff --git a/modelscope/trainers/nlp/__init__.py b/modelscope/trainers/nlp/__init__.py index 001cfefc..7f1bcd63 100644 --- a/modelscope/trainers/nlp/__init__.py +++ b/modelscope/trainers/nlp/__init__.py @@ -6,12 +6,12 @@ from modelscope.utils.import_utils import LazyImportModule if TYPE_CHECKING: from .sequence_classification_trainer import SequenceClassificationTrainer from .csanmt_translation_trainer import CsanmtTranslationTrainer - from .passage_ranking_trainer import PassageRankingTranier + from .text_ranking_trainer import TextRankingTranier else: _import_structure = { 'sequence_classification_trainer': ['SequenceClassificationTrainer'], 'csanmt_translation_trainer': ['CsanmtTranslationTrainer'], - 'passage_ranking_trainer': ['PassageRankingTrainer'] + 'text_ranking_trainer': ['TextRankingTrainer'] } import sys diff --git a/modelscope/trainers/nlp/passage_ranking_trainer.py b/modelscope/trainers/nlp/text_ranking_trainer.py similarity index 95% rename from modelscope/trainers/nlp/passage_ranking_trainer.py rename to modelscope/trainers/nlp/text_ranking_trainer.py index 711fd0c4..5da9c76a 100644 --- a/modelscope/trainers/nlp/passage_ranking_trainer.py +++ b/modelscope/trainers/nlp/text_ranking_trainer.py @@ -8,6 +8,7 @@ import numpy as np import torch from torch import nn from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm from modelscope.metainfo import Trainers from modelscope.models.base import Model, TorchModel @@ -42,8 +43,8 @@ class GroupCollator(): return batch -@TRAINERS.register_module(module_name=Trainers.nlp_passage_ranking_trainer) -class PassageRankingTrainer(NlpEpochBasedTrainer): +@TRAINERS.register_module(module_name=Trainers.nlp_text_ranking_trainer) +class TextRankingTrainer(NlpEpochBasedTrainer): def __init__( self, @@ -117,7 +118,7 @@ class PassageRankingTrainer(NlpEpochBasedTrainer): Example: {"accuracy": 0.5091743119266054, "f1": 0.673780487804878} """ - from modelscope.models.nlp import PassageRanking + from modelscope.models.nlp import TextRanking # get the raw online dataset self.eval_dataloader = self._build_dataloader_with_dataset( self.eval_dataset, @@ -126,7 +127,7 @@ class PassageRankingTrainer(NlpEpochBasedTrainer): # generate a standard dataloader # generate a model if checkpoint_path is not None: - model = PassageRanking.from_pretrained(checkpoint_path) + model = TextRanking.from_pretrained(checkpoint_path) else: model = self.model @@ -141,7 +142,7 @@ class PassageRankingTrainer(NlpEpochBasedTrainer): total_spent_time = 0.0 device = 'cuda:0' if torch.cuda.is_available() else 'cpu' model.to(device) - for _step, batch in enumerate(self.eval_dataloader): + for _step, batch in enumerate(tqdm(self.eval_dataloader)): try: batch = { key: diff --git a/modelscope/trainers/trainer.py b/modelscope/trainers/trainer.py index 916a6def..35caed0d 100644 --- a/modelscope/trainers/trainer.py +++ b/modelscope/trainers/trainer.py @@ -656,7 +656,7 @@ class EpochBasedTrainer(BaseTrainer): # TODO: support MsDataset load for cv if hasattr(data_cfg, 'name'): dataset = MsDataset.load( - dataset_name=data_cfg.name, + dataset_name=data_cfg.pop('name'), **data_cfg, ) cfg = ConfigDict(type=self.cfg.model.type, mode=mode) diff --git a/modelscope/utils/audio/audio_utils.py b/modelscope/utils/audio/audio_utils.py index 647d9521..32e2fa54 100644 --- a/modelscope/utils/audio/audio_utils.py +++ b/modelscope/utils/audio/audio_utils.py @@ -57,6 +57,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]): def extract_pcm_from_wav(wav: bytes) -> bytes: data = wav + sample_rate = None if len(data) > 44: frame_len = 44 file_len = len(data) @@ -70,29 +71,33 @@ def extract_pcm_from_wav(wav: bytes) -> bytes: 'Subchunk1ID'] == 'fmt ': header_fields['SubChunk1Size'] = struct.unpack( ' Union[bytes, str]: + sample_rate = None result = urlparse(url) if result.scheme is not None and len(result.scheme) > 0: storage = HTTPStorage() data = storage.read(url) - data = extract_pcm_from_wav(data) + data, sample_rate = extract_pcm_from_wav(data) else: data = url - return data + return data, sample_rate diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 6ba58c19..a3f4a935 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -103,7 +103,7 @@ class NLPTasks(object): sentence_similarity = 'sentence-similarity' text_classification = 'text-classification' sentence_embedding = 'sentence-embedding' - passage_ranking = 'passage-ranking' + text_ranking = 'text-ranking' relation_extraction = 'relation-extraction' zero_shot = 'zero-shot' translation = 'translation' @@ -117,7 +117,7 @@ class NLPTasks(object): table_question_answering = 'table-question-answering' sentence_embedding = 'sentence-embedding' fill_mask = 'fill-mask' - summarization = 'summarization' + text_summarization = 'text-summarization' question_answering = 'question-answering' zero_shot_classification = 'zero-shot-classification' backbone = 'backbone' @@ -231,13 +231,6 @@ class DownloadMode(enum.Enum): FORCE_REDOWNLOAD = 'force_redownload' -class DownloadParams(enum.Enum): - """ - Parameters for downloading dataset. - """ - MAX_LIST_OBJECTS_NUM = 50000 - - class DatasetFormations(enum.Enum): """ How a dataset is organized and interpreted """ @@ -289,6 +282,7 @@ class ConfigKeys(object): """Fixed keywords in configuration file""" train = 'train' val = 'val' + test = 'test' class Requirements(object): diff --git a/modelscope/utils/device.py b/modelscope/utils/device.py index 4bbd09d8..83faa261 100644 --- a/modelscope/utils/device.py +++ b/modelscope/utils/device.py @@ -61,8 +61,8 @@ def device_placement(framework, device_name='gpu:0'): if framework == Frameworks.tf: import tensorflow as tf if device_type == Devices.gpu and not tf.test.is_gpu_available(): - logger.warning( - 'tensorflow cuda is not available, using cpu instead.') + logger.debug( + 'tensorflow: cuda is not available, using cpu instead.') device_type = Devices.cpu if device_type == Devices.cpu: with tf.device('/CPU:0'): @@ -78,7 +78,8 @@ def device_placement(framework, device_name='gpu:0'): if torch.cuda.is_available(): torch.cuda.set_device(f'cuda:{device_id}') else: - logger.warning('cuda is not available, using cpu instead.') + logger.debug( + 'pytorch: cuda is not available, using cpu instead.') yield else: yield @@ -96,9 +97,7 @@ def create_device(device_name): if device_type == Devices.gpu: use_cuda = True if not torch.cuda.is_available(): - logger.warning( - 'cuda is not available, create gpu device failed, using cpu instead.' - ) + logger.info('cuda is not available, using cpu instead.') use_cuda = False if use_cuda: diff --git a/modelscope/utils/file_utils.py b/modelscope/utils/file_utils.py index 9b82f8d2..cf59dc57 100644 --- a/modelscope/utils/file_utils.py +++ b/modelscope/utils/file_utils.py @@ -1,6 +1,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import inspect +import os from pathlib import Path @@ -35,3 +36,10 @@ def get_default_cache_dir(): """ default_cache_dir = Path.home().joinpath('.cache', 'modelscope') return default_cache_dir + + +def read_file(path): + + with open(path, 'r') as f: + text = f.read() + return text diff --git a/modelscope/utils/registry.py b/modelscope/utils/registry.py index 7a9c79e2..d6994bd3 100644 --- a/modelscope/utils/registry.py +++ b/modelscope/utils/registry.py @@ -176,7 +176,7 @@ def build_from_cfg(cfg, raise TypeError('default_args must be a dict or None, ' f'but got {type(default_args)}') - # dynamic load installation reqruiements for this module + # dynamic load installation requirements for this module from modelscope.utils.import_utils import LazyImportModule sig = (registry.name.upper(), group_key, cfg['type']) LazyImportModule.import_module(sig) @@ -193,8 +193,10 @@ def build_from_cfg(cfg, if isinstance(obj_type, str): obj_cls = registry.get(obj_type, group_key=group_key) if obj_cls is None: - raise KeyError(f'{obj_type} is not in the {registry.name}' - f' registry group {group_key}') + raise KeyError( + f'{obj_type} is not in the {registry.name}' + f' registry group {group_key}. Please make' + f' sure the correct version of ModelScope library is used.') obj_cls.group_key = group_key elif inspect.isclass(obj_type) or inspect.isfunction(obj_type): obj_cls = obj_type diff --git a/modelscope/utils/regress_test_utils.py b/modelscope/utils/regress_test_utils.py index 47bbadfe..3c1e5c1c 100644 --- a/modelscope/utils/regress_test_utils.py +++ b/modelscope/utils/regress_test_utils.py @@ -65,7 +65,8 @@ class RegressTool: def monitor_module_single_forward(self, module: nn.Module, file_name: str, - compare_fn=None): + compare_fn=None, + **kwargs): """Monitor a pytorch module in a single forward. @param module: A torch module @@ -107,7 +108,7 @@ class RegressTool: baseline = os.path.join(tempfile.gettempdir(), name) self.load(baseline, name) with open(baseline, 'rb') as f: - baseline_json = pickle.load(f) + base = pickle.load(f) class NumpyEncoder(json.JSONEncoder): """Special json encoder for numpy types @@ -122,9 +123,9 @@ class RegressTool: return obj.tolist() return json.JSONEncoder.default(self, obj) - print(f'baseline: {json.dumps(baseline_json, cls=NumpyEncoder)}') + print(f'baseline: {json.dumps(base, cls=NumpyEncoder)}') print(f'latest : {json.dumps(io_json, cls=NumpyEncoder)}') - if not compare_io_and_print(baseline_json, io_json, compare_fn): + if not compare_io_and_print(base, io_json, compare_fn, **kwargs): raise ValueError('Result not match!') @contextlib.contextmanager @@ -136,7 +137,8 @@ class RegressTool: ignore_keys=None, compare_random=True, reset_dropout=True, - lazy_stop_callback=None): + lazy_stop_callback=None, + **kwargs): """Monitor a pytorch module's backward data and cfg data within a step of the optimizer. This is usually useful when you try to change some dangerous code @@ -265,14 +267,15 @@ class RegressTool: baseline_json = pickle.load(f) if level == 'strict' and not compare_io_and_print( - baseline_json['forward'], io_json, compare_fn): + baseline_json['forward'], io_json, compare_fn, **kwargs): raise RuntimeError('Forward not match!') if not compare_backward_and_print( baseline_json['backward'], bw_json, compare_fn=compare_fn, ignore_keys=ignore_keys, - level=level): + level=level, + **kwargs): raise RuntimeError('Backward not match!') cfg_opt1 = { 'optimizer': baseline_json['optimizer'], @@ -286,7 +289,8 @@ class RegressTool: 'cfg': summary['cfg'], 'state': None if not compare_random else summary['state'] } - if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn): + if not compare_cfg_and_optimizers(cfg_opt1, cfg_opt2, compare_fn, + **kwargs): raise RuntimeError('Cfg or optimizers not match!') @@ -303,7 +307,8 @@ class MsRegressTool(RegressTool): compare_fn=None, ignore_keys=None, compare_random=True, - lazy_stop_callback=None): + lazy_stop_callback=None, + **kwargs): if lazy_stop_callback is None: @@ -319,7 +324,7 @@ class MsRegressTool(RegressTool): trainer.register_hook(EarlyStopHook()) - def _train_loop(trainer, *args, **kwargs): + def _train_loop(trainer, *args_train, **kwargs_train): with self.monitor_module_train( trainer, file_name, @@ -327,9 +332,11 @@ class MsRegressTool(RegressTool): compare_fn=compare_fn, ignore_keys=ignore_keys, compare_random=compare_random, - lazy_stop_callback=lazy_stop_callback): + lazy_stop_callback=lazy_stop_callback, + **kwargs): try: - return trainer.train_loop_origin(*args, **kwargs) + return trainer.train_loop_origin(*args_train, + **kwargs_train) except MsRegressTool.EarlyStopError: pass @@ -530,7 +537,8 @@ def compare_arguments_nested(print_content, ) return False if not all([ - compare_arguments_nested(None, sub_arg1, sub_arg2) + compare_arguments_nested( + None, sub_arg1, sub_arg2, rtol=rtol, atol=atol) for sub_arg1, sub_arg2 in zip(arg1, arg2) ]): if print_content is not None: @@ -551,7 +559,8 @@ def compare_arguments_nested(print_content, print(f'{print_content}, key diff:{set(keys1) - set(keys2)}') return False if not all([ - compare_arguments_nested(None, arg1[key], arg2[key]) + compare_arguments_nested( + None, arg1[key], arg2[key], rtol=rtol, atol=atol) for key in keys1 ]): if print_content is not None: @@ -574,7 +583,7 @@ def compare_arguments_nested(print_content, raise ValueError(f'type not supported: {type1}') -def compare_io_and_print(baseline_json, io_json, compare_fn=None): +def compare_io_and_print(baseline_json, io_json, compare_fn=None, **kwargs): if compare_fn is None: def compare_fn(*args, **kwargs): @@ -602,10 +611,10 @@ def compare_io_and_print(baseline_json, io_json, compare_fn=None): else: match = compare_arguments_nested( f'unmatched module {key} input args', v1input['args'], - v2input['args']) and match + v2input['args'], **kwargs) and match match = compare_arguments_nested( f'unmatched module {key} input kwargs', v1input['kwargs'], - v2input['kwargs']) and match + v2input['kwargs'], **kwargs) and match v1output = numpify_tensor_nested(v1['output']) v2output = numpify_tensor_nested(v2['output']) res = compare_fn(v1output, v2output, key, 'output') @@ -615,8 +624,11 @@ def compare_io_and_print(baseline_json, io_json, compare_fn=None): ) match = match and res else: - match = compare_arguments_nested(f'unmatched module {key} outputs', - v1output, v2output) and match + match = compare_arguments_nested( + f'unmatched module {key} outputs', + arg1=v1output, + arg2=v2output, + **kwargs) and match return match @@ -624,7 +636,8 @@ def compare_backward_and_print(baseline_json, bw_json, level, ignore_keys=None, - compare_fn=None): + compare_fn=None, + **kwargs): if compare_fn is None: def compare_fn(*args, **kwargs): @@ -653,18 +666,26 @@ def compare_backward_and_print(baseline_json, data2, grad2, data_after2 = bw_json[key]['data'], bw_json[key][ 'grad'], bw_json[key]['data_after'] match = compare_arguments_nested( - f'unmatched module {key} tensor data', data1, data2) and match + f'unmatched module {key} tensor data', + arg1=data1, + arg2=data2, + **kwargs) and match if level == 'strict': match = compare_arguments_nested( - f'unmatched module {key} grad data', grad1, - grad2) and match + f'unmatched module {key} grad data', + arg1=grad1, + arg2=grad2, + **kwargs) and match match = compare_arguments_nested( f'unmatched module {key} data after step', data_after1, - data_after2) and match + data_after2, **kwargs) and match return match -def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None): +def compare_cfg_and_optimizers(baseline_json, + cfg_json, + compare_fn=None, + **kwargs): if compare_fn is None: def compare_fn(*args, **kwargs): @@ -686,12 +707,12 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None): print( f"Optimizer type not equal:{optimizer1['type']} and {optimizer2['type']}" ) - match = compare_arguments_nested('unmatched optimizer defaults', - optimizer1['defaults'], - optimizer2['defaults']) and match - match = compare_arguments_nested('unmatched optimizer state_dict', - optimizer1['state_dict'], - optimizer2['state_dict']) and match + match = compare_arguments_nested( + 'unmatched optimizer defaults', optimizer1['defaults'], + optimizer2['defaults'], **kwargs) and match + match = compare_arguments_nested( + 'unmatched optimizer state_dict', optimizer1['state_dict'], + optimizer2['state_dict'], **kwargs) and match res = compare_fn(lr_scheduler1, lr_scheduler2, None, 'lr_scheduler') if res is not None: @@ -703,16 +724,17 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None): print( f"Optimizer type not equal:{lr_scheduler1['type']} and {lr_scheduler2['type']}" ) - match = compare_arguments_nested('unmatched lr_scheduler state_dict', - lr_scheduler1['state_dict'], - lr_scheduler2['state_dict']) and match + match = compare_arguments_nested( + 'unmatched lr_scheduler state_dict', lr_scheduler1['state_dict'], + lr_scheduler2['state_dict'], **kwargs) and match res = compare_fn(cfg1, cfg2, None, 'cfg') if res is not None: print(f'cfg compared with user compare_fn with result:{res}\n') match = match and res else: - match = compare_arguments_nested('unmatched cfg', cfg1, cfg2) and match + match = compare_arguments_nested( + 'unmatched cfg', arg1=cfg1, arg2=cfg2, **kwargs) and match res = compare_fn(state1, state2, None, 'state') if res is not None: @@ -721,6 +743,6 @@ def compare_cfg_and_optimizers(baseline_json, cfg_json, compare_fn=None): match = match and res else: match = compare_arguments_nested('unmatched random state', state1, - state2) and match + state2, **kwargs) and match return match diff --git a/requirements/cv.txt b/requirements/cv.txt index d23fab3a..f29b296b 100644 --- a/requirements/cv.txt +++ b/requirements/cv.txt @@ -19,7 +19,7 @@ moviepy>=1.0.3 networkx>=2.5 numba onnxruntime>=1.10 -pai-easycv>=0.6.3.7 +pai-easycv>=0.6.3.9 pandas psutil regex diff --git a/tests/export/test_export_sbert_sequence_classification.py b/tests/export/test_export_sbert_sequence_classification.py index 535b3f5d..97926539 100644 --- a/tests/export/test_export_sbert_sequence_classification.py +++ b/tests/export/test_export_sbert_sequence_classification.py @@ -22,7 +22,7 @@ class TestExportSbertSequenceClassification(unittest.TestCase): shutil.rmtree(self.tmp_dir) super().tearDown() - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skip def test_export_sbert_sequence_classification(self): model = Model.from_pretrained(self.model_id) print( diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index c96db986..f2bdb2d3 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -127,7 +127,7 @@ class HubOperationTest(unittest.TestCase): return None def test_list_model(self): - data = self.api.list_model(TEST_MODEL_ORG) + data = self.api.list_models(TEST_MODEL_ORG) assert len(data['Models']) >= 1 diff --git a/tests/hub/test_hub_upload.py b/tests/hub/test_hub_upload.py index 2250164b..e1f61467 100644 --- a/tests/hub/test_hub_upload.py +++ b/tests/hub/test_hub_upload.py @@ -7,12 +7,12 @@ import uuid from modelscope.hub.api import HubApi from modelscope.hub.constants import Licenses, ModelVisibility +from modelscope.hub.errors import HTTPError, NotLoginException from modelscope.hub.repository import Repository -from modelscope.hub.upload import upload_folder from modelscope.utils.constant import ModelFile from modelscope.utils.logger import get_logger from modelscope.utils.test_utils import test_level -from .test_utils import TEST_ACCESS_TOKEN1, delete_credential +from .test_utils import TEST_ACCESS_TOKEN1, TEST_MODEL_ORG, delete_credential logger = get_logger() @@ -22,7 +22,7 @@ class HubUploadTest(unittest.TestCase): def setUp(self): logger.info('SetUp') self.api = HubApi() - self.user = os.environ.get('TEST_MODEL_ORG', 'citest') + self.user = TEST_MODEL_ORG logger.info(self.user) self.create_model_name = '%s/%s_%s' % (self.user, 'test_model_upload', uuid.uuid4().hex) @@ -39,7 +39,10 @@ class HubUploadTest(unittest.TestCase): def tearDown(self): logger.info('TearDown') shutil.rmtree(self.model_dir, ignore_errors=True) - self.api.delete_model(model_id=self.create_model_name) + try: + self.api.delete_model(model_id=self.create_model_name) + except Exception: + pass def test_upload_exits_repo_master(self): logger.info('basic test for upload!') @@ -50,14 +53,14 @@ class HubUploadTest(unittest.TestCase): license=Licenses.APACHE_V2) os.system("echo '111'>%s" % os.path.join(self.finetune_path, 'add1.py')) - upload_folder( + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path) Repository(model_dir=self.repo_path, clone_from=self.create_model_name) assert os.path.exists(os.path.join(self.repo_path, 'add1.py')) shutil.rmtree(self.repo_path, ignore_errors=True) os.system("echo '222'>%s" % os.path.join(self.finetune_path, 'add2.py')) - upload_folder( + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path, revision='new_revision/version1') @@ -69,7 +72,7 @@ class HubUploadTest(unittest.TestCase): shutil.rmtree(self.repo_path, ignore_errors=True) os.system("echo '333'>%s" % os.path.join(self.finetune_path, 'add3.py')) - upload_folder( + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path, revision='new_revision/version2', @@ -84,7 +87,7 @@ class HubUploadTest(unittest.TestCase): add4_path = os.path.join(self.finetune_path, 'temp') os.mkdir(add4_path) os.system("echo '444'>%s" % os.path.join(add4_path, 'add4.py')) - upload_folder( + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path, revision='new_revision/version1') @@ -101,7 +104,7 @@ class HubUploadTest(unittest.TestCase): self.api.login(TEST_ACCESS_TOKEN1) os.system("echo '111'>%s" % os.path.join(self.finetune_path, 'add1.py')) - upload_folder( + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path, revision='new_model_new_revision', @@ -119,48 +122,23 @@ class HubUploadTest(unittest.TestCase): logger.info('test upload without login!') self.api.login(TEST_ACCESS_TOKEN1) delete_credential() - try: - upload_folder( - model_id=self.create_model_name, - model_dir=self.finetune_path, - visibility=ModelVisibility.PUBLIC, - license=Licenses.APACHE_V2) - except Exception as e: - logger.info(e) - self.api.login(TEST_ACCESS_TOKEN1) - upload_folder( + with self.assertRaises(NotLoginException): + self.api.push_model( model_id=self.create_model_name, model_dir=self.finetune_path, visibility=ModelVisibility.PUBLIC, license=Licenses.APACHE_V2) - Repository( - model_dir=self.repo_path, clone_from=self.create_model_name) - assert os.path.exists( - os.path.join(self.repo_path, 'configuration.json')) - shutil.rmtree(self.repo_path, ignore_errors=True) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_upload_invalid_repo(self): logger.info('test upload to invalid repo!') self.api.login(TEST_ACCESS_TOKEN1) - try: - upload_folder( + with self.assertRaises(HTTPError): + self.api.push_model( model_id='%s/%s' % ('speech_tts', 'invalid_model_test'), model_dir=self.finetune_path, visibility=ModelVisibility.PUBLIC, license=Licenses.APACHE_V2) - except Exception as e: - logger.info(e) - upload_folder( - model_id=self.create_model_name, - model_dir=self.finetune_path, - visibility=ModelVisibility.PUBLIC, - license=Licenses.APACHE_V2) - Repository( - model_dir=self.repo_path, clone_from=self.create_model_name) - assert os.path.exists( - os.path.join(self.repo_path, 'configuration.json')) - shutil.rmtree(self.repo_path, ignore_errors=True) if __name__ == '__main__': diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py index 91a3b5c5..dff411f6 100644 --- a/tests/msdatasets/test_ms_dataset.py +++ b/tests/msdatasets/test_ms_dataset.py @@ -52,7 +52,8 @@ class MsDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ms_csv_basic(self): ms_ds_train = MsDataset.load( - 'afqmc_small', namespace='userxiaoming', split='train') + 'clue', subset_name='afqmc', + split='train').to_hf_dataset().select(range(5)) print(next(iter(ms_ds_train))) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @@ -70,7 +71,7 @@ class MsDatasetTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @require_torch def test_to_torch_dataset_text(self): - model_id = 'damo/bert-base-sst2' + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' nlp_model = Model.from_pretrained(model_id) preprocessor = SequenceClassificationPreprocessor( nlp_model.model_dir, @@ -92,7 +93,7 @@ class MsDatasetTest(unittest.TestCase): def test_to_tf_dataset_text(self): import tensorflow as tf tf.compat.v1.enable_eager_execution() - model_id = 'damo/bert-base-sst2' + model_id = 'damo/nlp_structbert_sentence-similarity_chinese-tiny' nlp_model = Model.from_pretrained(model_id) preprocessor = SequenceClassificationPreprocessor( nlp_model.model_dir, diff --git a/tests/pipelines/test_automatic_speech_recognition.py b/tests/pipelines/test_automatic_speech_recognition.py index 303fb6b9..b6532868 100644 --- a/tests/pipelines/test_automatic_speech_recognition.py +++ b/tests/pipelines/test_automatic_speech_recognition.py @@ -45,6 +45,10 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, 'checking_item': OutputKeys.TEXT, 'example': 'wav_example' }, + 'test_run_with_url_pytorch': { + 'checking_item': OutputKeys.TEXT, + 'example': 'wav_example' + }, 'test_run_with_url_tf': { 'checking_item': OutputKeys.TEXT, 'example': 'wav_example' @@ -74,6 +78,147 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, } } + all_models_info = [ + { + 'model_id': + 'damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': 'damo/speech_paraformer_asr_nat-aishell1-pytorch', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': 'damo/speech_paraformer_asr_nat-aishell2-pytorch', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1', + 'wav_path': 'data/test/audios/asr_example_8K.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_8K.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_8K.wav' + }, + { + 'model_id': + 'damo/speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_cn_en.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_cn_en.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_cn_dialect.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_cn_dialect.wav' + }, + { + 'model_id': + 'damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab3444-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example.wav' + }, + { + 'model_id': + 'damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab3444-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_8K.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_en.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_en.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_ru.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_ru.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_es.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_es.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_ko.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_ko.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_ja.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_ja.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-online', + 'wav_path': 'data/test/audios/asr_example_id.wav' + }, + { + 'model_id': + 'damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-offline', + 'wav_path': 'data/test/audios/asr_example_id.wav' + }, + ] + def setUp(self) -> None: self.am_pytorch_model_id = 'damo/speech_paraformer_asr_nat-aishell1-pytorch' self.am_tf_model_id = 'damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1' @@ -90,7 +235,7 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, def run_pipeline(self, model_id: str, audio_in: Union[str, bytes], - sr: int = 16000) -> Dict[str, Any]: + sr: int = None) -> Dict[str, Any]: inference_16k_pipline = pipeline( task=Tasks.auto_speech_recognition, model=model_id) @@ -136,33 +281,26 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, return audio, fs @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_wav_pytorch(self): - """run with single waveform file + def test_run_with_pcm(self): + """run with wav data """ - logger.info('Run ASR test with waveform file (pytorch)...') + logger.info('Run ASR test with wav data (tensorflow)...') - wav_file_path = os.path.join(os.getcwd(), WAV_FILE) + audio, sr = self.wav2bytes(os.path.join(os.getcwd(), WAV_FILE)) rec_result = self.run_pipeline( - model_id=self.am_pytorch_model_id, audio_in=wav_file_path) - self.check_result('test_run_with_wav_pytorch', rec_result) - - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_pcm_pytorch(self): - """run with wav data - """ + model_id=self.am_tf_model_id, audio_in=audio, sr=sr) + self.check_result('test_run_with_pcm_tf', rec_result) logger.info('Run ASR test with wav data (pytorch)...') - audio, sr = self.wav2bytes(os.path.join(os.getcwd(), WAV_FILE)) - rec_result = self.run_pipeline( model_id=self.am_pytorch_model_id, audio_in=audio, sr=sr) self.check_result('test_run_with_pcm_pytorch', rec_result) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_wav_tf(self): + def test_run_with_wav(self): """run with single waveform file """ @@ -174,21 +312,14 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, model_id=self.am_tf_model_id, audio_in=wav_file_path) self.check_result('test_run_with_wav_tf', rec_result) - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_pcm_tf(self): - """run with wav data - """ - - logger.info('Run ASR test with wav data (tensorflow)...') - - audio, sr = self.wav2bytes(os.path.join(os.getcwd(), WAV_FILE)) + logger.info('Run ASR test with waveform file (pytorch)...') rec_result = self.run_pipeline( - model_id=self.am_tf_model_id, audio_in=audio, sr=sr) - self.check_result('test_run_with_pcm_tf', rec_result) + model_id=self.am_pytorch_model_id, audio_in=wav_file_path) + self.check_result('test_run_with_wav_pytorch', rec_result) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run_with_url_tf(self): + def test_run_with_url(self): """run with single url file """ @@ -198,6 +329,12 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, model_id=self.am_tf_model_id, audio_in=URL_FILE) self.check_result('test_run_with_url_tf', rec_result) + logger.info('Run ASR test with url file (pytorch)...') + + rec_result = self.run_pipeline( + model_id=self.am_pytorch_model_id, audio_in=URL_FILE) + self.check_result('test_run_with_url_pytorch', rec_result) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_wav_dataset_pytorch(self): """run with datasets, and audio format is waveform @@ -217,7 +354,6 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, data.text # hypothesis text """ - logger.info('Run ASR test with waveform dataset (pytorch)...') logger.info('Downloading waveform testsets file ...') dataset_path = download_and_untar( @@ -225,40 +361,38 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, LITTLE_TESTSETS_URL, self.workspace) dataset_path = os.path.join(dataset_path, 'wav', 'test') + logger.info('Run ASR test with waveform dataset (tensorflow)...') + + rec_result = self.run_pipeline( + model_id=self.am_tf_model_id, audio_in=dataset_path) + self.check_result('test_run_with_wav_dataset_tf', rec_result) + + logger.info('Run ASR test with waveform dataset (pytorch)...') + rec_result = self.run_pipeline( model_id=self.am_pytorch_model_id, audio_in=dataset_path) self.check_result('test_run_with_wav_dataset_pytorch', rec_result) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_run_with_wav_dataset_tf(self): - """run with datasets, and audio format is waveform - datasets directory: - - wav - test # testsets - xx.wav - ... - dev # devsets - yy.wav - ... - train # trainsets - zz.wav - ... - transcript - data.text # hypothesis text + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_all_models(self): + """run with all models """ - logger.info('Run ASR test with waveform dataset (tensorflow)...') - logger.info('Downloading waveform testsets file ...') - - dataset_path = download_and_untar( - os.path.join(self.workspace, LITTLE_TESTSETS_FILE), - LITTLE_TESTSETS_URL, self.workspace) - dataset_path = os.path.join(dataset_path, 'wav', 'test') - - rec_result = self.run_pipeline( - model_id=self.am_tf_model_id, audio_in=dataset_path) - self.check_result('test_run_with_wav_dataset_tf', rec_result) + logger.info('Run ASR test with all models') + + for item in self.all_models_info: + model_id = item['model_id'] + wav_path = item['wav_path'] + rec_result = self.run_pipeline( + model_id=model_id, audio_in=wav_path) + if rec_result.__contains__(OutputKeys.TEXT): + logger.info(ColorCodes.MAGENTA + str(item['model_id']) + ' ' + + ColorCodes.YELLOW + + str(rec_result[OutputKeys.TEXT]) + + ColorCodes.END) + else: + logger.info(ColorCodes.MAGENTA + str(rec_result) + + ColorCodes.END) @unittest.skip('demo compatibility test is only enabled on a needed-basis') def test_demo_compatibility(self): diff --git a/tests/pipelines/test_csanmt_translation.py b/tests/pipelines/test_csanmt_translation.py index f7ec81cd..83827813 100644 --- a/tests/pipelines/test_csanmt_translation.py +++ b/tests/pipelines/test_csanmt_translation.py @@ -26,6 +26,20 @@ class TranslationTest(unittest.TestCase, DemoCompatibilityCheck): pipeline_ins = pipeline(self.task, model=model_id) print(pipeline_ins(input=inputs)) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_en2fr(self): + model_id = 'damo/nlp_csanmt_translation_en2fr' + inputs = 'When I was in my 20s, I saw my very first psychotherapy client.' + pipeline_ins = pipeline(self.task, model=model_id) + print(pipeline_ins(input=inputs)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name_for_fr2en(self): + model_id = 'damo/nlp_csanmt_translation_fr2en' + inputs = "Quand j'avais la vingtaine, j'ai vu mes tout premiers clients comme psychothérapeute." + pipeline_ins = pipeline(self.task, model=model_id) + print(pipeline_ins(input=inputs)) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): inputs = '声明补充说,沃伦的同事都深感震惊,并且希望他能够投案自首。' diff --git a/tests/pipelines/test_gpt3_text_generation.py b/tests/pipelines/test_gpt3_text_generation.py index 413b5874..674e95bb 100644 --- a/tests/pipelines/test_gpt3_text_generation.py +++ b/tests/pipelines/test_gpt3_text_generation.py @@ -17,12 +17,12 @@ class TextGPT3GenerationTest(unittest.TestCase): self.model_dir_13B = snapshot_download(self.model_id_13B) self.input = '好的' - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skip('distributed gpt3 1.3B, skipped') def test_gpt3_1_3B(self): pipe = pipeline(Tasks.text_generation, model=self.model_id_1_3B) print(pipe(self.input)) - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skip('distributed gpt3 2.7B, skipped') def test_gpt3_2_7B(self): pipe = pipeline(Tasks.text_generation, model=self.model_id_2_7B) print(pipe(self.input)) diff --git a/tests/pipelines/test_ofa_tasks.py b/tests/pipelines/test_ofa_tasks.py index f8366508..57dcb0c3 100644 --- a/tests/pipelines/test_ofa_tasks.py +++ b/tests/pipelines/test_ofa_tasks.py @@ -45,6 +45,14 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): result = img_captioning('data/test/images/image_captioning.png') print(result[OutputKeys.CAPTION]) + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_ocr_recognize_with_name(self): + ocr_recognize = pipeline( + Tasks.ocr_recognition, + model='damo/ofa_ocr-recognition_scene_base_zh') + result = ocr_recognize('data/test/images/image_ocr_recognition.jpg') + print(result[OutputKeys.TEXT]) + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_image_classification_with_model(self): model = Model.from_pretrained( @@ -67,7 +75,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): def test_run_with_summarization_with_model(self): model = Model.from_pretrained( 'damo/ofa_summarization_gigaword_large_en') - ofa_pipe = pipeline(Tasks.summarization, model=model) + ofa_pipe = pipeline(Tasks.text_summarization, model=model) text = 'five-time world champion michelle kwan withdrew' + \ 'from the #### us figure skating championships on wednesday ,' + \ ' but will petition us skating officials for the chance to ' + \ @@ -79,7 +87,7 @@ class OfaTasksTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_summarization_with_name(self): ofa_pipe = pipeline( - Tasks.summarization, + Tasks.text_summarization, model='damo/ofa_summarization_gigaword_large_en') text = 'five-time world champion michelle kwan withdrew' + \ 'from the #### us figure skating championships on wednesday ,' + \ diff --git a/tests/pipelines/test_part_of_speech.py b/tests/pipelines/test_part_of_speech.py index 25f4491c..61cdfe73 100644 --- a/tests/pipelines/test_part_of_speech.py +++ b/tests/pipelines/test_part_of_speech.py @@ -13,7 +13,7 @@ from modelscope.utils.test_utils import test_level class PartOfSpeechTest(unittest.TestCase): - model_id = 'damo/nlp_structbert_part-of-speech_chinese-base' + model_id = 'damo/nlp_structbert_part-of-speech_chinese-lite' sentence = '今天天气不错,适合出去游玩' @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') @@ -34,20 +34,17 @@ class PartOfSpeechTest(unittest.TestCase): model = Model.from_pretrained(self.model_id) tokenizer = TokenClassificationPreprocessor(model.model_dir) pipeline_ins = pipeline( - task=Tasks.token_classification, - model=model, - preprocessor=tokenizer) + task=Tasks.part_of_speech, model=model, preprocessor=tokenizer) print(pipeline_ins(input=self.sentence)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline( - task=Tasks.token_classification, model=self.model_id) + pipeline_ins = pipeline(task=Tasks.part_of_speech, model=self.model_id) print(pipeline_ins(input=self.sentence)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): - pipeline_ins = pipeline(task=Tasks.token_classification) + pipeline_ins = pipeline(task=Tasks.part_of_speech) print(pipeline_ins(input=self.sentence)) diff --git a/tests/pipelines/test_relation_extraction.py b/tests/pipelines/test_relation_extraction.py index 57d98f66..561eaf21 100644 --- a/tests/pipelines/test_relation_extraction.py +++ b/tests/pipelines/test_relation_extraction.py @@ -15,7 +15,7 @@ from modelscope.utils.test_utils import test_level class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): def setUp(self) -> None: - self.task = Tasks.information_extraction + self.task = Tasks.relation_extraction self.model_id = 'damo/nlp_bert_relation-extraction_chinese-base' sentence = '高捷,祖籍江苏,本科毕业于东南大学' @@ -28,7 +28,7 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): pipeline1 = InformationExtractionPipeline( model, preprocessor=tokenizer) pipeline2 = pipeline( - Tasks.information_extraction, model=model, preprocessor=tokenizer) + Tasks.relation_extraction, model=model, preprocessor=tokenizer) print(f'sentence: {self.sentence}\n' f'pipeline1:{pipeline1(input=self.sentence)}') print() @@ -39,7 +39,7 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): model = Model.from_pretrained(self.model_id) tokenizer = RelationExtractionPreprocessor(model.model_dir) pipeline_ins = pipeline( - task=Tasks.information_extraction, + task=Tasks.relation_extraction, model=model, preprocessor=tokenizer) print(pipeline_ins(input=self.sentence)) @@ -47,12 +47,12 @@ class RelationExtractionTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): pipeline_ins = pipeline( - task=Tasks.information_extraction, model=self.model_id) + task=Tasks.relation_extraction, model=self.model_id) print(pipeline_ins(input=self.sentence)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): - pipeline_ins = pipeline(task=Tasks.information_extraction) + pipeline_ins = pipeline(task=Tasks.relation_extraction) print(pipeline_ins(input=self.sentence)) @unittest.skip('demo compatibility test is only enabled on a needed-basis') diff --git a/tests/pipelines/test_table_question_answering.py b/tests/pipelines/test_table_question_answering.py index 3d943e51..571ca795 100644 --- a/tests/pipelines/test_table_question_answering.py +++ b/tests/pipelines/test_table_question_answering.py @@ -43,7 +43,7 @@ def tableqa_tracking_and_print_results_with_history( print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) print('query result:', output_dict[OutputKeys.QUERT_RESULT]) - print('json dumps', json.dumps(output_dict)) + print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() historical_queries = output_dict[OutputKeys.HISTORY] @@ -66,10 +66,42 @@ def tableqa_tracking_and_print_results_without_history( print('sql text:', output_dict[OutputKeys.SQL_STRING]) print('sql query:', output_dict[OutputKeys.SQL_QUERY]) print('query result:', output_dict[OutputKeys.QUERT_RESULT]) - print('json dumps', json.dumps(output_dict)) + print('json dumps', json.dumps(output_dict, ensure_ascii=False)) print() +def tableqa_tracking_and_print_results_with_tableid( + pipelines: List[TableQuestionAnsweringPipeline]): + test_case = { + 'utterance': [ + ['有哪些风险类型?', 'fund'], + ['风险类型有多少种?', 'reservoir'], + ['珠江流域的小(2)型水库的库容总量是多少?', 'reservoir'], + ['那平均值是多少?', 'reservoir'], + ['那水库的名称呢?', 'reservoir'], + ['换成中型的呢?', 'reservoir'], + ['枣庄营业厅的电话', 'business'], + ['那地址呢?', 'business'], + ['枣庄营业厅的电话和地址', 'business'], + ], + } + for p in pipelines: + historical_queries = None + for question, table_id in test_case['utterance']: + output_dict = p({ + 'question': question, + 'table_id': table_id, + 'history_sql': historical_queries + }) + print('question', question) + print('sql text:', output_dict[OutputKeys.SQL_STRING]) + print('sql query:', output_dict[OutputKeys.SQL_QUERY]) + print('query result:', output_dict[OutputKeys.QUERT_RESULT]) + print('json dumps', json.dumps(output_dict, ensure_ascii=False)) + print() + historical_queries = output_dict[OutputKeys.HISTORY] + + class TableQuestionAnswering(unittest.TestCase): def setUp(self) -> None: @@ -93,15 +125,27 @@ class TableQuestionAnswering(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) + self.tokenizer = BertTokenizer( + os.path.join(model.model_dir, ModelFile.VOCAB_FILE)) + db = Database( + tokenizer=self.tokenizer, + table_file_path=[ + os.path.join(model.model_dir, 'databases', fname) + for fname in os.listdir( + os.path.join(model.model_dir, 'databases')) + ], + syn_dict_file_path=os.path.join(model.model_dir, 'synonym.txt'), + is_use_sqlite=False) preprocessor = TableQuestionAnsweringPreprocessor( - model_dir=model.model_dir) + model_dir=model.model_dir, db=db) pipelines = [ pipeline( Tasks.table_question_answering, model=model, - preprocessor=preprocessor) + preprocessor=preprocessor, + db=db) ] - tableqa_tracking_and_print_results_with_history(pipelines) + tableqa_tracking_and_print_results_with_tableid(pipelines) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_task(self): @@ -132,7 +176,6 @@ class TableQuestionAnswering(unittest.TestCase): db=db) ] tableqa_tracking_and_print_results_without_history(pipelines) - tableqa_tracking_and_print_results_with_history(pipelines) if __name__ == '__main__': diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py deleted file mode 100644 index 39dbac99..00000000 --- a/tests/pipelines/test_text_classification.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from modelscope.models import Model -from modelscope.msdatasets import MsDataset -from modelscope.pipelines import pipeline -from modelscope.pipelines.nlp import SequenceClassificationPipeline -from modelscope.preprocessors import SequenceClassificationPreprocessor -from modelscope.utils.constant import Tasks -from modelscope.utils.demo_utils import DemoCompatibilityCheck -from modelscope.utils.test_utils import test_level - - -class SequenceClassificationTest(unittest.TestCase, DemoCompatibilityCheck): - sentence1 = 'i like this wonderful place' - - def setUp(self) -> None: - self.model_id = 'damo/bert-base-sst2' - self.task = Tasks.text_classification - - def predict(self, pipeline_ins: SequenceClassificationPipeline): - from easynlp.appzoo import load_dataset - - set = load_dataset('glue', 'sst2') - data = set['test']['sentence'][:3] - - results = pipeline_ins(data[0]) - print(results) - results = pipeline_ins(data[1]) - print(results) - - print(data) - - def printDataset(self, dataset: MsDataset): - for i, r in enumerate(dataset): - if i > 10: - break - print(r) - - # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - @unittest.skip('nlp model does not support tensor input, skipped') - def test_run_with_model_from_modelhub(self): - model = Model.from_pretrained(self.model_id) - preprocessor = SequenceClassificationPreprocessor( - model.model_dir, first_sequence='sentence', second_sequence=None) - pipeline_ins = pipeline( - task=Tasks.text_classification, - model=model, - preprocessor=preprocessor) - print(f'sentence1: {self.sentence1}\n' - f'pipeline1:{pipeline_ins(input=self.sentence1)}') - - # @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - @unittest.skip('nlp model does not support tensor input, skipped') - def test_run_with_model_name(self): - text_classification = pipeline( - task=Tasks.text_classification, model=self.model_id) - result = text_classification( - MsDataset.load( - 'xcopa', - subset_name='translation-et', - namespace='damotest', - split='test', - target='premise')) - self.printDataset(result) - - # @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - @unittest.skip('nlp model does not support tensor input, skipped') - def test_run_with_default_model(self): - text_classification = pipeline(task=Tasks.text_classification) - result = text_classification( - MsDataset.load( - 'xcopa', - subset_name='translation-et', - namespace='damotest', - split='test', - target='premise')) - self.printDataset(result) - - # @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - @unittest.skip('nlp model does not support tensor input, skipped') - def test_run_with_modelscope_dataset(self): - text_classification = pipeline(task=Tasks.text_classification) - # loaded from modelscope dataset - dataset = MsDataset.load( - 'xcopa', - subset_name='translation-et', - namespace='damotest', - split='test', - target='premise') - result = text_classification(dataset) - self.printDataset(result) - - @unittest.skip('demo compatibility test is only enabled on a needed-basis') - def test_demo_compatibility(self): - self.compatibility_check() - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/pipelines/test_text_generation.py b/tests/pipelines/test_text_generation.py index 5a270f83..4b0ebd47 100644 --- a/tests/pipelines/test_text_generation.py +++ b/tests/pipelines/test_text_generation.py @@ -15,12 +15,17 @@ from modelscope.utils.test_utils import test_level class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): def setUp(self) -> None: - self.palm_model_id_zh = 'damo/nlp_palm2.0_text-generation_chinese-base' + self.palm_model_id_zh_base = 'damo/nlp_palm2.0_text-generation_chinese-base' + self.palm_model_id_zh_large = 'damo/nlp_palm2.0_text-generation_chinese-large' + self.palm_model_id_zh_commodity = 'damo/nlp_palm2.0_text-generation_commodity_chinese-base' + self.palm_model_id_zh_weather = 'damo/nlp_palm2.0_text-generation_weather_chinese-base' self.palm_model_id_en = 'damo/nlp_palm2.0_text-generation_english-base' self.palm_input_zh = """ 本文总结了十个可穿戴产品的设计原则,而这些原则,同样也是笔者认为是这个行业最吸引人的地方: 1.为人们解决重复性问题;2.从人开始,而不是从机器开始;3.要引起注意,但不要刻意;4.提升用户能力,而不是取代 """ + self.palm_input_commodity = '垃圾桶,双层,可拆卸,加高,加高双层,把手,垃圾桶,内附,万向轮' + self.palm_input_weather = "今日天气类型='浮尘'&空气质量等级='重度污染'&紫外线强度指数='中等'" self.palm_input_en = """ The Director of Public Prosecutions who let off Lord Janner over alleged child sex abuse started her career at a legal chambers when the disgraced Labour peer was a top QC there . Alison Saunders , @@ -51,8 +56,8 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): print(pipeline_ins(input)) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_palm_zh_with_model_name(self): - self.run_pipeline_with_model_id(self.palm_model_id_zh, + def test_palm_zh_base_with_model_name(self): + self.run_pipeline_with_model_id(self.palm_model_id_zh_base, self.palm_input_zh) @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') @@ -71,10 +76,40 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): self.gpt3_input) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_palm_zh_with_model_instance(self): - self.run_pipeline_with_model_instance(self.palm_model_id_zh, + def test_palm_zh_large_with_model_name(self): + self.run_pipeline_with_model_id(self.palm_model_id_zh_large, + self.palm_input_zh) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_commodity_with_model_name(self): + self.run_pipeline_with_model_id(self.palm_model_id_zh_commodity, + self.palm_input_commodity) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_weather_with_model_name(self): + self.run_pipeline_with_model_id(self.palm_model_id_zh_weather, + self.palm_input_weather) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_base_with_model_instance(self): + self.run_pipeline_with_model_instance(self.palm_model_id_zh_base, self.palm_input_zh) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_large_with_model_instance(self): + self.run_pipeline_with_model_instance(self.palm_model_id_zh_large, + self.palm_input_zh) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_commodity_with_model_instance(self): + self.run_pipeline_with_model_instance(self.palm_model_id_zh_commodity, + self.palm_input_commodity) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_palm_zh_weather_with_model_instance(self): + self.run_pipeline_with_model_instance(self.palm_model_id_zh_weather, + self.palm_input_weather) + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_palm_en_with_model_instance(self): self.run_pipeline_with_model_instance(self.palm_model_id_en, @@ -92,8 +127,9 @@ class TextGenerationTest(unittest.TestCase, DemoCompatibilityCheck): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_palm(self): - for model_id, input in ((self.palm_model_id_zh, self.palm_input_zh), - (self.palm_model_id_en, self.palm_input_en)): + for model_id, input in ((self.palm_model_id_zh_base, + self.palm_input_zh), (self.palm_model_id_en, + self.palm_input_en)): cache_path = snapshot_download(model_id) model = PalmForTextGeneration.from_pretrained(cache_path) preprocessor = TextGenerationPreprocessor( diff --git a/tests/pipelines/test_passage_ranking.py b/tests/pipelines/test_text_ranking.py similarity index 70% rename from tests/pipelines/test_passage_ranking.py rename to tests/pipelines/test_text_ranking.py index 5faa365e..ece3c617 100644 --- a/tests/pipelines/test_passage_ranking.py +++ b/tests/pipelines/test_text_ranking.py @@ -4,15 +4,15 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model -from modelscope.models.nlp import PassageRanking +from modelscope.models.nlp import TextRanking from modelscope.pipelines import pipeline -from modelscope.pipelines.nlp import PassageRankingPipeline -from modelscope.preprocessors import PassageRankingPreprocessor +from modelscope.pipelines.nlp import TextRankingPipeline +from modelscope.preprocessors import TextRankingPreprocessor from modelscope.utils.constant import Tasks from modelscope.utils.test_utils import test_level -class PassageRankingTest(unittest.TestCase): +class TextRankingTest(unittest.TestCase): model_id = 'damo/nlp_corom_passage-ranking_english-base' inputs = { 'source_sentence': ["how long it take to get a master's degree"], @@ -27,11 +27,11 @@ class PassageRankingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_by_direct_model_download(self): cache_path = snapshot_download(self.model_id) - tokenizer = PassageRankingPreprocessor(cache_path) - model = PassageRanking.from_pretrained(cache_path) - pipeline1 = PassageRankingPipeline(model, preprocessor=tokenizer) + tokenizer = TextRankingPreprocessor(cache_path) + model = TextRanking.from_pretrained(cache_path) + pipeline1 = TextRankingPipeline(model, preprocessor=tokenizer) pipeline2 = pipeline( - Tasks.passage_ranking, model=model, preprocessor=tokenizer) + Tasks.text_ranking, model=model, preprocessor=tokenizer) print(f'sentence: {self.inputs}\n' f'pipeline1:{pipeline1(input=self.inputs)}') print() @@ -40,20 +40,19 @@ class PassageRankingTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): model = Model.from_pretrained(self.model_id) - tokenizer = PassageRankingPreprocessor(model.model_dir) + tokenizer = TextRankingPreprocessor(model.model_dir) pipeline_ins = pipeline( - task=Tasks.passage_ranking, model=model, preprocessor=tokenizer) + task=Tasks.text_ranking, model=model, preprocessor=tokenizer) print(pipeline_ins(input=self.inputs)) @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_run_with_model_name(self): - pipeline_ins = pipeline( - task=Tasks.passage_ranking, model=self.model_id) + pipeline_ins = pipeline(task=Tasks.text_ranking, model=self.model_id) print(pipeline_ins(input=self.inputs)) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): - pipeline_ins = pipeline(task=Tasks.passage_ranking) + pipeline_ins = pipeline(task=Tasks.text_ranking) print(pipeline_ins(input=self.inputs)) diff --git a/tests/pipelines/test_tinynas_detection.py b/tests/pipelines/test_tinynas_detection.py index 63db9145..43e1842d 100644 --- a/tests/pipelines/test_tinynas_detection.py +++ b/tests/pipelines/test_tinynas_detection.py @@ -4,22 +4,45 @@ import unittest from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck from modelscope.utils.test_utils import test_level -class TinynasObjectDetectionTest(unittest.TestCase): +class TinynasObjectDetectionTest(unittest.TestCase, DemoCompatibilityCheck): + + def setUp(self) -> None: + self.task = Tasks.image_object_detection + self.model_id = 'damo/cv_tinynas_object-detection_damoyolo' @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') - def test_run(self): + def test_run_airdet(self): tinynas_object_detection = pipeline( Tasks.image_object_detection, model='damo/cv_tinynas_detection') result = tinynas_object_detection( 'data/test/images/image_detection.jpg') print(result) + @unittest.skip('will be enabled after damoyolo officially released') + def test_run_damoyolo(self): + tinynas_object_detection = pipeline( + Tasks.image_object_detection, + model='damo/cv_tinynas_object-detection_damoyolo') + result = tinynas_object_detection( + 'data/test/images/image_detection.jpg') + print(result) + @unittest.skip('demo compatibility test is only enabled on a needed-basis') def test_demo_compatibility(self): - self.test_demo() + self.compatibility_check() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_image_object_detection_auto_pipeline(self): + test_image = 'data/test/images/image_detection.jpg' + tinynas_object_detection = pipeline( + Tasks.image_object_detection, model='damo/cv_tinynas_detection') + result = tinynas_object_detection(test_image) + tinynas_object_detection.show_result(test_image, result, + 'demo_ret.jpg') if __name__ == '__main__': diff --git a/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py new file mode 100644 index 00000000..4dffa998 --- /dev/null +++ b/tests/trainers/easycv/test_easycv_trainer_face_2d_keypoints.py @@ -0,0 +1,71 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import glob +import os +import shutil +import tempfile +import unittest + +import torch + +from modelscope.metainfo import Trainers +from modelscope.msdatasets import MsDataset +from modelscope.trainers import build_trainer +from modelscope.utils.constant import DownloadMode, LogKeys, Tasks +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import test_level + + +@unittest.skipIf(not torch.cuda.is_available(), 'cuda unittest') +class EasyCVTrainerTestFace2DKeypoints(unittest.TestCase): + model_id = 'damo/cv_mobilenet_face-2d-keypoints_alignment' + + def setUp(self): + self.logger = get_logger() + self.logger.info(('Testing %s.%s' % + (type(self).__name__, self._testMethodName))) + + def _train(self, tmp_dir): + cfg_options = {'train.max_epochs': 2} + + trainer_name = Trainers.easycv + + train_dataset = MsDataset.load( + dataset_name='face_2d_keypoints_dataset', + namespace='modelscope', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + eval_dataset = MsDataset.load( + dataset_name='face_2d_keypoints_dataset', + namespace='modelscope', + split='train', + download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) + + kwargs = dict( + model=self.model_id, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + work_dir=tmp_dir, + cfg_options=cfg_options) + + trainer = build_trainer(trainer_name, kwargs) + trainer.train() + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_single_gpu(self): + temp_file_dir = tempfile.TemporaryDirectory() + tmp_dir = temp_file_dir.name + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + + self._train(tmp_dir) + + results_files = os.listdir(tmp_dir) + json_files = glob.glob(os.path.join(tmp_dir, '*.log.json')) + self.assertEqual(len(json_files), 1) + self.assertIn(f'{LogKeys.EPOCH}_2.pth', results_files) + + temp_file_dir.cleanup() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/trainers/test_finetune_sequence_classification.py b/tests/trainers/test_finetune_sequence_classification.py index f2adfa22..aa8aba5c 100644 --- a/tests/trainers/test_finetune_sequence_classification.py +++ b/tests/trainers/test_finetune_sequence_classification.py @@ -16,7 +16,8 @@ from modelscope.trainers.optimizer.child_tuning_adamw_optimizer import \ calculate_fisher from modelscope.utils.constant import ModelFile, Tasks from modelscope.utils.data_utils import to_device -from modelscope.utils.regress_test_utils import MsRegressTool +from modelscope.utils.regress_test_utils import (MsRegressTool, + compare_arguments_nested) from modelscope.utils.test_utils import test_level @@ -37,10 +38,38 @@ class TestFinetuneSequenceClassification(unittest.TestCase): shutil.rmtree(self.tmp_dir) super().tearDown() - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skip( + 'Skip testing trainer repeatable, because it\'s unstable in daily UT') def test_trainer_repeatable(self): import torch # noqa + def compare_fn(value1, value2, key, type): + # Ignore the differences between optimizers of two torch versions + if type != 'optimizer': + return None + + match = (value1['type'] == value2['type']) + shared_defaults = set(value1['defaults'].keys()).intersection( + set(value2['defaults'].keys())) + match = all([ + compare_arguments_nested(f'Optimizer defaults {key} not match', + value1['defaults'][key], + value2['defaults'][key]) + for key in shared_defaults + ]) and match + match = (len(value1['state_dict']['param_groups']) == len( + value2['state_dict']['param_groups'])) and match + for group1, group2 in zip(value1['state_dict']['param_groups'], + value2['state_dict']['param_groups']): + shared_keys = set(group1.keys()).intersection( + set(group2.keys())) + match = all([ + compare_arguments_nested( + f'Optimizer param_groups {key} not match', group1[key], + group2[key]) for key in shared_keys + ]) and match + return match + def cfg_modify_fn(cfg): cfg.task = 'nli' cfg['preprocessor'] = {'type': 'nli-tokenizer'} @@ -98,7 +127,8 @@ class TestFinetuneSequenceClassification(unittest.TestCase): name=Trainers.nlp_base_trainer, default_args=kwargs) with self.regress_tool.monitor_ms_train( - trainer, 'sbert-base-tnews', level='strict'): + trainer, 'sbert-base-tnews', level='strict', + compare_fn=compare_fn): trainer.train() def finetune(self, diff --git a/tests/trainers/test_finetune_passage_ranking.py b/tests/trainers/test_finetune_text_ranking.py similarity index 90% rename from tests/trainers/test_finetune_passage_ranking.py rename to tests/trainers/test_finetune_text_ranking.py index f833f981..e603bff2 100644 --- a/tests/trainers/test_finetune_passage_ranking.py +++ b/tests/trainers/test_finetune_text_ranking.py @@ -41,7 +41,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): model_id, train_dataset, eval_dataset, - name=Trainers.nlp_passage_ranking_trainer, + name=Trainers.nlp_text_ranking_trainer, cfg_modify_fn=None, **kwargs): kwargs = dict( @@ -61,8 +61,8 @@ class TestFinetuneSequenceClassification(unittest.TestCase): def test_finetune_msmarco(self): def cfg_modify_fn(cfg): - cfg.task = 'passage-ranking' - cfg['preprocessor'] = {'type': 'passage-ranking'} + cfg.task = 'text-ranking' + cfg['preprocessor'] = {'type': 'text-ranking'} cfg.train.optimizer.lr = 2e-5 cfg['dataset'] = { 'train': { @@ -105,7 +105,7 @@ class TestFinetuneSequenceClassification(unittest.TestCase): }, { 'type': 'EvaluationHook', 'by_epoch': False, - 'interval': 3000 + 'interval': 15 }] return cfg @@ -114,18 +114,19 @@ class TestFinetuneSequenceClassification(unittest.TestCase): train_ds = ds['train'].to_hf_dataset() dev_ds = ds['train'].to_hf_dataset() + model_id = 'damo/nlp_corom_passage-ranking_english-base' self.finetune( - model_id='damo/nlp_corom_passage-ranking_english-base', + model_id=model_id, train_dataset=train_ds, eval_dataset=dev_ds, cfg_modify_fn=cfg_modify_fn) output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) - self.pipeline_passage_ranking(output_dir) + self.pipeline_text_ranking(output_dir) - def pipeline_passage_ranking(self, model_dir): + def pipeline_text_ranking(self, model_dir): model = Model.from_pretrained(model_dir) - pipeline_ins = pipeline(task=Tasks.passage_ranking, model=model) + pipeline_ins = pipeline(task=Tasks.text_ranking, model=model) print(pipeline_ins(input=self.inputs)) diff --git a/tests/trainers/test_image_denoise_trainer.py b/tests/trainers/test_image_denoise_trainer.py index 68ddf616..c4abca6a 100644 --- a/tests/trainers/test_image_denoise_trainer.py +++ b/tests/trainers/test_image_denoise_trainer.py @@ -51,7 +51,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase): shutil.rmtree(self.tmp_dir, ignore_errors=True) super().tearDown() - @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_trainer(self): kwargs = dict( model=self.model_id, @@ -65,7 +65,7 @@ class ImageDenoiseTrainerTest(unittest.TestCase): for i in range(2): self.assertIn(f'epoch_{i+1}.pth', results_files) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_trainer_with_model_and_args(self): model = NAFNetForImageDenoise.from_pretrained(self.cache_path) kwargs = dict( diff --git a/tests/trainers/test_image_portrait_enhancement_trainer.py b/tests/trainers/test_image_portrait_enhancement_trainer.py index 049adf7e..123e0098 100644 --- a/tests/trainers/test_image_portrait_enhancement_trainer.py +++ b/tests/trainers/test_image_portrait_enhancement_trainer.py @@ -14,52 +14,14 @@ from modelscope.hub.snapshot_download import snapshot_download from modelscope.metainfo import Trainers from modelscope.models.cv.image_portrait_enhancement import \ ImagePortraitEnhancement +from modelscope.msdatasets import MsDataset +from modelscope.msdatasets.task_datasets.image_portrait_enhancement import \ + ImagePortraitEnhancementDataset from modelscope.trainers import build_trainer -from modelscope.utils.constant import ModelFile +from modelscope.utils.constant import DownloadMode, ModelFile from modelscope.utils.test_utils import test_level -class PairedImageDataset(data.Dataset): - - def __init__(self, root, size=512): - super(PairedImageDataset, self).__init__() - self.size = size - gt_dir = osp.join(root, 'gt') - lq_dir = osp.join(root, 'lq') - self.gt_filelist = os.listdir(gt_dir) - self.gt_filelist = sorted(self.gt_filelist, key=lambda x: int(x[:-4])) - self.gt_filelist = [osp.join(gt_dir, f) for f in self.gt_filelist] - self.lq_filelist = os.listdir(lq_dir) - self.lq_filelist = sorted(self.lq_filelist, key=lambda x: int(x[:-4])) - self.lq_filelist = [osp.join(lq_dir, f) for f in self.lq_filelist] - - def _img_to_tensor(self, img): - img = torch.from_numpy(img[:, :, [2, 1, 0]]).permute(2, 0, 1).type( - torch.float32) / 255. - return (img - 0.5) / 0.5 - - def __getitem__(self, index): - lq = cv2.imread(self.lq_filelist[index]) - gt = cv2.imread(self.gt_filelist[index]) - lq = cv2.resize( - lq, (self.size, self.size), interpolation=cv2.INTER_CUBIC) - gt = cv2.resize( - gt, (self.size, self.size), interpolation=cv2.INTER_CUBIC) - - return \ - {'src': self._img_to_tensor(lq), 'target': self._img_to_tensor(gt)} - - def __len__(self): - return len(self.gt_filelist) - - def to_torch_dataset(self, - columns: Union[str, List[str]] = None, - preprocessors: Union[Callable, List[Callable]] = None, - **format_kwargs): - # self.preprocessor = preprocessors - return self - - class TestImagePortraitEnhancementTrainer(unittest.TestCase): def setUp(self): @@ -70,8 +32,23 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): self.model_id = 'damo/cv_gpen_image-portrait-enhancement' - self.dataset = PairedImageDataset( - './data/test/images/face_enhancement/') + dataset_train = MsDataset.load( + 'image-portrait-enhancement-dataset', + namespace='modelscope', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + dataset_val = MsDataset.load( + 'image-portrait-enhancement-dataset', + namespace='modelscope', + subset_name='default', + split='test', + download_mode=DownloadMode.FORCE_REDOWNLOAD)._hf_ds + + self.dataset_train = ImagePortraitEnhancementDataset( + dataset_train, is_train=True) + self.dataset_val = ImagePortraitEnhancementDataset( + dataset_val, is_train=False) def tearDown(self): shutil.rmtree(self.tmp_dir, ignore_errors=True) @@ -81,8 +58,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): def test_trainer(self): kwargs = dict( model=self.model_id, - train_dataset=self.dataset, - eval_dataset=self.dataset, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, device='gpu', work_dir=self.tmp_dir) @@ -101,8 +78,8 @@ class TestImagePortraitEnhancementTrainer(unittest.TestCase): kwargs = dict( cfg_file=os.path.join(cache_path, ModelFile.CONFIGURATION), model=model, - train_dataset=self.dataset, - eval_dataset=self.dataset, + train_dataset=self.dataset_train, + eval_dataset=self.dataset_val, device='gpu', max_epochs=2, work_dir=self.tmp_dir) diff --git a/tests/trainers/test_ofa_trainer.py b/tests/trainers/test_ofa_trainer.py index 3322271d..ac2e0678 100644 --- a/tests/trainers/test_ofa_trainer.py +++ b/tests/trainers/test_ofa_trainer.py @@ -3,58 +3,104 @@ import glob import os import os.path as osp import shutil -import tempfile import unittest -from modelscope.metainfo import Trainers +import json + +from modelscope.metainfo import Metrics, Trainers from modelscope.msdatasets import MsDataset from modelscope.trainers import build_trainer -from modelscope.utils.constant import DownloadMode +from modelscope.utils.constant import ModelFile from modelscope.utils.test_utils import test_level class TestOfaTrainer(unittest.TestCase): - def setUp(self): - column_map = {'premise': 'text', 'hypothesis': 'text2'} - data_train = MsDataset.load( - dataset_name='glue', - subset_name='mnli', - namespace='modelscope', - split='train[:100]', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - self.train_dataset = MsDataset.from_hf_dataset( - data_train._hf_ds.rename_columns(column_map)) - data_eval = MsDataset.load( - dataset_name='glue', - subset_name='mnli', - namespace='modelscope', - split='validation_matched[:8]', - download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS) - self.test_dataset = MsDataset.from_hf_dataset( - data_eval._hf_ds.rename_columns(column_map)) - - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') - def test_trainer(self): - os.environ['LOCAL_RANK'] = '0' - model_id = 'damo/ofa_text-classification_mnli_large_en' - - kwargs = dict( - model=model_id, - cfg_file= - '/Users/running_you/.cache/modelscope/hub/damo/ofa_text-classification_mnli_large_en//configuration.json', - train_dataset=self.train_dataset, - eval_dataset=self.test_dataset, - work_dir='/Users/running_you/.cache/modelscope/hub/work/mnli') - - trainer = build_trainer(name=Trainers.ofa_tasks, default_args=kwargs) - os.makedirs(trainer.work_dir, exist_ok=True) + def setUp(self) -> None: + self.finetune_cfg = \ + {'framework': 'pytorch', + 'task': 'image-captioning', + 'model': {'type': 'ofa', + 'beam_search': {'beam_size': 5, + 'max_len_b': 16, + 'min_len': 1, + 'no_repeat_ngram_size': 0}, + 'seed': 7, + 'max_src_length': 256, + 'language': 'en', + 'gen_type': 'generation', + 'patch_image_size': 480, + 'max_image_size': 480, + 'imagenet_default_mean_and_std': False}, + 'pipeline': {'type': 'image-captioning'}, + 'dataset': {'column_map': {'text': 'caption'}}, + 'train': {'work_dir': 'work/ckpts/caption', + # 'launcher': 'pytorch', + 'max_epochs': 1, + 'use_fp16': True, + 'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, + 'lr_scheduler': {'name': 'polynomial_decay', + 'warmup_proportion': 0.01, + 'lr_end': 1e-07}, + 'lr_scheduler_hook': {'type': 'LrSchedulerHook', 'by_epoch': False}, + 'optimizer': {'type': 'AdamW', 'lr': 5e-05, 'weight_decay': 0.01}, + 'optimizer_hook': {'type': 'TorchAMPOptimizerHook', + 'cumulative_iters': 1, + 'grad_clip': {'max_norm': 1.0, 'norm_type': 2}, + 'loss_keys': 'loss'}, + 'criterion': {'name': 'AdjustLabelSmoothedCrossEntropyCriterion', + 'constraint_range': None, + 'drop_worst_after': 0, + 'drop_worst_ratio': 0.0, + 'ignore_eos': False, + 'ignore_prefix_size': 0, + 'label_smoothing': 0.0, + 'reg_alpha': 1.0, + 'report_accuracy': False, + 'sample_patch_num': 196, + 'sentence_avg': False, + 'use_rdrop': True}, + 'hooks': [{'type': 'BestCkptSaverHook', + 'metric_key': 'bleu-4', + 'interval': 100}, + {'type': 'TextLoggerHook', 'interval': 1}, + {'type': 'IterTimerHook'}, + {'type': 'EvaluationHook', 'by_epoch': True, 'interval': 1}]}, + 'evaluation': {'dataloader': {'batch_size_per_gpu': 4, 'workers_per_gpu': 0}, + 'metrics': [{'type': 'bleu', + 'eval_tokenized_bleu': False, + 'ref_name': 'labels', + 'hyp_name': 'caption'}]}, + 'preprocessor': []} + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_trainer_std(self): + WORKSPACE = './workspace/ckpts/caption' + os.makedirs(WORKSPACE, exist_ok=True) + config_file = os.path.join(WORKSPACE, 'configuration.json') + with open(config_file, 'w') as writer: + json.dump(self.finetune_cfg, writer) + + pretrained_model = 'damo/ofa_image-caption_coco_large_en' + args = dict( + model=pretrained_model, + work_dir=WORKSPACE, + train_dataset=MsDataset.load( + 'coco_2014_caption', + namespace='modelscope', + split='train[:12]'), + eval_dataset=MsDataset.load( + 'coco_2014_caption', + namespace='modelscope', + split='validation[:4]'), + metrics=[Metrics.BLEU], + cfg_file=config_file) + trainer = build_trainer(name=Trainers.ofa_tasks, default_args=args) trainer.train() - assert len( - glob.glob(osp.join(trainer.work_dir, - 'best_epoch*_accuracy*.pth'))) == 2 - if os.path.exists(self.trainer.work_dir): - shutil.rmtree(self.trainer.work_dir) + + self.assertIn(ModelFile.TORCH_MODEL_BIN_FILE, + os.path.join(WORKSPACE, 'output')) + shutil.rmtree(WORKSPACE) if __name__ == '__main__': diff --git a/tests/trainers/test_trainer_with_nlp.py b/tests/trainers/test_trainer_with_nlp.py index 6030ada9..5b0c9982 100644 --- a/tests/trainers/test_trainer_with_nlp.py +++ b/tests/trainers/test_trainer_with_nlp.py @@ -29,7 +29,8 @@ class TestTrainerWithNlp(unittest.TestCase): os.makedirs(self.tmp_dir) self.dataset = MsDataset.load( - 'afqmc_small', namespace='userxiaoming', split='train') + 'clue', subset_name='afqmc', + split='train').to_hf_dataset().select(range(2)) def tearDown(self): shutil.rmtree(self.tmp_dir) @@ -73,7 +74,7 @@ class TestTrainerWithNlp(unittest.TestCase): output_dir = os.path.join(self.tmp_dir, ModelFile.TRAIN_OUTPUT_DIR) pipeline_sentence_similarity(output_dir) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 3, 'skip test in current test level') def test_trainer_with_backbone_head(self): model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base' kwargs = dict( @@ -99,6 +100,8 @@ class TestTrainerWithNlp(unittest.TestCase): model_id = 'damo/nlp_structbert_sentiment-classification_chinese-base' cfg = read_config(model_id, revision='beta') cfg.train.max_epochs = 20 + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} cfg.train.work_dir = self.tmp_dir cfg_file = os.path.join(self.tmp_dir, 'config.json') cfg.dump(cfg_file) @@ -120,22 +123,24 @@ class TestTrainerWithNlp(unittest.TestCase): checkpoint_path=os.path.join(self.tmp_dir, 'epoch_10.pth')) self.assertTrue(Metrics.accuracy in eval_results) - @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_trainer_with_configured_datasets(self): model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' cfg: Config = read_config(model_id) cfg.train.max_epochs = 20 + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} cfg.train.work_dir = self.tmp_dir cfg.dataset = { 'train': { - 'name': 'afqmc_small', + 'name': 'clue', + 'subset_name': 'afqmc', 'split': 'train', - 'namespace': 'userxiaoming' }, 'val': { - 'name': 'afqmc_small', + 'name': 'clue', + 'subset_name': 'afqmc', 'split': 'train', - 'namespace': 'userxiaoming' }, } cfg_file = os.path.join(self.tmp_dir, 'config.json') @@ -159,11 +164,30 @@ class TestTrainerWithNlp(unittest.TestCase): model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' cfg: Config = read_config(model_id) cfg.train.max_epochs = 3 + cfg.preprocessor.first_sequence = 'sentence1' + cfg.preprocessor.second_sequence = 'sentence2' + cfg.preprocessor.label = 'label' + cfg.preprocessor.train['label2id'] = {'0': 0, '1': 1} + cfg.preprocessor.val['label2id'] = {'0': 0, '1': 1} + cfg.train.dataloader.batch_size_per_gpu = 2 + cfg.train.hooks = [{ + 'type': 'CheckpointHook', + 'interval': 3, + 'by_epoch': False, + }, { + 'type': 'TextLoggerHook', + 'interval': 1 + }, { + 'type': 'IterTimerHook' + }, { + 'type': 'EvaluationHook', + 'interval': 1 + }] cfg.train.work_dir = self.tmp_dir cfg_file = os.path.join(self.tmp_dir, 'config.json') cfg.dump(cfg_file) dataset = MsDataset.load('clue', subset_name='afqmc', split='train') - dataset = dataset.to_hf_dataset().select(range(128)) + dataset = dataset.to_hf_dataset().select(range(4)) kwargs = dict( model=model_id, train_dataset=dataset, @@ -180,7 +204,7 @@ class TestTrainerWithNlp(unittest.TestCase): PRIORITY = Priority.VERY_LOW def after_iter(self, trainer): - if trainer.iter == 12: + if trainer.iter == 3: raise MsRegressTool.EarlyStopError('Test finished.') if 'EarlyStopHook' not in [ @@ -197,12 +221,11 @@ class TestTrainerWithNlp(unittest.TestCase): results_files = os.listdir(self.tmp_dir) self.assertIn(f'{trainer.timestamp}.log.json', results_files) - trainer = build_trainer(default_args=kwargs) regress_tool = MsRegressTool(baseline=False) with regress_tool.monitor_ms_train( trainer, 'trainer_continue_train', level='strict'): - trainer.train(os.path.join(self.tmp_dir, 'iter_12.pth')) + trainer.train(os.path.join(self.tmp_dir, 'iter_3.pth')) @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_trainer_with_model_and_args(self): diff --git a/tests/utils/test_compatibility.py b/tests/utils/test_compatibility.py new file mode 100644 index 00000000..f5222261 --- /dev/null +++ b/tests/utils/test_compatibility.py @@ -0,0 +1,19 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + + +class CompatibilityTest(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def tearDown(self): + super().tearDown() + + def test_xtcocotools(self): + from xtcocotools.coco import COCO + + +if __name__ == '__main__': + unittest.main()