diff --git a/.dev_scripts/ci_container_test.sh b/.dev_scripts/ci_container_test.sh index 96e02cf9..35b43535 100644 --- a/.dev_scripts/ci_container_test.sh +++ b/.dev_scripts/ci_container_test.sh @@ -1,17 +1,23 @@ if [ "$MODELSCOPE_SDK_DEBUG" == "True" ]; then + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple pip install -r requirements/tests.txt git config --global --add safe.directory /Maas-lib - # run linter test first + git config --global user.email tmp + git config --global user.name tmp.com + + # linter test # use internal project for pre-commit due to the network problem - pre-commit run -c .pre-commit-config_local.yaml --all-files - if [ $? -ne 0 ]; then - echo "linter test failed" - echo "From the repository folder" - echo "Run 'pip install -r requirements/tests.txt' install test dependencies." - echo "Run 'pre-commit install' install pre-commit hooks." - echo "Finally run linter with command: 'pre-commit run --all-files' to check." - echo "Ensure there is no failure!!!!!!!!" - exit -1 + if [ `git remote -v | grep alibaba | wc -l` -gt 1 ]; then + pre-commit run -c .pre-commit-config_local.yaml --all-files + if [ $? -ne 0 ]; then + echo "linter test failed, please run 'pre-commit run --all-files' to check" + echo "From the repository folder" + echo "Run 'pip install -r requirements/tests.txt' install test dependencies." + echo "Run 'pre-commit install' install pre-commit hooks." + echo "Finally run linter with command: 'pre-commit run --all-files' to check." + echo "Ensure there is no failure!!!!!!!!" + exit -1 + fi fi awk -F: '/^[^#]/ { print $1 }' requirements/framework.txt | xargs -n 1 pip install -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html diff --git a/.github/workflows/citest.yaml b/.github/workflows/citest.yaml new file mode 100644 index 00000000..00c6bbbf --- /dev/null +++ b/.github/workflows/citest.yaml @@ -0,0 +1,64 @@ +name: citest + +on: + push: + branches: + - master + - "release/**" + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_zh-CN.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + + pull_request: + paths-ignore: + - "setup.*" + - "requirements.txt" + - "requirements/**" + - "docs/**" + - "tools/**" + - ".dev_scripts/**" + - "README.md" + - "README_zh-CN.md" + - "NOTICE" + - ".github/workflows/lint.yaml" + - ".github/workflows/publish.yaml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unittest: + # The type of runner that the job will run on + runs-on: [modelscope-self-hosted] + steps: + - name: ResetFileMode + shell: bash + run: | + # reset filemode to allow action runner to delete files + # generated by root in docker + set -e + source ~/.bashrc + sudo chown -R $USER:$USER $ACTION_RUNNER_DIR + + - name: Checkout + uses: actions/checkout@v2 + with: + lfs: 'true' + - name: Checkout LFS objects + run: git lfs checkout + - name: Run unittest + shell: bash + run: | + set -e + source /mnt/modelscope/ci_env.sh + bash .dev_scripts/dockerci.sh diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..dc4b5487 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,22 @@ +name: Lint test + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pre-commit hook + run: | + pip install pre-commit + - name: Linting + run: pre-commit run --all-files diff --git a/docs/source/develop.md b/docs/source/develop.md index fad87d33..62801353 100644 --- a/docs/source/develop.md +++ b/docs/source/develop.md @@ -44,7 +44,7 @@ There are mainly three test levels: * level 2: scenario tests for all the implemented modules such as model, pipeline in different algorithm filed. Default test level is 0, which will only run those cases of level 0, you can set test level -via environment variable `TEST_LEVEL`. For more details, you can refer to [test-doc](https://alidocs.dingtalk.com/i/nodes/mdvQnONayjBJKLXy1Bp38PY2MeXzp5o0?dontjump=true&nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA) +via environment variable `TEST_LEVEL`. ```bash @@ -159,9 +159,7 @@ git pull origin branch_name git push --set-upstream origin dev/my-dev-branch ``` Note that you may push multiple times to the same branch with 'git push' commands later. -5. Open the remote url `https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/new` to create a new merge request that merges your development branch (aka, the "dev/my-dev-branch in this example) into master branch. Please follow the instruction on aone page to submit the merge request a code review. - - +5. Create a pull request on github to merge your code into master. ## Build pip package ```bash diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 68979c55..7cefa048 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -74,7 +74,7 @@ pip install "modelscope[multi-modal]" -f https://modelscope.oss-cn-beijing.aliyu ModelScope的源码可以直接clone到本地: ```shell -git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git modelscope +git clone git@github.com:modelscope/modelscope.git cd modelscope git fetch origin master git checkout master diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 32806fa2..4a416875 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -86,6 +86,7 @@ class Models(object): ponet = 'ponet' T5 = 'T5' mglm = 'mglm' + codegeex = 'codegeex' bloom = 'bloom' # audio models @@ -94,6 +95,7 @@ class Models(object): speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield' kws_kwsbp = 'kws-kwsbp' generic_asr = 'generic-asr' + wenet_asr = 'wenet-asr' # multi-modal models ofa = 'ofa' @@ -261,6 +263,8 @@ class Pipelines(object): extractive_summarization = 'extractive-summarization' feature_extraction = 'feature-extraction' mglm_text_summarization = 'mglm-text-summarization' + codegeex_code_translation = 'codegeex-code-translation' + codegeex_code_generation = 'codegeex-code-generation' translation_en_to_de = 'translation_en_to_de' # keep it underscore translation_en_to_ro = 'translation_en_to_ro' # keep it underscore translation_en_to_fr = 'translation_en_to_fr' # keep it underscore @@ -273,6 +277,7 @@ class Pipelines(object): speech_dfsmn_kws_char_farfield = 'speech_dfsmn_kws_char_farfield' kws_kwsbp = 'kws-kwsbp' asr_inference = 'asr-inference' + asr_wenet_inference = 'asr-wenet-inference' # multi-modal tasks image_captioning = 'image-captioning' diff --git a/modelscope/models/audio/asr/wenet_automatic_speech_recognition.py b/modelscope/models/audio/asr/wenet_automatic_speech_recognition.py new file mode 100644 index 00000000..feb822d4 --- /dev/null +++ b/modelscope/models/audio/asr/wenet_automatic_speech_recognition.py @@ -0,0 
+1,38 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import os +from typing import Any, Dict + +import json +import wenetruntime as wenet + +from modelscope.metainfo import Models +from modelscope.models.base import Model +from modelscope.models.builder import MODELS +from modelscope.utils.constant import Tasks + +__all__ = ['WeNetAutomaticSpeechRecognition'] + + +@MODELS.register_module( + Tasks.auto_speech_recognition, module_name=Models.wenet_asr) +class WeNetAutomaticSpeechRecognition(Model): + + def __init__(self, model_dir: str, am_model_name: str, + model_config: Dict[str, Any], *args, **kwargs): + """initialize the info of model. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, am_model_name, model_config, *args, + **kwargs) + self.decoder = wenet.Decoder(model_dir, lang='chs') + + def forward(self, inputs: Dict[str, Any]) -> str: + if inputs['audio_format'] == 'wav': + rst = self.decoder.decode_wav(inputs['audio']) + else: + rst = self.decoder.decode(inputs['audio']) + text = json.loads(rst)['nbest'][0]['sentence'] + return {'text': text} diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index cfa67700..ef2dc424 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -36,6 +36,7 @@ if TYPE_CHECKING: ) from .T5 import T5ForConditionalGeneration from .mglm import MGLMForTextSummarization + from .codegeex import CodeGeeXForCodeTranslation, CodeGeeXForCodeGeneration from .task_models import ( FeatureExtractionModel, InformationExtractionModel, @@ -110,6 +111,8 @@ else: 'sentence_embedding': ['SentenceEmbedding'], 'T5': ['T5ForConditionalGeneration'], 'mglm': ['MGLMForTextSummarization'], + 'codegeex': + ['CodeGeeXForCodeTranslation', 'CodeGeeXForCodeGeneration'], 'gpt_neo': ['GPTNeoModel'], 'bloom': ['BloomModel'], } diff --git a/modelscope/models/nlp/codegeex/__init__.py b/modelscope/models/nlp/codegeex/__init__.py new file mode 100755 index 00000000..0bcdb4bc --- /dev/null +++ b/modelscope/models/nlp/codegeex/__init__.py @@ -0,0 +1,24 @@ +# Modified by Zhipu.AI +# Original Copyright (c) Alibaba, Inc. and its affiliates. +from typing import TYPE_CHECKING, Union + +from modelscope.utils.import_utils import LazyImportModule + +if TYPE_CHECKING: + from .codegeex_for_code_translation import CodeGeeXForCodeTranslation + from .codegeex_for_code_generation import CodeGeeXForCodeGeneration +else: + _import_structure = { + 'codegeex_for_code_translation': ['CodeGeeXForCodeTranslation'], + 'codegeex_for_code_generation': ['CodeGeeXForCodeGeneration'], + } + + import sys + + sys.modules[__name__] = LazyImportModule( + __name__, + globals()['__file__'], + _import_structure, + module_spec=__spec__, + extra_objects={}, + ) diff --git a/modelscope/models/nlp/codegeex/codegeex.py b/modelscope/models/nlp/codegeex/codegeex.py new file mode 100755 index 00000000..f8d43008 --- /dev/null +++ b/modelscope/models/nlp/codegeex/codegeex.py @@ -0,0 +1,1030 @@ +# Copyright (c) 2022 Zhipu.AI +import math + +import torch +import torch.nn.functional as F + + +def fast_gelu(x): + """Mindspore's fast gelu implementation.""" + return x / (1 + torch.exp(-1.702 * torch.abs(x))) * torch.exp( + 0.851 * (x - torch.abs(x))) + + +class MLP(torch.nn.Module): + """MLP. + + MLP will take the input with h hidden state, project it to 4*h + hidden dimension, perform nonlinear transformation, and project the + state back into h hidden dimension. At the end, dropout is also + applied. 
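+    (Note: this implementation does not actually apply dropout.)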
+ """ + + def __init__( + self, + hidden_size, + ): + super(MLP, self).__init__() + self.hidden_size = hidden_size + # Project to 4h. + self.dense_h_to_4h = torch.nn.Linear( + self.hidden_size, + 4 * self.hidden_size, + ) + + self.activation_func = fast_gelu + + # Project back to h. + self.dense_4h_to_h = torch.nn.Linear( + 4 * self.hidden_size, + self.hidden_size, + ) + + def forward(self, hidden_states): + # [s, b, 4hp] + intermediate_parallel = self.dense_h_to_4h(hidden_states) + intermediate_parallel = self.activation_func(intermediate_parallel) + # [s, b, h] + output = self.dense_4h_to_h(intermediate_parallel) + + return output + + +class SelfAttention(torch.nn.Module): + """self-attention layer abstract class. + + Self-attention layer takes input with size [b, s, h] + and returns output of the same size. + """ + + def __init__( + self, + hidden_size, + num_attention_heads, + layer_number, + fp16=True, + attention_softmax_in_fp32=True, + ): + super(SelfAttention, self).__init__() + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.fp16 = fp16 + self.attention_softmax_in_fp32 = attention_softmax_in_fp32 + self.layer_number = max(1, layer_number) + + assert self.hidden_size % self.num_attention_heads == 0 + self.hidden_size_per_attention_head = int(self.hidden_size + // self.num_attention_heads) + + self.query = torch.nn.Linear(self.hidden_size, self.hidden_size) + self.key = torch.nn.Linear(self.hidden_size, self.hidden_size) + self.value = torch.nn.Linear(self.hidden_size, self.hidden_size) + + self.norm_factor = math.sqrt(self.hidden_size_per_attention_head) + self.softmax = torch.nn.Softmax(dim=-1) + + self.dense = torch.nn.Linear(self.hidden_size, self.hidden_size) + + def forward( + self, + hidden_states, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + # hidden_states: [sq, b, h] + + # ===================== + # Query, Key, and Value + # ===================== + + query_layer = self.query(hidden_states) + key_layer = self.key(hidden_states) + value_layer = self.value(hidden_states) + + new_query_layer_shape = query_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head + ) # noqa + query_layer = query_layer.view(*new_query_layer_shape) + + new_query_layer_shape = key_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head) + key_layer = key_layer.view(*new_query_layer_shape) + + new_query_layer_shape = value_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head + ) # noqa + value_layer = value_layer.view(*new_query_layer_shape) + + # ================================== + # Adjust key and value for inference + # ================================== + + if layer_past is not None: + past_key, past_value = layer_past + key_layer = torch.cat((past_key.type_as(key_layer), key_layer), + dim=0) + value_layer = torch.cat( + (past_value.type_as(value_layer), value_layer), dim=0) + if get_key_value: + present = (key_layer, value_layer) + + # =================================== + # Raw attention scores. 
[b, np, sq, sk] + # =================================== + + # [b, np, sq, sk] + output_size = (query_layer.size(1), query_layer.size(2), + query_layer.size(0), key_layer.size(0)) + + # [sq, b, np, hn] -> [sq, b * np, hn] + query_layer = query_layer.contiguous().view( + output_size[2], output_size[0] * output_size[1], -1) + key_layer = key_layer.contiguous().view( + output_size[3], output_size[0] * output_size[1], -1) + + # Raw attention scores. [b * np, sq, sk] + matmul_result = torch.matmul( + query_layer.transpose(0, 1), + key_layer.transpose(0, 1).transpose(1, 2)) / self.norm_factor + + # change view to [b, np, sq, sk] + attention_scores = matmul_result.view(*output_size) + + # ================================================== + # Update attention mask for inference. [b, np, sq, sk] + # ================================================== + + if get_key_value: + with torch.no_grad(): + if layer_past is not None: + attention_mask = attention_mask[ + ..., + attention_scores.size(3) + - 1, :attention_scores.size(3)].unsqueeze(2) + else: + attention_mask = attention_mask[ + ..., :attention_scores.size(3), :attention_scores. + size(3)] + + if context_length is not None: + attention_mask = torch.clone(attention_mask) + attention_mask[:, :, context_length:, :] = True + + # attention scores and attention mask [b, np, sq, sk] + # attention_scores = attention_mask_func(attention_scores, attention_mask) + attention_scores = attention_scores - attention_mask * 10000.0 + if self.attention_softmax_in_fp32: + attention_probs = self.softmax(attention_scores.float()).half() + else: + attention_probs = self.softmax(attention_scores) + + # ========================= + # Context layer. [sq, b, hp] + # ========================= + + # value_layer -> context layer. + # [sq, b, np, hn] --> [b, np, sq, hn] + + # context layer shape: [b, np, sq, hn] + output_size = (value_layer.size(1), value_layer.size(2), + query_layer.size(0), value_layer.size(3)) + + # change view [sq, b * np, hn] + value_layer = value_layer.view( + value_layer.size(0), output_size[0] * output_size[1], -1) + + # change view [b * np, sq, sk] + attention_probs = attention_probs.view(output_size[0] * output_size[1], + output_size[2], -1) + + context_layer = torch.bmm( + attention_probs, + value_layer.unsqueeze(0).transpose(1, 2).squeeze(0)) + + # change view [b, np, sq, hn] + context_layer = context_layer.view(*output_size) + + # # [b, np, sq, hn] --> [sq, b, np, hn] + context_layer = context_layer.permute(2, 0, 1, 3).contiguous() + + # # [sq, b, np, hn] --> [sq, b, hp] + new_context_layer_shape = context_layer.size()[:-2] + ( + self.hidden_size, ) + context_layer = context_layer.view(*new_context_layer_shape) + + # ================= + # Output. [sq, b, h] + # ================= + + output = self.dense(context_layer) + + if get_key_value: + output = [output, present] + + return output + + +class TopQuerySelfAttention(torch.nn.Module): + """Top query self-attention layer abstract class. + + Self-attention layer takes input with size [b, s, h] + and returns output of the same size. 
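+    The query is projected from a separate top-query hidden state rather than from the layer input.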
+ """ + + def __init__( + self, + hidden_size, + num_attention_heads, + layer_number, + fp16=True, + attention_softmax_in_fp32=True, + ): + super(TopQuerySelfAttention, self).__init__() + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.fp16 = fp16 + self.attention_softmax_in_fp32 = attention_softmax_in_fp32 + self.layer_number = max(1, layer_number) + + assert self.hidden_size % self.num_attention_heads == 0 + self.hidden_size_per_attention_head = int(self.hidden_size + // self.num_attention_heads) + + self.query = torch.nn.Linear(self.hidden_size, self.hidden_size) + self.key = torch.nn.Linear(self.hidden_size, self.hidden_size) + self.value = torch.nn.Linear(self.hidden_size, self.hidden_size) + + self.norm_factor = math.sqrt(self.hidden_size_per_attention_head) + self.softmax = torch.nn.Softmax(dim=-1) + + self.dense = torch.nn.Linear(self.hidden_size, self.hidden_size) + + def forward( + self, + hidden_states, + query_hidden_state, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + + # hidden_states: [sq, b, h] + query_layer = self.query(query_hidden_state) + key_layer = self.key(hidden_states) + value_layer = self.value(hidden_states) + + new_query_layer_shape = query_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head + ) # noqa + query_layer = query_layer.view(*new_query_layer_shape) + + new_query_layer_shape = key_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head) + key_layer = key_layer.view(*new_query_layer_shape) + + new_query_layer_shape = value_layer.size()[:-1] + ( + self.num_attention_heads, self.hidden_size_per_attention_head + ) # noqa + value_layer = value_layer.view(*new_query_layer_shape) + + # ================================== + # Adjust key and value for inference + # ================================== + + if layer_past is not None: + past_key, past_value = layer_past + key_layer = torch.cat((past_key.type_as(key_layer), key_layer), + dim=0) + value_layer = torch.cat( + (past_value.type_as(value_layer), value_layer), dim=0) + if get_key_value: + present = (key_layer, value_layer) + + # =================================== + # Raw attention scores. [b, np, sq, sk] + # =================================== + + # [b, np, sq, sk] + output_size = (query_layer.size(1), query_layer.size(2), + query_layer.size(0), key_layer.size(0)) + + # [s, b, np, hn] -> [s, b * np, hn] + query_layer = query_layer.contiguous().view( + output_size[2], output_size[0] * output_size[1], -1) + key_layer = key_layer.contiguous().view( + output_size[3], output_size[0] * output_size[1], -1) + + # Raw attention scores. [b * np, sq, sk] + matmul_result = torch.matmul( + query_layer.transpose(0, 1), + key_layer.transpose(0, 1).transpose(1, 2)) / self.norm_factor + + # change view to [b, np, s, s] + attention_scores = matmul_result.view(*output_size) + + # ================================================== + # Update attention mask for inference. [b, np, sq, sk] + # ================================================== + + if get_key_value: + with torch.no_grad(): + if layer_past is not None: + attention_mask = attention_mask[ + ..., + attention_scores.size(3) + - 1, :attention_scores.size(3)].unsqueeze(2) + else: + attention_mask = attention_mask[ + ..., :attention_scores.size(3), :attention_scores. 
+ size(3)] + + if context_length is not None: + attention_mask = torch.clone(attention_mask) + attention_mask[:, :, context_length:, :] = True + + # attention scores and attention mask [b, np, sq, sk] + # attention_scores = attention_mask_func(attention_scores, attention_mask) + attention_scores = attention_scores - attention_mask * 10000.0 + if self.attention_softmax_in_fp32: + attention_probs = self.softmax(attention_scores.float()).half() + else: + attention_probs = self.softmax(attention_scores) + + # ========================= + # Context layer. [sq, b, hp] + # ========================= + + # value_layer -> context layer. + # [sq, b, np, hn] --> [b, np, sq, hn] + + # context layer shape: [b, np, sq, hn] + output_size = (value_layer.size(1), value_layer.size(2), + query_layer.size(0), value_layer.size(3)) + + # change view [sq, b * np, hn] + value_layer = value_layer.view( + value_layer.size(0), output_size[0] * output_size[1], -1) + + # change view [b * np, sq, sk] + attention_probs = attention_probs.view(output_size[0] * output_size[1], + output_size[2], -1) + + # matmul: [b * np, sq, hn] + context_layer = torch.bmm( + attention_probs, + value_layer.unsqueeze(0).transpose(1, 2).squeeze(0)) + + # change view [b, np, sq, hn] + context_layer = context_layer.view(*output_size) + + # [b, np, sq, hn] --> [sq, b, np, hn] + context_layer = context_layer.permute(2, 0, 1, 3).contiguous() + + # [sq, b, np, hn] --> [sq, b, hp] + new_context_layer_shape = context_layer.size()[:-2] + \ + (self.hidden_size,) # noqa + context_layer = context_layer.view(*new_context_layer_shape) + + # ================= + # Output. [sq, b, h] + # ================= + + output = self.dense(context_layer) + + if get_key_value: + output = [output, present] + + return output + + +class TransformerLayer(torch.nn.Module): + """A single transformer layer. + + Transformore layer takes input with size [b, s, h] and returns an + output of the same size. + """ + + def __init__( + self, + hidden_size, + num_attention_heads, + layer_number, + layernorm_epsilon=1e-5, + fp16=True, + attention_softmax_in_fp32=True, + ): + super(TransformerLayer, self).__init__() + self.hidden_size = hidden_size + self.layernorm_epsilon = layernorm_epsilon + self.layer_number = layer_number + + # Layernorm on the input data. + self.input_layernorm = torch.nn.LayerNorm( + hidden_size, eps=self.layernorm_epsilon) + + # Self attention. + self.attention = SelfAttention(hidden_size, num_attention_heads, + layer_number, fp16, + attention_softmax_in_fp32) + + # Layernorm on the input data. + self.post_attention_layernorm = torch.nn.LayerNorm( + self.hidden_size, eps=self.layernorm_epsilon) + self.mlp = MLP(self.hidden_size) + + def forward( + self, + hidden_states, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + # hidden_states: [b, s, h] + # Use FP32 for Layernorm + # layernorm_output = self.input_layernorm(hidden_states.float()).half() + layernorm_output = self.input_layernorm(hidden_states) + + # Self attention. + attention_output = self.attention( + layernorm_output, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + + if get_key_value: + attention_output, presents = attention_output + + # Residual connection. 
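+        # Pre-LayerNorm residual: add the attention output back onto the block input.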
+ residual = hidden_states + layernorm_input = attention_output + residual + + # Use FP32 for Layernorm + # layernorm_output = self.post_attention_layernorm(layernorm_input.float()).half() + layernorm_output = self.post_attention_layernorm(layernorm_input) + mlp_output = self.mlp(layernorm_output) + output = mlp_output + layernorm_input + + if get_key_value: + output = [output, presents] + + return output + + +class TopQueryLayer(torch.nn.Module): + """A single top query layer. + + Top query layer takes input with size [b, s, h] and returns an + output of the same size. + """ + + def __init__( + self, + hidden_size, + num_attention_heads, + layer_number, + layernorm_epsilon=1e-5, + ): + super(TopQueryLayer, self).__init__() + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.layernorm_epsilon = layernorm_epsilon + self.layer_number = layer_number + + # Use FP32 for Layernorm + self.input_layernorm = torch.nn.LayerNorm( + self.hidden_size, eps=self.layernorm_epsilon) + + # Self attention. + self.attention = TopQuerySelfAttention(self.hidden_size, + self.num_attention_heads, + self.layer_number) + # Layernorm on the input data. + self.post_attention_layernorm = torch.nn.LayerNorm( + self.hidden_size, eps=self.layernorm_epsilon) + + # MLP + self.mlp = MLP(self.hidden_size) + + def forward( + self, + hidden_states, + query_hidden_state, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + # hidden_states: [b, s, h] + assert query_hidden_state != None # noqa + + # Use FP32 for Layernorm + # layernorm_output = self.input_layernorm(hidden_states.float()).half() + layernorm_output = self.input_layernorm(hidden_states) + + # Self attention. + attention_output = self.attention( + layernorm_output, + query_hidden_state, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + + if get_key_value: + attention_output, presents = attention_output + + # Residual connection. + residual = hidden_states + layernorm_input = attention_output + residual + + # Use FP32 for Layernorm + # layernorm_output = self.post_attention_layernorm(layernorm_input.float()).half() + layernorm_output = self.post_attention_layernorm(layernorm_input) + + # MLP. + mlp_output = self.mlp(layernorm_output) + + # Second residual connection. + residual = layernorm_input + output = mlp_output + residual + + if get_key_value: + output = [output, presents] + + return output + + +class Transformer(torch.nn.Module): + """Transformer class.""" + + def __init__( + self, + hidden_size, + num_attention_heads, + num_layers, + layernorm_epsilon=1e-5, + ): + super(Transformer, self).__init__() + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.layernorm_epsilon = layernorm_epsilon + # Number of layers: + self.num_layers = num_layers + self.num_unique_layers = None + + ################# + assert self.num_unique_layers is None + ################# + + if self.num_unique_layers is None: + self.num_unique_layers = self.num_layers + assert self.num_layers % self.num_unique_layers == 0, \ + 'number of layers should be divisible by number of unique layers' + + # Transformer layers. 
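+        # num_unique_layers TransformerLayers (reused cyclically when num_layers > num_unique_layers); a single top-query layer is applied after the final layernorm.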
+ def build_layer(layer_number): + return TransformerLayer(self.hidden_size, self.num_attention_heads, + layer_number) + + self.layers = torch.nn.ModuleList( + [build_layer(i + 1) for i in range(self.num_unique_layers)]) + + self.topQueryLayer = TopQueryLayer(self.hidden_size, + self.num_attention_heads, + self.num_unique_layers) + + self.final_layernorm = torch.nn.LayerNorm( + self.hidden_size, eps=self.layernorm_epsilon) + + def _get_layer_index(self, layer_number): + return layer_number % self.num_unique_layers + + def _get_layer(self, layer_number): + return self.layers[self._get_layer_index(layer_number)] + + def forward( + self, + hidden_states, + query_hidden_state, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + # data format change to avoid explicit tranposes : [b s h] --> [s b h] + hidden_states = hidden_states.transpose(0, 1).contiguous() + query_hidden_state = query_hidden_state.transpose(0, 1).contiguous() + + if get_key_value: + presents = [] + for index in range(self.num_layers): + layer = self._get_layer(index) + past = None + if layer_past is not None: + past = layer_past[index] + hidden_states = layer( + hidden_states, + attention_mask, + layer_past=past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + if get_key_value: + hidden_states, present = hidden_states + presents.append(present) + + # Use FP32 for Layernorm + # hidden_states_ = self.final_layernorm(hidden_states.float()).half() + hidden_states_ = self.final_layernorm(hidden_states) + + ################################# + # top query layer + ################################# + past = None + if layer_past is not None: + past = layer_past[self.num_layers] + hidden_states = self.topQueryLayer( + hidden_states_, + query_hidden_state, + attention_mask, + layer_past=past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + + if get_key_value: + hidden_states, present = hidden_states + presents.append(present) + + # reverting data format change [s b h] --> [b s h] + output = hidden_states.transpose(0, 1).contiguous() + + if get_key_value: + output = [output, presents] + + return output + + def state_dict_for_save_checkpoint(self, + destination=None, + prefix='', + keep_vars=False): + return self.state_dict(destination, prefix, keep_vars) + + +class Embedding(torch.nn.Module): + """Language model embeddings. + + Arguments: + hidden_size: hidden size + vocab_size: vocabulary size + max_sequence_length: maximum size of sequence. This + is used for positional embedding + """ + + def __init__( + self, + hidden_size, + vocab_size, + max_sequence_length, + ): + super(Embedding, self).__init__() + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.max_sequence_length = max_sequence_length + + # Word embeddings. + self.word_embeddings = torch.nn.Embedding(self.vocab_size, + self.hidden_size) + self._word_embeddings_key = 'word_embeddings' + + # Position embedding. + self.position_embeddings = torch.nn.Embedding(self.max_sequence_length, + self.hidden_size) + self.position_embeddings = self.position_embeddings.half() + self._position_embeddings_key = 'position_embeddings' + + def forward(self, input_ids, position_ids): + # Embeddings. 
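+        # Sum of token embeddings and learned absolute position embeddings.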
+ words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + embeddings = words_embeddings + position_embeddings + + return embeddings + + def state_dict_for_save_checkpoint(self, + destination=None, + prefix='', + keep_vars=False): + """For easy load.""" + + state_dict_ = {} + state_dict_[self._word_embeddings_key] \ + = self.word_embeddings.state_dict(destination, prefix, keep_vars) + state_dict_[self._position_embeddings_key] \ + = self.position_embeddings.state_dict( + destination, prefix, keep_vars) + + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + # Word embedding. + if self._word_embeddings_key in state_dict: + state_dict_ = state_dict[self._word_embeddings_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if 'word_embeddings' in key: + state_dict_[key.split('word_embeddings.')[1]] \ + = state_dict[key] + state_dict_['weight'] = state_dict_['weight'][:self.vocab_size] + self.word_embeddings.load_state_dict(state_dict_, strict=strict) + + # Position embedding. + if self._position_embeddings_key in state_dict: + state_dict_ = state_dict[self._position_embeddings_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if 'position_embeddings' in key: + state_dict_[key.split('position_embeddings.')[1]] \ + = state_dict[key] + self.position_embeddings.load_state_dict(state_dict_, strict=strict) + + +class QueryEmbedding(torch.nn.Module): + """Language model embeddings. + + Arguments: + hidden_size: hidden size + vocab_size: vocabulary size + max_sequence_length: maximum size of sequence. This + is used for positional embedding + """ + + def __init__( + self, + hidden_size, + vocab_size, + max_sequence_length, + ): + super(QueryEmbedding, self).__init__() + + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.max_sequence_length = max_sequence_length + + # Top query position embedding (serial). + self.top_query_embeddings = torch.nn.Embedding( + self.max_sequence_length, self.hidden_size) + self.top_query_embeddings = self.top_query_embeddings.half() + self._top_query_embeddings_key = 'top_query_embeddings' + + def forward(self, position_ids): + # Embeddings. + embeddings = self.top_query_embeddings(position_ids) + + return embeddings + + def state_dict_for_save_checkpoint(self, + destination=None, + prefix='', + keep_vars=False): + """For easy load.""" + + state_dict_ = {} + state_dict_[self._top_query_embeddings_key] \ + = self.top_query_embeddings.state_dict( + destination, prefix, keep_vars) + + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + # Position embedding. + if self._top_query_embeddings_key in state_dict: + state_dict_ = state_dict[self._top_query_embeddings_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if 'top_query_embeddings' in key: + state_dict_[key.split('top_query_embeddings.')[1]] \ + = state_dict[key] + self.top_query_embeddings.load_state_dict(state_dict_, strict=strict) + + +class TransformerLanguageModel(torch.nn.Module): + """Transformer language model. + + Arguments: + transformer_hparams: transformer hyperparameters + attention_mask_func: a function that takes `unmaksed-attention-scores` + with size [b, np, s, s] and an `attention-mask` and will apply + the masking. 
The function should return a masked score of the + same size [b, np, s, s]. + masked-attention-scores = attention_mask_func( + unmaksed-attention-scores, attention-mask) + vocab_size: vocabulary size + max_sequence_length: maximum size of sequence. This + is used for positional embedding + """ + + def __init__( + self, + hidden_size, + num_layers, + num_attention_heads, + padded_vocab_size, + max_position_embeddings, + ): + super(TransformerLanguageModel, self).__init__() + self.hidden_size = hidden_size + self.num_layers = num_layers + self.num_attention_heads = num_attention_heads + self.padded_vocab_size = padded_vocab_size + self.max_position_embeddings = max_position_embeddings + + # Embeddings + self.embedding = Embedding(self.hidden_size, self.padded_vocab_size, + self.max_position_embeddings) + self._embedding_key = 'embedding' + + # Query embeddings + self.topQueryEmbedding = QueryEmbedding(self.hidden_size, + self.padded_vocab_size, + self.max_position_embeddings) + self._topQueryEmbedding_key = 'topQueryEmbedding' + + # Transformer + self.transformer = Transformer(self.hidden_size, + self.num_attention_heads, + self.num_layers) + self._transformer_key = 'transformer' + + def forward( + self, + input_ids, + position_ids, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + + # Embeddings. + embedding_output = self.embedding(input_ids, position_ids) + query_position_ids = position_ids + queryEmbedding_out = self.topQueryEmbedding(query_position_ids) + + # Transformer. + transformer_output = self.transformer( + embedding_output, + queryEmbedding_out, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + + return transformer_output + + def state_dict_for_save_checkpoint(self, + destination=None, + prefix='', + keep_vars=False): + """For easy load.""" + + state_dict_ = {} + state_dict_[self._embedding_key] \ + = self.embedding.state_dict_for_save_checkpoint( + destination, prefix, keep_vars) + state_dict_[self._topQueryEmbedding_key] \ + = self.topQueryEmbedding.state_dict_for_save_checkpoint( + destination, prefix, keep_vars) + state_dict_[self._transformer_key] \ + = self.transformer.state_dict_for_save_checkpoint( + destination, prefix, keep_vars) + + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + # Embedding. + if self._embedding_key in state_dict: + state_dict_ = state_dict[self._embedding_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if '_embeddings' in key: + state_dict_[key] = state_dict[key] + self.embedding.load_state_dict(state_dict_, strict=strict) + + if self._topQueryEmbedding_key in state_dict: + state_dict_ = state_dict[self._topQueryEmbedding_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if '_embeddings' in key: + state_dict_[key] = state_dict[key] + self.topQueryEmbedding.load_state_dict(state_dict_, strict=strict) + + # Transformer. + if self._transformer_key in state_dict: + state_dict_ = state_dict[self._transformer_key] + else: + # for backward compatibility. + state_dict_ = {} + for key in state_dict.keys(): + if 'transformer.' 
in key: + state_dict_[key.split('transformer.')[1]] = state_dict[key] + self.transformer.load_state_dict(state_dict_, strict=strict) + + +class CodeGeeXModel(torch.nn.Module): + """CodeGeeX: A Multilingual Code Generation Model.""" + + def __init__( + self, + hidden_size, + num_layers, + num_attention_heads, + padded_vocab_size, + max_position_embeddings, + ): + super(CodeGeeXModel, self).__init__() + + self.language_model = TransformerLanguageModel( + hidden_size, num_layers, num_attention_heads, padded_vocab_size, + max_position_embeddings) + self._language_model_key = 'language_model' + + def forward( + self, + input_ids, + position_ids, + attention_mask, + layer_past=None, + get_key_value=False, + prompt_length=None, + context_length=None, + ): + # Language model. + lm_output = self.language_model( + input_ids, + position_ids, + attention_mask, + layer_past=layer_past, + get_key_value=get_key_value, + prompt_length=prompt_length, + context_length=context_length) + + if get_key_value: + lm_output, presents = lm_output + + output = F.linear( + lm_output, + self.language_model.embedding.word_embeddings.weight.half()) + + if get_key_value: + output = [output, presents] + + return output + + def state_dict_for_save_checkpoint(self, + destination=None, + prefix='', + keep_vars=False): + + state_dict_ = {} + state_dict_[self._language_model_key] \ + = self.language_model.state_dict_for_save_checkpoint( + destination, prefix, keep_vars) + return state_dict_ + + def load_state_dict(self, state_dict, strict=True): + """Customized load.""" + + if self._language_model_key in state_dict: + state_dict = state_dict[self._language_model_key] + self.language_model.load_state_dict(state_dict, strict=strict) diff --git a/modelscope/models/nlp/codegeex/codegeex_for_code_generation.py b/modelscope/models/nlp/codegeex/codegeex_for_code_generation.py new file mode 100755 index 00000000..ff191cba --- /dev/null +++ b/modelscope/models/nlp/codegeex/codegeex_for_code_generation.py @@ -0,0 +1,110 @@ +# Copyright (c) 2022 Zhipu.AI +import copy +from typing import Any, Dict + +import torch + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from .codegeex import CodeGeeXModel +from .inference import get_token_stream +from .tokenizer import CodeGeeXTokenizer + + +def model_provider(): + """Build the model.""" + + hidden_size = 5120 + num_attention_heads = 40 + num_layers = 39 + padded_vocab_size = 52224 + max_position_embeddings = 2048 + + model = CodeGeeXModel(hidden_size, num_layers, num_attention_heads, + padded_vocab_size, max_position_embeddings) + + return model + + +@MODELS.register_module(Tasks.code_generation, module_name=Models.codegeex) +class CodeGeeXForCodeGeneration(TorchModel): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the fast poem model from the `model_dir` path. + + Args: + model_dir (str): the model path. 
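+                The tokenizer and fp32 checkpoint are loaded from this directory; the model is cast to fp16 and moved to GPU.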
+ """ + super().__init__(model_dir, *args, **kwargs) + logger = get_logger() + # loading tokenizer + logger.info('Loading tokenizer ...') + self.tokenizer = CodeGeeXTokenizer( + tokenizer_path=model_dir + '/tokenizer', mode='codegeex-13b') + # loading model + state_dict_path = model_dir + '/ckpt_ms_213000_fp32_52224.pt' + logger.info('Loading state dict ...') + state_dict = torch.load(state_dict_path, map_location='cpu') + state_dict = state_dict['module'] + + logger.info('Building CodeGeeX model ...') + self.model = model_provider() + self.model.load_state_dict(state_dict) + self.model.eval() + self.model.half() + self.model.cuda() + + def forward(self, input: Dict[str, str]) -> Dict[str, str]: + micro_batch_size = 1 + seq_length = 2048 + out_seq_length = 256 + bad_ids = None + lang = input['language'] + prompt = input['prompt'] + prompt = f'# language: {lang}\n{prompt}' + logger = get_logger() + tokenizer = self.tokenizer + model = self.model + for prompt in [prompt]: + tokens = tokenizer.encode_code(prompt) + n_token_prompt = len(tokens) + token_stream = get_token_stream( + model, + tokenizer, + seq_length, + out_seq_length, + [copy.deepcopy(tokens) for _ in range(micro_batch_size)], + micro_batch_size=micro_batch_size, + bad_ids=bad_ids, + topk=1, + topp=0.9, + temperature=0.9, + greedy=True) + is_finished = [False for _ in range(micro_batch_size)] + for i, generated in enumerate(token_stream): + generated_tokens = generated[0] + for j in range(micro_batch_size): + if is_finished[j]: + continue + if generated_tokens[j].cpu().numpy( + )[-1] == tokenizer.eos_token_id or len( + generated_tokens[j]) >= out_seq_length: + is_finished[j] = True + generated_tokens_ = generated_tokens[j].cpu().numpy( + ).tolist() + generated_code = tokenizer.decode_code( + generated_tokens_[n_token_prompt:]) + generated_code = ''.join(generated_code) + logger.info( + '================================= Generated code:' + ) + logger.info(generated_code) + if all(is_finished): + break + + logger.info('Generation finished.') + return {OutputKeys.TEXT: generated_code} diff --git a/modelscope/models/nlp/codegeex/codegeex_for_code_translation.py b/modelscope/models/nlp/codegeex/codegeex_for_code_translation.py new file mode 100755 index 00000000..fece907d --- /dev/null +++ b/modelscope/models/nlp/codegeex/codegeex_for_code_translation.py @@ -0,0 +1,109 @@ +# Copyright (c) 2022 Zhipu.AI +import copy +from typing import Any, Dict + +import torch + +from modelscope.metainfo import Models +from modelscope.models.base import TorchModel +from modelscope.models.builder import MODELS +from modelscope.outputs import OutputKeys +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger +from .codegeex import CodeGeeXModel +from .inference import get_token_stream +from .tokenizer import CodeGeeXTokenizer + + +def model_provider(): + """Build the model.""" + + hidden_size = 5120 + num_attention_heads = 40 + num_layers = 39 + padded_vocab_size = 52224 + max_position_embeddings = 2048 + + model = CodeGeeXModel(hidden_size, num_layers, num_attention_heads, + padded_vocab_size, max_position_embeddings) + + return model + + +@MODELS.register_module(Tasks.code_translation, module_name=Models.codegeex) +class CodeGeeXForCodeTranslation(TorchModel): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the fast poem model from the `model_dir` path. + + Args: + model_dir (str): the model path. 
+ """ + super().__init__(model_dir, *args, **kwargs) + logger = get_logger() + # loading tokenizer + logger.info('Loading tokenizer ...') + self.tokenizer = CodeGeeXTokenizer( + tokenizer_path=model_dir + '/tokenizer', mode='codegeex-13b') + # loading model + state_dict_path = model_dir + '/ckpt_ms_translation_0817.pt' + logger.info('Loading state dict ...') + state_dict = torch.load(state_dict_path, map_location='cpu') + state_dict = state_dict['module'] + + logger.info('Building CodeGeeX model ...') + self.model = model_provider() + self.model.load_state_dict(state_dict) + self.model.eval() + self.model.half() + self.model.cuda() + + def forward(self, input: Dict[str, str]) -> Dict[str, str]: + micro_batch_size = 1 + seq_length = 2048 + out_seq_length = 256 + bad_ids = None + src_lang = input['source language'] + dst_lang = input['target language'] + prompt = input['prompt'] + prompt = f'code translation\n{src_lang}:\n{prompt}\n{dst_lang}:\n' + logger = get_logger() + tokenizer = self.tokenizer + model = self.model + for prompt in [prompt]: + tokens = tokenizer.encode_code(prompt) + n_token_prompt = len(tokens) + token_stream = get_token_stream( + model, + tokenizer, + seq_length, + out_seq_length, + [copy.deepcopy(tokens) for _ in range(micro_batch_size)], + micro_batch_size=micro_batch_size, + bad_ids=bad_ids, + greedy=True, + ) + is_finished = [False for _ in range(micro_batch_size)] + for i, generated in enumerate(token_stream): + generated_tokens = generated[0] + for j in range(micro_batch_size): + if is_finished[j]: + continue + if generated_tokens[j].cpu().numpy( + )[-1] == tokenizer.eos_token_id or len( + generated_tokens[j]) >= out_seq_length: + is_finished[j] = True + generated_tokens_ = generated_tokens[j].cpu().numpy( + ).tolist() + generated_code = tokenizer.decode_code( + generated_tokens_[n_token_prompt:]) + generated_code = ''.join(generated_code) + logger.info( + '================================= Generated code:' + ) + logger.info(generated_code) + if all(is_finished): + break + + logger.info('Generation finished.') + return {OutputKeys.TEXT: generated_code} diff --git a/modelscope/models/nlp/codegeex/inference.py b/modelscope/models/nlp/codegeex/inference.py new file mode 100755 index 00000000..38f14d6c --- /dev/null +++ b/modelscope/models/nlp/codegeex/inference.py @@ -0,0 +1,301 @@ +# Copyright (c) 2022 Zhipu.AI + +from typing import List + +import torch +import torch.nn.functional as F + + +def get_ltor_masks_and_position_ids( + data, + eod_token, + reset_position_ids, + reset_attention_mask, +): + """Build masks and position id for left to right model.""" + + # Extract batch size and sequence length. + micro_batch_size, seq_length = data.size() + + # Attention mask (lower triangular). + if reset_attention_mask: + att_mask_batch = micro_batch_size + else: + att_mask_batch = 1 + attention_mask = torch.tril( + torch.ones((att_mask_batch, seq_length, seq_length), + device=data.device)).view(att_mask_batch, 1, seq_length, + seq_length) + + # Position ids. + position_ids = torch.arange( + seq_length, dtype=torch.long, device=data.device) + position_ids = position_ids.unsqueeze(0).expand_as(data) + # We need to clone as the ids will be modifed based on batch index. + if reset_position_ids: + position_ids = position_ids.clone() + + if reset_position_ids or reset_attention_mask: + # Loop through the batches: + for b in range(micro_batch_size): + + # Find indecies where EOD token is. 
+ eod_index = position_ids[b, data[b] == eod_token] + # Detach indecies from positions if going to modify positions. + if reset_position_ids: + eod_index = eod_index.clone() + + # Loop through EOD indecies: + prev_index = 0 + for j in range(eod_index.size()[0]): + i = eod_index[j] + # Mask attention loss. + if reset_attention_mask: + attention_mask[b, 0, (i + 1):, :(i + 1)] = 0 + # Reset positions. + if reset_position_ids: + position_ids[b, (i + 1):] -= i + 1 - prev_index + prev_index = i + 1 + + # Convert attention mask to binary: + attention_mask = attention_mask < 0.5 + + return attention_mask, position_ids + + +def get_batch( + context_tokens, + micro_batch_size, + eod_token, + reset_position_ids=False, + reset_attention_mask=False, +): + """Generate batch from context tokens.""" + tokens = context_tokens.view(micro_batch_size, -1).contiguous().cuda() + # Get the attention mask and postition ids. + attention_mask, position_ids = get_ltor_masks_and_position_ids( + tokens, + eod_token, + reset_position_ids, + reset_attention_mask, + ) + + return tokens, attention_mask, position_ids + + +def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')): + """This function has been mostly taken from huggingface conversational + ai code at + https://medium.com/huggingface/how-to-build-a-state-of-the-art- + conversational-ai-with-transfer-learning-2d818ac26313""" + + if top_k > 0: + # Remove all tokens with a probability less than the + # last token of the top-k + indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, + None] + logits[indices_to_remove] = filter_value + + if top_p > 0.0: + # Cconvert to 1D + sorted_logits, sorted_indices = torch.sort( + logits, descending=True, dim=-1) + cumulative_probs = torch.cumsum( + F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold + sorted_indices_to_remove = cumulative_probs > top_p + # Shift the indices to the right to keep also the first token + # above the threshold + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[ + ..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + for i in range(sorted_indices.size(0)): + indices_to_remove = sorted_indices[i][sorted_indices_to_remove[i]] + logits[i][indices_to_remove] = filter_value + + return logits + + +def pad_batch(batch, pad_id, seq_length): + context_lengths = [] + for tokens in batch: + context_length = len(tokens) + if context_length < seq_length: + tokens.extend([pad_id] * (seq_length - context_length)) + context_lengths.append(context_length) + return batch, context_lengths + + +def get_token_stream( + model, + tokenizer, + seq_length, + out_seq_length, + context_tokens, + return_scores: bool = False, + prompt_length: int = None, + micro_batch_size: int = None, + bad_ids: List = None, + temperature: float = 1.0, + topp: float = 1.0, + topk: int = 0.0, + greedy: bool = False, +): + context_tokens, context_lengths = pad_batch(context_tokens, + tokenizer.eos_token_id, + seq_length) + + context_tokens_tensor = torch.cuda.LongTensor(context_tokens) + context_length_tensor = torch.cuda.LongTensor(context_lengths) + context_length = context_length_tensor.min().item() + tokens, attention_mask, position_ids = get_batch( + context_tokens_tensor, + micro_batch_size, + tokenizer.eos_token_id, + ) + + batch_token_iterator = sample_sequence_batch( + model, + tokenizer, + context_tokens_tensor, + context_length_tensor, + attention_mask, + position_ids, + seq_length=seq_length, + out_seq_length=out_seq_length, + 
return_scores=return_scores, + prompt_length=prompt_length, + bad_ids=bad_ids, + temperature=temperature, + topp=topp, + topk=topk, + greedy=greedy, + ) + + for tokens, lengths in batch_token_iterator: + context_length += 1 + if tokens is not None: + yield tokens[:, :context_length], lengths + else: + yield None, None + + +def switch(val1, val2, boolean): + boolean = boolean.type_as(val1) + return (1 - boolean) * val1 + boolean * val2 + + +def sample_sequence_batch( + model, + tokenizer, + context_tokens, + context_lengths, + attention_mask, + position_ids, + seq_length, + out_seq_length, + maxlen=None, + return_scores: bool = False, + prompt_length: int = None, + bad_ids: List = None, + temperature: float = 1.0, + topp: float = 1.0, + topk: int = 0.0, + recompute: bool = False, + greedy: bool = False, +): + model.eval() + with torch.no_grad(): + context_length = context_lengths.min().item() + eos_id = tokenizer.eos_token_id + + counter = 0 + org_context_length = context_length + + layer_past = None + batch_size = context_tokens.size(0) + is_done = torch.zeros([batch_size]).byte().cuda() + tokens = context_tokens + if maxlen is None: + maxlen = seq_length - 1 + if maxlen > (org_context_length + out_seq_length): + maxlen = org_context_length + out_seq_length + + lengths = torch.ones([batch_size]).long().cuda() * maxlen + if return_scores: + scores = torch.zeros([batch_size]).float().cuda() + + while context_length <= (maxlen): + + if recompute: + logits = model( + tokens, + position_ids, + attention_mask, + prompt_length=prompt_length, + context_length=context_length, + ) + logits = logits[:, context_length - 1, :] + else: + if counter == 0: + tokens2use = tokens[:, :context_length] + positions2use = position_ids[:, :context_length] + else: + tokens2use = tokens[:, context_length - 1].view( + batch_size, -1) + positions2use = position_ids[:, context_length - 1].view( + batch_size, -1) + logits, layer_past = model( + tokens2use, + positions2use, + attention_mask, + layer_past=layer_past, + get_key_value=True, + prompt_length=prompt_length, + context_length=context_length, + ) + logits = logits[:, -1].view(batch_size, -1).contiguous() + + if bad_ids is not None: + for bad_id in bad_ids: + logits[:, bad_id] = -10000 + if greedy: + prev = torch.argmax(logits, dim=-1).view(-1) + else: + logits = logits.float() + if return_scores: + orig_log_probs = torch.log_softmax(logits, dim=-1) + logits /= temperature + logits = top_k_logits(logits, top_k=topk, top_p=topp) + log_probs = F.softmax(logits, dim=-1) + prev = torch.multinomial(log_probs, num_samples=1).view(-1) + + started = context_lengths <= context_length + + new_tokens = switch(tokens[:, context_length].view(-1), prev, + started) + + if not greedy and return_scores: + indices = prev.view(-1, 1) + new_scores = orig_log_probs.gather(1, indices).view(-1) + new_scores = new_scores * started + new_scores = new_scores * is_done.bool().logical_not() + scores += new_scores + + tokens[:, context_length] = new_tokens + done_token = (prev == eos_id).byte() & started.byte() + just_finished = (done_token & ~is_done).bool() + lengths[just_finished.view(-1)] = context_length + is_done = is_done | done_token + done = torch.all(is_done) + + if return_scores: + yield tokens, (lengths, scores) + else: + yield tokens, lengths + + context_length += 1 + counter += 1 + if done: + break diff --git a/modelscope/models/nlp/codegeex/tokenizer.py b/modelscope/models/nlp/codegeex/tokenizer.py new file mode 100755 index 00000000..a5da9a3c --- /dev/null +++ 
b/modelscope/models/nlp/codegeex/tokenizer.py @@ -0,0 +1,187 @@ +# Copyright (c) 2022 Zhipu.AI +from typing import List, Union + +import torch +from transformers import AutoTokenizer +from transformers.models.gpt2 import GPT2TokenizerFast + + +def encode_whitespaces(text, start_extra_id: int, max_len: int): + """ Encode whitespaces to extra tokens in GPT-J. + + >>> encode_whitespaces('a\\n b\\n c', 10, 10) + 'a\\n<|extratoken_10|>b\\n<|extratoken_11|>c' + """ + + def push_acc_space(acc_len: int, text: str): + if acc_len == 0: + return text + if acc_len == 1: + return text + ' ' + assert acc_len <= max_len, f'Max whitespace run length {max_len}, but found {acc_len}' + extra_id = start_extra_id - 2 + acc_len + extra_token = f'<|extratoken_{extra_id}|>' + return text + extra_token + + acc_len = 0 + res = '' + for ch in text: + if ch == ' ': + acc_len += 1 + if acc_len == max_len: + res = push_acc_space(acc_len, res) + acc_len = 0 + else: + res = push_acc_space(acc_len, res) + acc_len = 0 + res = res + ch + + res = push_acc_space(acc_len, res) + + return res + + +def decode_whitespaces(text: str, start_extra_id: int, max_len: int): + """ Decode the whitespace-encoded strings produced by encode_whitespace. + + >>> text = 'a\\n b\\n c' + >>> s, l = 10, 10 + >>> text == decode_whitespaces(encode_whitespaces(text, s, l), s, l) + True + """ + for l in range(2, max_len + 1): # noqa + token_id = start_extra_id - 2 + l + token = f'<|extratoken_{token_id}|>' + text = text.replace(token, ' ' * l) + return text + + +class Code13BDictionary(object): + + def __init__( + self, + dict_file: str, + extra_token_ids: List[str] = None, + pad_to_vocab_size: int = -1, + ): + self._idx = dict() + self._count = dict() + self._num_symbols = 0 + self._symbols = [] + + self._add_symbol('', 0) + self._add_symbol('', 0) + self._add_symbol('', 0) + self._add_symbol('', 0) + self._load_dict(dict_file) + + if extra_token_ids is None: + extra_token_ids = [str(x) for x in range(50257, 50400) + ] # follows GPT-J settings + + for token_id in extra_token_ids: + self._add_symbol(token_id, 0) + + if pad_to_vocab_size > 0: + self._pad_to_vocab_size(pad_to_vocab_size) + + def _pad_to_vocab_size(self, vocab_size: int): + num_pad = vocab_size - len(self) + if num_pad <= 0: + return + for i in range(1, num_pad + 1): + self._add_symbol('vocab_pad_token{}'.format(i), 0) + + def _load_dict(self, dict_file: str): + with open(dict_file, 'r') as f: + for line in f: + line = line.strip() + if line == '' or line.startswith('#'): + continue + sym, count = line.split() + self._add_symbol(sym, int(count)) + + def _add_symbol(self, sym: str, count: int): + self._idx[sym] = self._num_symbols + self._count[sym] = count + self._symbols.append(sym) + self._num_symbols += 1 + + def __len__(self): + return self._num_symbols + + def index(self, sym: str): + return self._idx[sym] + + def string(self, idx: int): + return self._symbols[idx] + + def map_token(self, token: Union[int, str]): + if isinstance(token, int): + token = str(token) + return self.index(token) + + def map_tokens(self, tokens): + return [self.map_token(token) for token in tokens] + + def decode_tokens(self, tokens): + decoded = [ + '50256' if token == 50256 else self.string(token) + for token in tokens + ] + return [int(x) for x in decoded if not x.startswith('vocab_pad_token')] + + +class CodeGeeXTokenizer(object): + + def __init__( + self, + tokenizer: GPT2TokenizerFast = None, + tokenizer_path: str = 'EleutherAI/gpt-j-6B', + start_extra_id: int = 10, + max_len: int = 10, + 
mode='codegeex-13b', + dict_file: str = None, + ): + self.tokenizer = tokenizer if tokenizer is not None else AutoTokenizer.from_pretrained( + tokenizer_path) + if mode not in ['codegeex-13b', 'codegeex-python-13b']: + raise ValueError( + f"Invalid mode {mode}, choose from ['codegeex-13b', 'codegeex-python-13b']" + ) + self.start_extra_id = start_extra_id + self.max_len = max_len + self.mode = mode + if dict_file is not None: + self.code_dict = Code13BDictionary( + dict_file, pad_to_vocab_size=51200 + ) if self.mode == 'codegeex-python-13b' else None + else: + self.code_dict = None + self.eos_token_id = self.tokenizer.eos_token_id + + def encode_code(self, code: str): + if self.mode == 'codegeex-13b': + code = encode_whitespaces(code, self.start_extra_id, self.max_len) + input_ids = self.tokenizer( + code, is_split_into_words=False).input_ids + + elif self.mode == 'codegeex-python-13b': + code = encode_whitespaces(code, self.start_extra_id, self.max_len) + input_ids = self.code_dict.map_tokens(self.tokenizer.encode(code)) + input_ids = torch.LongTensor(input_ids).reshape(1, -1) + + return input_ids + + def decode_code(self, input_ids): + if self.mode == 'codegeex-13b': + text = self.tokenizer.decode(input_ids, skip_special_tokens=False) + output_code = decode_whitespaces(text, self.start_extra_id, + self.max_len) + elif self.mode == 'codegeex-python-13b': + input_ids = [self.code_dict.decode_tokens(input_ids.tolist()[0])] + text = self.tokenizer.decode(input_ids, skip_special_tokens=False) + output_code = decode_whitespaces(text, self.start_extra_id, + self.max_len) + + return output_code diff --git a/modelscope/pipelines/audio/asr_wenet_inference_pipeline.py b/modelscope/pipelines/audio/asr_wenet_inference_pipeline.py new file mode 100644 index 00000000..6df47bcb --- /dev/null +++ b/modelscope/pipelines/audio/asr_wenet_inference_pipeline.py @@ -0,0 +1,87 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from typing import Any, Dict, Union + +from modelscope.metainfo import Pipelines +from modelscope.models import Model +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import WavToScp +from modelscope.utils.audio.audio_utils import (extract_pcm_from_wav, + load_bytes_from_url) +from modelscope.utils.constant import Tasks +from modelscope.utils.logger import get_logger + +logger = get_logger() + +__all__ = ['WeNetAutomaticSpeechRecognitionPipeline'] + + +@PIPELINES.register_module( + Tasks.auto_speech_recognition, module_name=Pipelines.asr_wenet_inference) +class WeNetAutomaticSpeechRecognitionPipeline(Pipeline): + """ASR Inference Pipeline + """ + + def __init__(self, + model: Union[Model, str] = None, + preprocessor: WavToScp = None, + **kwargs): + """use `model` and `preprocessor` to create an asr pipeline for prediction + """ + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + + def __call__(self, + audio_in: Union[str, bytes], + audio_fs: int = None, + recog_type: str = None, + audio_format: str = None) -> Dict[str, Any]: + from easyasr.common import asr_utils + + self.recog_type = recog_type + self.audio_format = audio_format + self.audio_fs = audio_fs + + if isinstance(audio_in, str): + # load pcm data from url if audio_in is url str + self.audio_in, checking_audio_fs = load_bytes_from_url(audio_in) + elif isinstance(audio_in, bytes): + # load pcm data from wav data if audio_in is wave format + self.audio_in, checking_audio_fs = extract_pcm_from_wav(audio_in) + else: + self.audio_in = audio_in + + # set the sample_rate of audio_in if checking_audio_fs is valid + if checking_audio_fs is not None: + self.audio_fs = checking_audio_fs + + if recog_type is None or audio_format is None: + self.recog_type, self.audio_format, self.audio_in = asr_utils.type_checking( + audio_in=self.audio_in, + recog_type=recog_type, + audio_format=audio_format) + + if hasattr(asr_utils, 'sample_rate_checking'): + checking_audio_fs = asr_utils.sample_rate_checking( + self.audio_in, self.audio_format) + if checking_audio_fs is not None: + self.audio_fs = checking_audio_fs + + inputs = { + 'audio': self.audio_in, + 'audio_format': self.audio_format, + 'audio_fs': self.audio_fs + } + output = self.forward(inputs) + rst = self.postprocess(output['asr_result']) + return rst + + def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Decoding + """ + inputs['asr_result'] = self.model(inputs) + return inputs + + def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """process the asr results + """ + return inputs diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py index f94398b6..fd731ef6 100644 --- a/modelscope/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -33,6 +33,8 @@ if TYPE_CHECKING: from .word_segmentation_pipeline import WordSegmentationPipeline, WordSegmentationThaiPipeline from .zero_shot_classification_pipeline import ZeroShotClassificationPipeline from .mglm_text_summarization_pipeline import MGLMTextSummarizationPipeline + from .codegeex_code_translation_pipeline import CodeGeeXCodeTranslationPipeline + from .codegeex_code_generation_pipeline import CodeGeeXCodeGenerationPipeline else: _import_structure = { @@ -75,6 +77,10 @@ else: 'zero_shot_classification_pipeline': ['ZeroShotClassificationPipeline'], 'mglm_text_summarization_pipeline': ['MGLMTextSummarizationPipeline'], + 'codegeex_code_translation_pipeline': + 
['CodeGeeXCodeTranslationPipeline'], + 'codegeex_code_generation_pipeline': + ['CodeGeeXCodeGenerationPipeline'], } import sys diff --git a/modelscope/pipelines/nlp/codegeex_code_generation_pipeline.py b/modelscope/pipelines/nlp/codegeex_code_generation_pipeline.py new file mode 100755 index 00000000..f23461b1 --- /dev/null +++ b/modelscope/pipelines/nlp/codegeex_code_generation_pipeline.py @@ -0,0 +1,55 @@ +# Copyright (c) 2022 Zhipu.AI + +from typing import Any, Dict, Union + +from modelscope.metainfo import Pipelines +from modelscope.models.nlp import CodeGeeXForCodeGeneration +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import Preprocessor +from modelscope.utils.constant import Tasks + + +@PIPELINES.register_module( + group_key=Tasks.code_generation, + module_name=Pipelines.codegeex_code_generation) +class CodeGeeXCodeGenerationPipeline(Pipeline): + + def __init__(self, + model: Union[CodeGeeXForCodeGeneration, str], + preprocessor: [Preprocessor] = None, + *args, + **kwargs): + model = CodeGeeXForCodeGeneration(model) if isinstance(model, + str) else model + self.model = model + self.model.eval() + self.model.half() + self.model.cuda() + + super().__init__(model=model, **kwargs) + + def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]: + return inputs + + # define the forward pass + def forward(self, inputs: Union[Dict], **forward_params) -> Dict[str, Any]: + # check input format + for para in ['prompt', 'language']: + if para not in inputs: + raise Exception('Please check your input format.') + if inputs['language'] not in [ + 'C++', 'C', 'C#', 'Cuda', 'Objective-C', 'Objective-C++', + 'Python', 'Java', 'Scala', 'TeX', 'HTML', 'PHP', 'JavaScript', + 'TypeScript', 'Go', 'Shell', 'Rust', 'CSS', 'SQL', 'Kotlin', + 'Pascal', 'R', 'Fortran', 'Lean' + ]: # noqa + raise Exception( + 'Make sure the language is in ["C++","C","C#","Cuda","Objective-C","Objective-C++","Python","Java","Scala","TeX","HTML","PHP","JavaScript","TypeScript","Go","Shell","Rust","CSS","SQL","Kotlin","Pascal","R","Fortran","Lean"]' # noqa + ) # noqa + + return self.model(inputs) + + # format the outputs from pipeline + def postprocess(self, input, **kwargs) -> Dict[str, Any]: + return input diff --git a/modelscope/pipelines/nlp/codegeex_code_translation_pipeline.py b/modelscope/pipelines/nlp/codegeex_code_translation_pipeline.py new file mode 100755 index 00000000..8bd5a6da --- /dev/null +++ b/modelscope/pipelines/nlp/codegeex_code_translation_pipeline.py @@ -0,0 +1,65 @@ +# Copyright (c) 2022 Zhipu.AI + +from typing import Any, Dict, Union + +from modelscope.metainfo import Pipelines +from modelscope.models.nlp import CodeGeeXForCodeTranslation +from modelscope.pipelines.base import Pipeline +from modelscope.pipelines.builder import PIPELINES +from modelscope.preprocessors import Preprocessor +from modelscope.utils.constant import Tasks + + +@PIPELINES.register_module( + group_key=Tasks.code_translation, + module_name=Pipelines.codegeex_code_translation) +class CodeGeeXCodeTranslationPipeline(Pipeline): + + def __init__(self, + model: Union[CodeGeeXForCodeTranslation, str], + preprocessor: [Preprocessor] = None, + *args, + **kwargs): + model = CodeGeeXForCodeTranslation(model) if isinstance(model, + str) else model + self.model = model + self.model.eval() + self.model.half() + self.model.cuda() + + super().__init__(model=model, **kwargs) + + def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]: + return 
inputs + + # define the forward pass + def forward(self, inputs: Union[Dict], **forward_params) -> Dict[str, Any]: + # check input format + for para in ['prompt', 'source language', 'target language']: + if para not in inputs: + raise Exception('please check your input format.') + if inputs['source language'] not in [ + 'C++', 'C', 'C#', 'Cuda', 'Objective-C', 'Objective-C++', + 'Python', 'Java', 'Scala', 'TeX', 'HTML', 'PHP', 'JavaScript', + 'TypeScript', 'Go', 'Shell', 'Rust', 'CSS', 'SQL', 'Kotlin', + 'Pascal', 'R', 'Fortran', 'Lean' + ]: + raise Exception( + 'Make sure the source language is in ["C++","C","C#","Cuda","Objective-C","Objective-C++","Python","Java","Scala","TeX","HTML","PHP","JavaScript","TypeScript","Go","Shell","Rust","CSS","SQL","Kotlin","Pascal","R","Fortran","Lean"]' # noqa + ) # noqa + + if inputs['target language'] not in [ + 'C++', 'C', 'C#', 'Cuda', 'Objective-C', 'Objective-C++', + 'Python', 'Java', 'Scala', 'TeX', 'HTML', 'PHP', 'JavaScript', + 'TypeScript', 'Go', 'Shell', 'Rust', 'CSS', 'SQL', 'Kotlin', + 'Pascal', 'R', 'Fortran', 'Lean' + ]: + raise Exception( + 'Make sure the target language is in ["C++","C","C#","Cuda","Objective-C","Objective-C++","Python","Java","Scala","TeX","HTML","PHP","JavaScript","TypeScript","Go","Shell","Rust","CSS","SQL","Kotlin","Pascal","R","Fortran","Lean"]' # noqa + ) # noqa + + return self.model(inputs) + + # format the outputs from pipeline + def postprocess(self, input, **kwargs) -> Dict[str, Any]: + return input diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py index 0db1c7e0..ce053459 100644 --- a/modelscope/preprocessors/__init__.py +++ b/modelscope/preprocessors/__init__.py @@ -23,7 +23,7 @@ if TYPE_CHECKING: SentenceEmbeddingPreprocessor, SequenceClassificationPreprocessor, TokenClassificationPreprocessor, TextErrorCorrectionPreprocessor, TextGenerationPreprocessor, Text2TextGenerationPreprocessor, Tokenize, - WordSegmentationBlankSetToLabelPreprocessor, + WordSegmentationBlankSetToLabelPreprocessor, CodeGeeXPreprocessor, MGLMSummarizationPreprocessor, ZeroShotClassificationPreprocessor, TextGenerationJiebaPreprocessor, SentencePiecePreprocessor, DialogIntentPredictionPreprocessor, DialogModelingPreprocessor, @@ -57,7 +57,7 @@ else: 'TextErrorCorrectionPreprocessor', 'TextGenerationPreprocessor', 'Tokenize', 'Text2TextGenerationPreprocessor', 'WordSegmentationBlankSetToLabelPreprocessor', - 'MGLMSummarizationPreprocessor', + 'MGLMSummarizationPreprocessor', 'CodeGeeXPreprocessor', 'ZeroShotClassificationPreprocessor', 'TextGenerationJiebaPreprocessor', 'SentencePiecePreprocessor', 'NERPreprocessorViet', 'NERPreprocessorThai', diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 23ffa381..007a6174 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -121,6 +121,8 @@ class NLPTasks(object): fill_mask = 'fill-mask' text_summarization = 'text-summarization' question_answering = 'question-answering' + code_translation = 'code-translation' + code_generation = 'code-generation' zero_shot_classification = 'zero-shot-classification' backbone = 'backbone' text_error_correction = 'text-error-correction' diff --git a/modelscope/utils/error.py b/modelscope/utils/error.py index a894063c..8128f7b0 100644 --- a/modelscope/utils/error.py +++ b/modelscope/utils/error.py @@ -70,6 +70,11 @@ PYTORCH_IMPORT_ERROR = """ installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment. 
""" +WENETRUNTIME_IMPORT_ERROR = """ +{0} requires the wenetruntime library but it was not found in your environment. You can install it with pip: +`pip install wenetruntime==TORCH_VER` +""" + # docstyle-ignore SCIPY_IMPORT_ERROR = """ {0} requires the scipy library but it was not found in your environment. You can install it with pip: diff --git a/modelscope/utils/import_utils.py b/modelscope/utils/import_utils.py index f817b7a5..74b2d8e9 100644 --- a/modelscope/utils/import_utils.py +++ b/modelscope/utils/import_utils.py @@ -245,6 +245,10 @@ def is_torch_cuda_available(): return False +def is_wenetruntime_available(): + return importlib.util.find_spec('wenetruntime') is not None + + def is_tf_available(): return _tf_available @@ -280,6 +284,9 @@ REQUIREMENTS_MAAPING = OrderedDict([ ('timm', (is_timm_available, TIMM_IMPORT_ERROR)), ('tokenizers', (is_tokenizers_available, TOKENIZERS_IMPORT_ERROR)), ('torch', (is_torch_available, PYTORCH_IMPORT_ERROR)), + ('wenetruntime', + (is_wenetruntime_available, + WENETRUNTIME_IMPORT_ERROR.replace('TORCH_VER', _torch_version))), ('scipy', (is_scipy_available, SCIPY_IMPORT_ERROR)), ('cv2', (is_opencv_available, OPENCV_IMPORT_ERROR)), ('PIL', (is_pillow_available, PILLOW_IMPORT_ERROR)), diff --git a/tests/pipelines/test_wenet_automatic_speech_recognition.py b/tests/pipelines/test_wenet_automatic_speech_recognition.py new file mode 100644 index 00000000..4adf8119 --- /dev/null +++ b/tests/pipelines/test_wenet_automatic_speech_recognition.py @@ -0,0 +1,131 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import os +import shutil +import unittest +from typing import Any, Dict, Union + +import numpy as np +import soundfile + +from modelscope.outputs import OutputKeys +from modelscope.pipelines import pipeline +from modelscope.utils.constant import ColorCodes, Tasks +from modelscope.utils.demo_utils import DemoCompatibilityCheck +from modelscope.utils.logger import get_logger +from modelscope.utils.test_utils import download_and_untar, test_level + +logger = get_logger() + +WAV_FILE = 'data/test/audios/asr_example.wav' +URL_FILE = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example.wav' + + +class WeNetAutomaticSpeechRecognitionTest(unittest.TestCase, + DemoCompatibilityCheck): + action_info = { + 'test_run_with_pcm': { + 'checking_item': OutputKeys.TEXT, + 'example': 'wav_example' + }, + 'test_run_with_url': { + 'checking_item': OutputKeys.TEXT, + 'example': 'wav_example' + }, + 'test_run_with_wav': { + 'checking_item': OutputKeys.TEXT, + 'example': 'wav_example' + }, + 'wav_example': { + 'text': '每一天都要快乐喔' + } + } + + def setUp(self) -> None: + self.am_model_id = 'wenet/u2pp_conformer-asr-cn-16k-online' + # this temporary workspace dir will store waveform files + self.workspace = os.path.join(os.getcwd(), '.tmp') + self.task = Tasks.auto_speech_recognition + if not os.path.exists(self.workspace): + os.mkdir(self.workspace) + + def tearDown(self) -> None: + # remove workspace dir (.tmp) + shutil.rmtree(self.workspace, ignore_errors=True) + + def run_pipeline(self, + model_id: str, + audio_in: Union[str, bytes], + sr: int = None) -> Dict[str, Any]: + inference_16k_pipline = pipeline( + task=Tasks.auto_speech_recognition, model=model_id) + rec_result = inference_16k_pipline(audio_in, audio_fs=sr) + return rec_result + + def log_error(self, functions: str, result: Dict[str, Any]) -> None: + logger.error(ColorCodes.MAGENTA + functions + ': FAILED.' 
+ + ColorCodes.END) + logger.error( + ColorCodes.MAGENTA + functions + ' correct result example:' + + ColorCodes.YELLOW + + str(self.action_info[self.action_info[functions]['example']]) + + ColorCodes.END) + raise ValueError('asr result is mismatched') + + def check_result(self, functions: str, result: Dict[str, Any]) -> None: + if result.__contains__(self.action_info[functions]['checking_item']): + logger.info(ColorCodes.MAGENTA + functions + ': SUCCESS.' + + ColorCodes.END) + logger.info( + ColorCodes.YELLOW + + str(result[self.action_info[functions]['checking_item']]) + + ColorCodes.END) + else: + self.log_error(functions, result) + + def wav2bytes(self, wav_file): + audio, fs = soundfile.read(wav_file) + + # float32 -> int16 + audio = np.asarray(audio) + dtype = np.dtype('int16') + i = np.iinfo(dtype) + abs_max = 2**(i.bits - 1) + offset = i.min + abs_max + audio = (audio * abs_max + offset).clip(i.min, i.max).astype(dtype) + + # int16(PCM_16) -> byte + audio = audio.tobytes() + return audio, fs + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_pcm(self): + """run with wav data + """ + logger.info('Run ASR test with wav data (wenet)...') + audio, sr = self.wav2bytes(os.path.join(os.getcwd(), WAV_FILE)) + rec_result = self.run_pipeline( + model_id=self.am_model_id, audio_in=audio, sr=sr) + self.check_result('test_run_with_pcm', rec_result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_wav(self): + """run with single waveform file + """ + logger.info('Run ASR test with waveform file (wenet)...') + wav_file_path = os.path.join(os.getcwd(), WAV_FILE) + rec_result = self.run_pipeline( + model_id=self.am_model_id, audio_in=wav_file_path) + self.check_result('test_run_with_wav', rec_result) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_url(self): + """run with single url file + """ + logger.info('Run ASR test with url file (wenet)...') + rec_result = self.run_pipeline( + model_id=self.am_model_id, audio_in=URL_FILE) + self.check_result('test_run_with_url', rec_result) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/run_config.yaml b/tests/run_config.yaml index 2e06b88e..cb90852f 100644 --- a/tests/run_config.yaml +++ b/tests/run_config.yaml @@ -10,6 +10,7 @@ isolated: # test cases that may require excessive anmount of GPU memory or run - test_easycv_trainer.py - test_segformer.py - test_segmentation_pipeline.py + - test_movie_scene_segmentation.py - test_image_inpainting.py - test_mglm_text_summarization.py - test_team_transfer_trainer.py
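
The diff registers two new CodeGeeX pipelines (`codegeex-code-generation` and `codegeex-code-translation`) but does not show them being invoked end to end. The sketch below is a minimal, hypothetical usage example, not part of the patch: the task constants and the input keys (`prompt`, `language`, `source language`, `target language`) come from the pipeline code above, while the model ids are placeholders to be replaced with the actual CodeGeeX models published on ModelScope. Both pipeline constructors move the model to fp16 on CUDA, so a GPU is required.

```python
# Hypothetical usage sketch for the new CodeGeeX pipelines.
# The model ids below are placeholders, not real ModelScope model ids.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Code generation: forward() expects a dict with 'prompt' and 'language' keys.
generator = pipeline(
    task=Tasks.code_generation,
    model='<codegeex-code-generation-model-id>')  # placeholder id
print(generator({'prompt': 'def quick_sort(arr):', 'language': 'Python'}))

# Code translation: forward() expects 'prompt', 'source language'
# and 'target language' keys.
translator = pipeline(
    task=Tasks.code_translation,
    model='<codegeex-code-translation-model-id>')  # placeholder id
print(translator({
    'prompt': 'print("Hello, world!")',
    'source language': 'Python',
    'target language': 'C++',
}))
```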
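
For readers skimming the new tokenizer in `modelscope/models/nlp/codegeex/tokenizer.py`, the short sketch below works through the whitespace-encoding scheme it implements with the default `start_extra_id=10, max_len=10`. It is illustrative only and assumes the module path laid out in the diff is importable from an installed checkout.

```python
# Illustrative sketch: how encode_whitespaces maps indentation runs onto
# GPT-J extra tokens (assumes the module path from the diff above).
from modelscope.models.nlp.codegeex.tokenizer import (decode_whitespaces,
                                                      encode_whitespaces)

code = 'def f():\n    return 1'  # body indented by four spaces
enc = encode_whitespaces(code, start_extra_id=10, max_len=10)

# A run of n spaces (2 <= n <= max_len) becomes <|extratoken_{10 - 2 + n}|>,
# so the 4-space indent is emitted as <|extratoken_12|>; single spaces are
# kept as-is, and longer runs are flushed in chunks of max_len.
print(enc)  # def f():\n<|extratoken_12|>return 1

# decode_whitespaces inverts the mapping, so the round trip is lossless.
assert decode_whitespaces(enc, 10, 10) == code
```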