| @@ -0,0 +1,3 @@ | |||
| *.png filter=lfs diff=lfs merge=lfs -text | |||
| *.jpg filter=lfs diff=lfs merge=lfs -text | |||
| *.mp4 filter=lfs diff=lfs merge=lfs -text | |||
| @@ -104,7 +104,6 @@ venv.bak/ | |||
| # mypy | |||
| .mypy_cache/ | |||
| data | |||
| .vscode | |||
| .idea | |||
| @@ -0,0 +1,67 @@ | |||
| DOCKER_REGISTRY = registry.cn-shanghai.aliyuncs.com | |||
| DOCKER_ORG = modelscope | |||
| DOCKER_IMAGE = modelscope | |||
| DOCKER_FULL_NAME = $(DOCKER_REGISTRY)/$(DOCKER_ORG)/$(DOCKER_IMAGE) | |||
| # CUDA_VERSION = 11.3 | |||
| # CUDNN_VERSION = 8 | |||
| BASE_RUNTIME = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 | |||
| # BASE_DEVEL = reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 | |||
| BASE_DEVEL = pytorch/pytorch:1.10.0-cuda11.3-cudnn8-devel | |||
| MODELSCOPE_VERSION = $(shell git describe --tags --always) | |||
| # Can be either official / dev | |||
| BUILD_TYPE = dev | |||
| BUILD_PROGRESS = auto | |||
| BUILD_ARGS = --build-arg BASE_IMAGE=$(BASE_IMAGE) | |||
| EXTRA_DOCKER_BUILD_FLAGS ?= --network=host | |||
| # DOCKER_BUILD = DOCKER_BUILDKIT=1 \ | |||
| # docker build \ | |||
| # --progress=$(BUILD_PROGRESS) \ | |||
| # $(EXTRA_DOCKER_BUILD_FLAGS) \ | |||
| # --target $(BUILD_TYPE) \ | |||
| # -t $(DOCKER_FULL_NAME):$(DOCKER_TAG) \ | |||
| # $(BUILD_ARGS) \ | |||
| # -f docker/pytorch.dockerfile . | |||
| DOCKER_BUILD = DOCKER_BUILDKIT=1 \ | |||
| docker build \ | |||
| $(EXTRA_DOCKER_BUILD_FLAGS) \ | |||
| -t $(DOCKER_FULL_NAME):$(DOCKER_TAG) \ | |||
| $(BUILD_ARGS) \ | |||
| -f docker/pytorch.dockerfile . | |||
| DOCKER_PUSH = docker push $(DOCKER_FULL_NAME):$(DOCKER_TAG) | |||
| .PHONY: all | |||
| all: devel-image | |||
| .PHONY: devel-image | |||
| devel-image: BASE_IMAGE := $(BASE_DEVEL) | |||
| devel-image: DOCKER_TAG := $(MODELSCOPE_VERSION)-devel | |||
| devel-image: | |||
| $(DOCKER_BUILD) | |||
| .PHONY: devel-push | |||
| devel-push: BASE_IMAGE := $(BASE_DEVEL) | |||
| devel-push: DOCKER_TAG := $(MODELSCOPE_VERSION)-devel | |||
| devel-push: | |||
| $(DOCKER_PUSH) | |||
| .PHONY: runtime-image | |||
| runtime-image: BASE_IMAGE := $(BASE_RUNTIME) | |||
| runtime-image: DOCKER_TAG := $(MODELSCOPE_VERSION)-runtime | |||
| runtime-image: | |||
| $(DOCKER_BUILD) | |||
| docker tag $(DOCKER_FULL_NAME):$(DOCKER_TAG) $(DOCKER_FULL_NAME):latest | |||
| .PHONY: runtime-push | |||
| runtime-push: BASE_IMAGE := $(BASE_RUNTIME) | |||
| runtime-push: DOCKER_TAG := $(MODELSCOPE_VERSION)-runtime | |||
| runtime-push: | |||
| $(DOCKER_PUSH) | |||
| .PHONY: clean | |||
| clean: | |||
| -docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME)) | |||
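# Example invocations (run from the repo root; the file name Makefile.docker
# is taken from the development doc below):
#   make -f Makefile.docker devel-image     # build the devel image
#   make -f Makefile.docker runtime-push    # push the runtime image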
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:78094cc48fbcfd9b6d321fe13619ecc72b65e006fc1b4c4458409ade9979486d | |||
| size 129862 | |||
| @@ -0,0 +1,3 @@ | |||
| version https://git-lfs.github.com/spec/v1 | |||
| oid sha256:af83a94899a6d23339c3ecc5c4c58c57c835af57b531a2f4c50461184f820141 | |||
| size 603621 | |||
| @@ -0,0 +1,4 @@ | |||
| *.sh | |||
| *.md | |||
| *.dockerfile | |||
| *.zip | |||
| @@ -0,0 +1,53 @@ | |||
| # syntax = docker/dockerfile:experimental | |||
| # | |||
| # NOTE: To build this you will need a docker version > 18.06 with | |||
| # experimental enabled and DOCKER_BUILDKIT=1 | |||
| # | |||
| # If you do not use buildkit you are not going to have a good time | |||
| # | |||
| # For reference: | |||
| # https://docs.docker.com/develop/develop-images/build_enhancements/ | |||
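# A minimal build command consistent with the Makefile in this repo (the tag
# name is illustrative):
#   DOCKER_BUILDKIT=1 docker build -f docker/pytorch.dockerfile -t modelscope:dev .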
| # ARG BASE_IMAGE=reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 | |||
| # FROM ${BASE_IMAGE} as dev-base | |||
| # FROM reg.docker.alibaba-inc.com/pai-dlc/pytorch-training:1.10PAI-gpu-py36-cu113-ubuntu18.04 as dev-base | |||
| FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-devel | |||
| # FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime | |||
| # config pip source | |||
| RUN mkdir /root/.pip | |||
| COPY docker/rcfiles/pip.conf.tsinghua /root/.pip/pip.conf | |||
| COPY docker/rcfiles/sources.list.aliyun /etc/apt/sources.list | |||
| # Install essential Ubuntu packages | |||
| RUN apt-get update &&\ | |||
| apt-get install -y software-properties-common \ | |||
| build-essential \ | |||
| git \ | |||
| wget \ | |||
| vim \ | |||
| curl \ | |||
| zip \ | |||
| zlib1g-dev \ | |||
| unzip \ | |||
| pkg-config | |||
| # install modelscope and its python env | |||
| WORKDIR /opt/modelscope | |||
| COPY . . | |||
| RUN pip install -r requirements.txt | |||
| # RUN --mount=type=cache,target=/opt/ccache \ | |||
| # python setup.py install | |||
# opencv-python-headless conflicts with the opencv-python already installed
| RUN python setup.py install \ | |||
| && pip uninstall -y opencv-python-headless | |||
| # prepare modelscope libs | |||
| COPY docker/scripts/install_libs.sh /tmp/ | |||
| RUN bash /tmp/install_libs.sh && \ | |||
| rm -rf /tmp/install_libs.sh | |||
| ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/modelscope/lib64 | |||
| WORKDIR /workspace | |||
| @@ -0,0 +1,2 @@ | |||
| [global] | |||
| index-url=https://pypi.tuna.tsinghua.edu.cn/simple | |||
| @@ -0,0 +1,25 @@ | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic main restricted | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic main restricted | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic universe | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic universe | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic-updates universe | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates universe | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic multiverse | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic multiverse | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic-updates multiverse | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates multiverse | |||
| deb http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse | |||
| # deb-src http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse | |||
| deb http://mirrors.aliyun.com/ubuntu bionic-security main restricted | |||
| # deb-src http://mirrors.aliyun.com/ubuntu bionic-security main restricted | |||
| deb http://mirrors.aliyun.com/ubuntu bionic-security universe | |||
| # deb-src http://mirrors.aliyun.com/ubuntu bionic-security universe | |||
| deb http://mirrors.aliyun.com/ubuntu bionic-security multiverse | |||
| # deb-src http://mirrors.aliyun.com/ubuntu bionic-security multiverse | |||
| @@ -0,0 +1,10 @@ | |||
| set nocompatible | |||
| set encoding=utf-8 | |||
| set hlsearch | |||
| set smartindent | |||
| set ruler | |||
| set number | |||
| set ts=2 | |||
| set sw=2 | |||
| set expandtab | |||
| autocmd FileType make setlocal noexpandtab | |||
| @@ -0,0 +1,12 @@ | |||
| #!/bin/bash | |||
| set -eo pipefail | |||
| ModelScopeLib=/usr/local/modelscope/lib64 | |||
| if [ ! -d /usr/local/modelscope ]; then | |||
| mkdir -p $ModelScopeLib | |||
| fi | |||
| # audio libs | |||
| wget "http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/libs/audio/libmitaec_pyio.so" -O ${ModelScopeLib}/libmitaec_pyio.so | |||
| @@ -76,7 +76,7 @@ exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] | |||
| # The theme to use for HTML and HTML Help pages. See the documentation for | |||
| # a list of builtin themes. | |||
| # | |||
| html_theme = 'sphinx_rtd_theme' | |||
| html_theme = 'sphinx_book_theme' | |||
| html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] | |||
| html_theme_options = {} | |||
| @@ -34,13 +34,111 @@ make linter | |||
| ``` | |||
| ## 2. Test | |||
| ### 2.1 Unit test | |||
| ### 2.1 Test level | |||
| There are mainly three test levels: | |||
* level 0: tests for the basic interfaces and functions of the framework, such as `tests/trainers/test_trainer_base.py`
* level 1: important functional tests that cover end-to-end workflows, such as `tests/pipelines/test_image_matting.py`
* level 2: scenario tests for all implemented modules, such as models and pipelines in different algorithm fields.
The default test level is 0, which runs only level-0 cases; you can set the test level
via the environment variable `TEST_LEVEL`. For more details, refer to [test-doc](https://alidocs.dingtalk.com/i/nodes/mdvQnONayjBJKLXy1Bp38PY2MeXzp5o0?dontjump=true&nav=spaces&navQuery=spaceId%3Dnb9XJNlZxbgrOXyA)
| ```bash | |||
| # run all tests | |||
| TEST_LEVEL=2 make test | |||
| # run important functional tests | |||
| TEST_LEVEL=1 make test | |||
| # run core UT and basic functional tests | |||
| make test | |||
| ``` | |||
| ### 2.2 Test data | |||
| TODO | |||
When writing test cases, you should assign a test level to each case using the
following code. If left unset, the test level defaults to 0 and the case will run in
every test stage.
| File test_module.py | |||
| ```python | |||
import unittest

from modelscope.utils.test_utils import test_level
| class ImageCartoonTest(unittest.TestCase): | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_by_direct_model_download(self): | |||
| pass | |||
| ``` | |||
| ### 2.2 Run tests | |||
1. Run your own single test case to verify your self-implemented function. You can run your
test file directly; if it fails to run, please check whether the variable `TEST_LEVEL`
exists in the environment and unset it.
| ```bash | |||
| python tests/path/to/your_test.py | |||
| ``` | |||
2. Remember to run the core tests in your local environment before starting a code review; by default this will
only run test cases with level 0.
| ```bash | |||
make test
| ``` | |||
3. After you start a code review, CI tests will be triggered, which will run test cases with level 1.
4. Daily regression tests will run all test cases at 0:00 each day on the master branch.
| ### 2.3 Test data storage | |||
As we need a lot of data for testing, including images, videos, and models, we use git-lfs
to store those large files.
1. Install git-lfs.
For Mac:
| ```bash | |||
| brew install git-lfs | |||
| git lfs install | |||
| ``` | |||
For CentOS, please download the rpm from the git-lfs GitHub releases [website](https://github.com/git-lfs/git-lfs/releases/tag/v3.2.0)
| ```bash | |||
| wget http://101374-public.oss-cn-hangzhou-zmf.aliyuncs.com/git-lfs-3.2.0-1.el7.x86_64.rpm | |||
| sudo rpm -ivh git-lfs-3.2.0-1.el7.x86_64.rpm | |||
| git lfs install | |||
| ``` | |||
For Ubuntu:
| ```bash | |||
| curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash | |||
| sudo apt-get install git-lfs | |||
| git lfs install | |||
| ``` | |||
2. Track your data type with git-lfs; for example, to track png files:
| ```bash | |||
| git lfs track "*.png" | |||
| ``` | |||
3. Add your test files to the `data/test/` folder; you can create directories if you need.
| ```bash | |||
| git add data/test/test.png | |||
| ``` | |||
4. Commit your test data to the remote branch.
| ```bash | |||
| git commit -m "xxx" | |||
| ``` | |||
To pull data from the remote repo, use the same commands as for regular git files.
| ```bash | |||
| git pull origin branch_name | |||
| ``` | |||
| ## Code Review | |||
| @@ -93,3 +191,22 @@ TODO | |||
| ```bash | |||
| make whl | |||
| ``` | |||
| ## Build docker | |||
Build the develop docker image:
| ```bash | |||
| sudo make -f Makefile.docker devel-image | |||
| ``` | |||
To push the develop docker image, please ask wenmeng.zwm for the password.
| ```bash | |||
| sudo docker login --username=mass_test@test.aliyunid.com registry.cn-shanghai.aliyuncs.com | |||
| Password: | |||
| sudo make -f Makefile.docker devel-push | |||
| ``` | |||
To build the runtime image, just replace `devel` with `runtime` in the commands above.
| ```bash | |||
sudo make -f Makefile.docker runtime-image runtime-push
| ``` | |||
| @@ -2,4 +2,4 @@ | |||
| from .base import Model | |||
| from .builder import MODELS, build_model | |||
| from .nlp import BertForSequenceClassification | |||
| from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity | |||
| @@ -0,0 +1,60 @@ | |||
| import torch.nn as nn | |||
| from .layer_base import LayerBase | |||
| class RectifiedLinear(LayerBase): | |||
| def __init__(self, input_dim, output_dim): | |||
| super(RectifiedLinear, self).__init__() | |||
| self.dim = input_dim | |||
| self.relu = nn.ReLU() | |||
| def forward(self, input): | |||
| return self.relu(input) | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<RectifiedLinear> %d %d\n' % (self.dim, self.dim) | |||
| return re_str | |||
| def load_kaldi_nnet(self, instr): | |||
| return instr | |||
| class LogSoftmax(LayerBase): | |||
| def __init__(self, input_dim, output_dim): | |||
| super(LogSoftmax, self).__init__() | |||
| self.dim = input_dim | |||
| self.ls = nn.LogSoftmax() | |||
| def forward(self, input): | |||
| return self.ls(input) | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<Softmax> %d %d\n' % (self.dim, self.dim) | |||
| return re_str | |||
| def load_kaldi_nnet(self, instr): | |||
| return instr | |||
| class Sigmoid(LayerBase): | |||
| def __init__(self, input_dim, output_dim): | |||
| super(Sigmoid, self).__init__() | |||
| self.dim = input_dim | |||
| self.sig = nn.Sigmoid() | |||
| def forward(self, input): | |||
| return self.sig(input) | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<Sigmoid> %d %d\n' % (self.dim, self.dim) | |||
| return re_str | |||
| def load_kaldi_nnet(self, instr): | |||
| return instr | |||
| @@ -0,0 +1,78 @@ | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number, | |||
| to_kaldi_matrix) | |||
| class AffineTransform(LayerBase): | |||
| def __init__(self, input_dim, output_dim): | |||
| super(AffineTransform, self).__init__() | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| self.linear = nn.Linear(input_dim, output_dim) | |||
| def forward(self, input): | |||
| return self.linear(input) | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<AffineTransform> %d %d\n' % (self.output_dim, | |||
| self.input_dim) | |||
| re_str += '<LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0\n' | |||
| linear_weights = self.state_dict()['linear.weight'] | |||
| x = linear_weights.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| linear_bias = self.state_dict()['linear.bias'] | |||
| x = linear_bias.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| return re_str | |||
| def to_raw_nnet(self, fid): | |||
| linear_weights = self.state_dict()['linear.weight'] | |||
| x = linear_weights.squeeze().numpy() | |||
| x.tofile(fid) | |||
| linear_bias = self.state_dict()['linear.bias'] | |||
| x = linear_bias.squeeze().numpy() | |||
| x.tofile(fid) | |||
| def load_kaldi_nnet(self, instr): | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LearnRateCoef>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('AffineTransform format error for <LearnRateCoef>') | |||
| instr, lr = output | |||
| output = expect_token_number(instr, '<BiasLearnRateCoef>') | |||
| if output is None: | |||
| raise Exception( | |||
| 'AffineTransform format error for <BiasLearnRateCoef>') | |||
| instr, lr = output | |||
| output = expect_token_number(instr, '<MaxNorm>') | |||
| if output is None: | |||
| raise Exception('AffineTransform format error for <MaxNorm>') | |||
| instr, lr = output | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('AffineTransform format error for parsing matrix') | |||
| instr, mat = output | |||
| self.linear.weight = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('AffineTransform format error for parsing matrix') | |||
| instr, mat = output | |||
| mat = np.squeeze(mat) | |||
| self.linear.bias = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| return instr | |||
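# Round-trip sketch (dims are illustrative): serialize a layer to Kaldi text
# format, then load it back into a fresh layer of the same shape.
#   layer = AffineTransform(4, 3)
#   text = layer.to_kaldi_nnet()                  # '<AffineTransform> 3 4\n...'
#   body = text.split('\n', 1)[1]                 # drop the component header
#   AffineTransform(4, 3).load_kaldi_nnet(body)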
| @@ -0,0 +1,178 @@ | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number, | |||
| to_kaldi_matrix) | |||
| class DeepFsmn(LayerBase): | |||
| def __init__(self, | |||
| input_dim, | |||
| output_dim, | |||
| lorder=None, | |||
| rorder=None, | |||
| hidden_size=None, | |||
| layer_norm=False, | |||
| dropout=0): | |||
| super(DeepFsmn, self).__init__() | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| if lorder is None: | |||
| return | |||
| self.lorder = lorder | |||
| self.rorder = rorder | |||
| self.hidden_size = hidden_size | |||
| self.layer_norm = layer_norm | |||
| self.linear = nn.Linear(input_dim, hidden_size) | |||
| self.norm = nn.LayerNorm(hidden_size) | |||
| self.drop1 = nn.Dropout(p=dropout) | |||
| self.drop2 = nn.Dropout(p=dropout) | |||
| self.project = nn.Linear(hidden_size, output_dim, bias=False) | |||
| self.conv1 = nn.Conv2d( | |||
| output_dim, | |||
| output_dim, [lorder, 1], [1, 1], | |||
| groups=output_dim, | |||
| bias=False) | |||
| self.conv2 = nn.Conv2d( | |||
| output_dim, | |||
| output_dim, [rorder, 1], [1, 1], | |||
| groups=output_dim, | |||
| bias=False) | |||
| def forward(self, input): | |||
| f1 = F.relu(self.linear(input)) | |||
| f1 = self.drop1(f1) | |||
| if self.layer_norm: | |||
| f1 = self.norm(f1) | |||
| p1 = self.project(f1) | |||
| x = th.unsqueeze(p1, 1) | |||
| x_per = x.permute(0, 3, 2, 1) | |||
| y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) | |||
| yr = F.pad(x_per, [0, 0, 0, self.rorder]) | |||
| yr = yr[:, :, 1:, :] | |||
| out = x_per + self.conv1(y) + self.conv2(yr) | |||
| out = self.drop2(out) | |||
| out1 = out.permute(0, 3, 2, 1) | |||
| return input + out1.squeeze() | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<UniDeepFsmn> %d %d\n'\ | |||
| % (self.output_dim, self.input_dim) | |||
| re_str += '<LearnRateCoef> %d <HidSize> %d <LOrder> %d <LStride> %d <MaxNorm> 0\n'\ | |||
| % (1, self.hidden_size, self.lorder, 1) | |||
| lfiters = self.state_dict()['conv1.weight'] | |||
| x = np.flipud(lfiters.squeeze().numpy().T) | |||
| re_str += to_kaldi_matrix(x) | |||
| proj_weights = self.state_dict()['project.weight'] | |||
| x = proj_weights.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| linear_weights = self.state_dict()['linear.weight'] | |||
| x = linear_weights.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| linear_bias = self.state_dict()['linear.bias'] | |||
| x = linear_bias.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| return re_str | |||
| def load_kaldi_nnet(self, instr): | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LearnRateCoef>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LearnRateCoef>') | |||
| instr, lr = output | |||
| output = expect_token_number( | |||
| instr, | |||
| '<HidSize>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <HidSize>') | |||
| instr, hiddensize = output | |||
| self.hidden_size = int(hiddensize) | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LOrder>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LOrder>') | |||
| instr, lorder = output | |||
| self.lorder = int(lorder) | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LStride>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LStride>') | |||
| instr, lstride = output | |||
| self.lstride = lstride | |||
| output = expect_token_number( | |||
| instr, | |||
| '<MaxNorm>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <MaxNorm>') | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| mat1 = np.fliplr(mat.T).copy() | |||
| self.conv1 = nn.Conv2d( | |||
| self.output_dim, | |||
| self.output_dim, [self.lorder, 1], [1, 1], | |||
| groups=self.output_dim, | |||
| bias=False) | |||
| mat_th = th.from_numpy(mat1).type(th.FloatTensor) | |||
| mat_th = mat_th.unsqueeze(1) | |||
| mat_th = mat_th.unsqueeze(3) | |||
| self.conv1.weight = th.nn.Parameter(mat_th) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| self.project = nn.Linear(self.hidden_size, self.output_dim, bias=False) | |||
| self.linear = nn.Linear(self.input_dim, self.hidden_size) | |||
| self.project.weight = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| self.linear.weight = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| self.linear.bias = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| return instr | |||
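# Usage sketch (all dims are illustrative): an FSMN block mixing `lorder` past
# and `rorder` future frames; the output keeps the input's (batch, frames, dim)
# shape thanks to the residual connection.
#   layer = DeepFsmn(input_dim=140, output_dim=140, lorder=2, rorder=1,
#                    hidden_size=512)
#   out = layer(th.randn(8, 100, 140))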
| @@ -0,0 +1,50 @@ | |||
| import abc | |||
| import re | |||
| import numpy as np | |||
| import torch.nn as nn | |||
| def expect_token_number(instr, token): | |||
| first_token = re.match(r'^\s*' + token, instr) | |||
| if first_token is None: | |||
| return None | |||
| instr = instr[first_token.end():] | |||
| lr = re.match(r'^\s*(-?\d+\.?\d*e?-?\d*?)', instr) | |||
| if lr is None: | |||
| return None | |||
| return instr[lr.end():], lr.groups()[0] | |||
| def expect_kaldi_matrix(instr): | |||
| pos2 = instr.find('[', 0) | |||
| pos3 = instr.find(']', pos2) | |||
| mat = [] | |||
| for stt in instr[pos2 + 1:pos3].split('\n'): | |||
| tmp_mat = np.fromstring(stt, dtype=np.float32, sep=' ') | |||
| if tmp_mat.size > 0: | |||
| mat.append(tmp_mat) | |||
| return instr[pos3 + 1:], np.array(mat) | |||
| def to_kaldi_matrix(np_mat): | |||
| """ | |||
Transform a numpy matrix into a Kaldi text-format matrix string.
| :param np_mat: numpy mat | |||
| :return: str | |||
| """ | |||
| np.set_printoptions(threshold=np.inf, linewidth=np.nan, suppress=True) | |||
| out_str = str(np_mat) | |||
| out_str = out_str.replace('[', '') | |||
| out_str = out_str.replace(']', '') | |||
| return '[ %s ]\n' % out_str | |||
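# Example (illustrative): the helpers above round-trip Kaldi text matrices.
#   rest, mat = expect_kaldi_matrix(' [ 1 2\n3 4 ] tail')
#   # rest == ' tail', mat == array([[1., 2.], [3., 4.]])
#   to_kaldi_matrix(mat)   # bracketed text form, '[ ... ]\n'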
| class LayerBase(nn.Module, metaclass=abc.ABCMeta): | |||
| def __init__(self): | |||
| super(LayerBase, self).__init__() | |||
| @abc.abstractmethod | |||
| def to_kaldi_nnet(self): | |||
| pass | |||
| @@ -0,0 +1,482 @@ | |||
| import numpy as np | |||
| import torch as th | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from .layer_base import (LayerBase, expect_kaldi_matrix, expect_token_number, | |||
| to_kaldi_matrix) | |||
| class SepConv(nn.Module): | |||
| def __init__(self, | |||
| in_channels, | |||
| filters, | |||
| out_channels, | |||
| kernel_size=(5, 2), | |||
| dilation=(1, 1)): | |||
| """ :param kernel_size (time, frequency) | |||
| """ | |||
| super(SepConv, self).__init__() | |||
| # depthwise + pointwise | |||
| self.dconv = nn.Conv2d( | |||
| in_channels, | |||
| in_channels * filters, | |||
| kernel_size, | |||
| dilation=dilation, | |||
| groups=in_channels) | |||
| self.pconv = nn.Conv2d( | |||
| in_channels * filters, out_channels, kernel_size=1) | |||
| self.padding = dilation[0] * (kernel_size[0] - 1) | |||
| def forward(self, input): | |||
| ''' input: [B, C, T, F] | |||
| ''' | |||
| x = F.pad(input, [0, 0, self.padding, 0]) | |||
| x = self.dconv(x) | |||
| x = self.pconv(x) | |||
| return x | |||
| class Conv2d(nn.Module): | |||
| def __init__(self, | |||
| input_dim, | |||
| output_dim, | |||
| lorder=20, | |||
| rorder=0, | |||
| groups=1, | |||
| bias=False, | |||
| skip_connect=True): | |||
| super(Conv2d, self).__init__() | |||
| self.lorder = lorder | |||
| self.conv = nn.Conv2d( | |||
| input_dim, output_dim, [lorder, 1], groups=groups, bias=bias) | |||
| self.rorder = rorder | |||
| if self.rorder: | |||
| self.conv2 = nn.Conv2d( | |||
| input_dim, output_dim, [rorder, 1], groups=groups, bias=bias) | |||
| self.skip_connect = skip_connect | |||
| def forward(self, input): | |||
| # [B, 1, T, F] | |||
| x = th.unsqueeze(input, 1) | |||
| # [B, F, T, 1] | |||
| x_per = x.permute(0, 3, 2, 1) | |||
| y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) | |||
| out = self.conv(y) | |||
| if self.rorder: | |||
| yr = F.pad(x_per, [0, 0, 0, self.rorder]) | |||
| yr = yr[:, :, 1:, :] | |||
| out += self.conv2(yr) | |||
| out = out.permute(0, 3, 2, 1).squeeze(1) | |||
| if self.skip_connect: | |||
| out = out + input | |||
| return out | |||
| class SelfAttLayer(nn.Module): | |||
| def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None): | |||
| super(SelfAttLayer, self).__init__() | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| if lorder is None: | |||
| return | |||
| self.lorder = lorder | |||
| self.hidden_size = hidden_size | |||
| self.linear = nn.Linear(input_dim, hidden_size) | |||
| self.project = nn.Linear(hidden_size, output_dim, bias=False) | |||
| self.att = nn.Linear(input_dim, lorder, bias=False) | |||
| def forward(self, input): | |||
| f1 = F.relu(self.linear(input)) | |||
| p1 = self.project(f1) | |||
| x = th.unsqueeze(p1, 1) | |||
| x_per = x.permute(0, 3, 2, 1) | |||
| y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) | |||
| # z [B, F, T, lorder] | |||
| z = x_per | |||
| for i in range(1, self.lorder): | |||
| z = th.cat([z, y[:, :, self.lorder - 1 - i:-i, :]], axis=-1) | |||
| # [B, T, lorder] | |||
| att = F.softmax(self.att(input), dim=-1) | |||
| att = th.unsqueeze(att, 1) | |||
| z = th.sum(z * att, axis=-1) | |||
| out1 = z.permute(0, 2, 1) | |||
| return input + out1 | |||
| class TFFsmn(nn.Module): | |||
| def __init__(self, | |||
| input_dim, | |||
| output_dim, | |||
| lorder=None, | |||
| hidden_size=None, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| skip_connect=True): | |||
| super(TFFsmn, self).__init__() | |||
| self.skip_connect = skip_connect | |||
| self.linear = nn.Linear(input_dim, hidden_size) | |||
| self.norm = nn.Identity() | |||
| if layer_norm: | |||
| self.norm = nn.LayerNorm(input_dim) | |||
| self.act = nn.ReLU() | |||
| self.project = nn.Linear(hidden_size, output_dim, bias=False) | |||
| self.conv1 = nn.Conv2d( | |||
| output_dim, | |||
| output_dim, [lorder, 1], | |||
| dilation=[dilation, 1], | |||
| groups=output_dim, | |||
| bias=False) | |||
| self.padding_left = dilation * (lorder - 1) | |||
| dorder = 5 | |||
| self.conv2 = nn.Conv2d(1, 1, [dorder, 1], bias=False) | |||
| self.padding_freq = dorder - 1 | |||
| def forward(self, input): | |||
| return self.compute1(input) | |||
| def compute1(self, input): | |||
| ''' linear-dconv-relu(norm)-linear-dconv | |||
| ''' | |||
| x = self.linear(input) | |||
| # [B, 1, F, T] | |||
| x = th.unsqueeze(x, 1).permute(0, 1, 3, 2) | |||
| z = F.pad(x, [0, 0, self.padding_freq, 0]) | |||
| z = self.conv2(z) + x | |||
| x = z.permute(0, 3, 2, 1).squeeze(-1) | |||
| x = self.act(x) | |||
| x = self.norm(x) | |||
| x = self.project(x) | |||
| x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) | |||
| # [B, F, T+lorder-1, 1] | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.conv1(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| return input + out | |||
| class CNNFsmn(nn.Module): | |||
| ''' use cnn to reduce parameters | |||
| ''' | |||
| def __init__(self, | |||
| input_dim, | |||
| output_dim, | |||
| lorder=None, | |||
| hidden_size=None, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| skip_connect=True): | |||
| super(CNNFsmn, self).__init__() | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| self.skip_connect = skip_connect | |||
| if lorder is None: | |||
| return | |||
| self.lorder = lorder | |||
| self.hidden_size = hidden_size | |||
| self.linear = nn.Linear(input_dim, hidden_size) | |||
| self.act = nn.ReLU() | |||
| kernel_size = (3, 8) | |||
| stride = (1, 4) | |||
| self.conv = nn.Sequential( | |||
| nn.ConstantPad2d((stride[1], 0, kernel_size[0] - 1, 0), 0), | |||
| nn.Conv2d(1, stride[1], kernel_size=kernel_size, stride=stride)) | |||
| self.dconv = nn.Conv2d( | |||
| output_dim, | |||
| output_dim, [lorder, 1], | |||
| dilation=[dilation, 1], | |||
| groups=output_dim, | |||
| bias=False) | |||
| self.padding_left = dilation * (lorder - 1) | |||
| def forward(self, input): | |||
| return self.compute2(input) | |||
| def compute1(self, input): | |||
| ''' linear-relu(norm)-conv2d-relu?-dconv | |||
| ''' | |||
| # [B, T, F] | |||
| x = self.linear(input) | |||
| x = self.act(x) | |||
| x = th.unsqueeze(x, 1) | |||
| x = self.conv(x) | |||
| # [B, C, T, F] -> [B, 1, T, F] | |||
| b, c, t, f = x.shape | |||
| x = x.view([b, 1, t, -1]) | |||
| x = x.permute(0, 3, 2, 1) | |||
| # [B, F, T+lorder-1, 1] | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.dconv(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| return input + out | |||
| def compute2(self, input): | |||
| ''' conv2d-relu-linear-relu?-dconv | |||
| ''' | |||
| x = th.unsqueeze(input, 1) | |||
| x = self.conv(x) | |||
| x = self.act(x) | |||
| # [B, C, T, F] -> [B, T, F] | |||
| b, c, t, f = x.shape | |||
| x = x.view([b, t, -1]) | |||
| x = self.linear(x) | |||
| x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.dconv(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| return input + out | |||
| class UniDeepFsmn(LayerBase): | |||
| def __init__(self, | |||
| input_dim, | |||
| output_dim, | |||
| lorder=None, | |||
| hidden_size=None, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| skip_connect=True): | |||
| super(UniDeepFsmn, self).__init__() | |||
| self.input_dim = input_dim | |||
| self.output_dim = output_dim | |||
| self.skip_connect = skip_connect | |||
| if lorder is None: | |||
| return | |||
| self.lorder = lorder | |||
| self.hidden_size = hidden_size | |||
| self.linear = nn.Linear(input_dim, hidden_size) | |||
| self.norm = nn.Identity() | |||
| if layer_norm: | |||
| self.norm = nn.LayerNorm(input_dim) | |||
| self.act = nn.ReLU() | |||
| self.project = nn.Linear(hidden_size, output_dim, bias=False) | |||
| self.conv1 = nn.Conv2d( | |||
| output_dim, | |||
| output_dim, [lorder, 1], | |||
| dilation=[dilation, 1], | |||
| groups=output_dim, | |||
| bias=False) | |||
| self.padding_left = dilation * (lorder - 1) | |||
| def forward(self, input): | |||
| return self.compute1(input) | |||
| def compute1(self, input): | |||
| ''' linear-relu(norm)-linear-dconv | |||
| ''' | |||
| # [B, T, F] | |||
| x = self.linear(input) | |||
| x = self.act(x) | |||
| x = self.norm(x) | |||
| x = self.project(x) | |||
| x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) | |||
| # [B, F, T+lorder-1, 1] | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.conv1(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| return input + out | |||
| def compute2(self, input): | |||
| ''' linear-dconv-linear-relu(norm) | |||
| ''' | |||
| x = self.project(input) | |||
| x = th.unsqueeze(x, 1).permute(0, 3, 2, 1) | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.conv1(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| x = self.linear(out) | |||
| x = self.act(x) | |||
| x = self.norm(x) | |||
| return input + x | |||
| def compute3(self, input): | |||
| ''' dconv-linear-relu(norm)-linear | |||
| ''' | |||
| x = th.unsqueeze(input, 1).permute(0, 3, 2, 1) | |||
| y = F.pad(x, [0, 0, self.padding_left, 0]) | |||
| out = self.conv1(y) | |||
| if self.skip_connect: | |||
| out = out + x | |||
| out = out.permute(0, 3, 2, 1).squeeze() | |||
| x = self.linear(out) | |||
| x = self.act(x) | |||
| x = self.norm(x) | |||
| x = self.project(x) | |||
| return input + x | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<UniDeepFsmn> %d %d\n' \ | |||
| % (self.output_dim, self.input_dim) | |||
| re_str += '<LearnRateCoef> %d <HidSize> %d <LOrder> %d <LStride> %d <MaxNorm> 0\n' \ | |||
| % (1, self.hidden_size, self.lorder, 1) | |||
| lfiters = self.state_dict()['conv1.weight'] | |||
| x = np.flipud(lfiters.squeeze().numpy().T) | |||
| re_str += to_kaldi_matrix(x) | |||
| proj_weights = self.state_dict()['project.weight'] | |||
| x = proj_weights.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| linear_weights = self.state_dict()['linear.weight'] | |||
| x = linear_weights.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| linear_bias = self.state_dict()['linear.bias'] | |||
| x = linear_bias.squeeze().numpy() | |||
| re_str += to_kaldi_matrix(x) | |||
| return re_str | |||
| def to_raw_nnet(self, fid): | |||
| lfiters = self.state_dict()['conv1.weight'] | |||
| x = np.flipud(lfiters.squeeze().numpy().T) | |||
| x.tofile(fid) | |||
| proj_weights = self.state_dict()['project.weight'] | |||
| x = proj_weights.squeeze().numpy() | |||
| x.tofile(fid) | |||
| linear_weights = self.state_dict()['linear.weight'] | |||
| x = linear_weights.squeeze().numpy() | |||
| x.tofile(fid) | |||
| linear_bias = self.state_dict()['linear.bias'] | |||
| x = linear_bias.squeeze().numpy() | |||
| x.tofile(fid) | |||
| def load_kaldi_nnet(self, instr): | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LearnRateCoef>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LearnRateCoef>') | |||
| instr, lr = output | |||
| output = expect_token_number( | |||
| instr, | |||
| '<HidSize>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <HidSize>') | |||
| instr, hiddensize = output | |||
| self.hidden_size = int(hiddensize) | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LOrder>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LOrder>') | |||
| instr, lorder = output | |||
| self.lorder = int(lorder) | |||
| output = expect_token_number( | |||
| instr, | |||
| '<LStride>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <LStride>') | |||
| instr, lstride = output | |||
| self.lstride = lstride | |||
| output = expect_token_number( | |||
| instr, | |||
| '<MaxNorm>', | |||
| ) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for <MaxNorm>') | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| mat1 = np.fliplr(mat.T).copy() | |||
| self.conv1 = nn.Conv2d( | |||
| self.output_dim, | |||
| self.output_dim, [self.lorder, 1], [1, 1], | |||
| groups=self.output_dim, | |||
| bias=False) | |||
| mat_th = th.from_numpy(mat1).type(th.FloatTensor) | |||
| mat_th = mat_th.unsqueeze(1) | |||
| mat_th = mat_th.unsqueeze(3) | |||
| self.conv1.weight = th.nn.Parameter(mat_th) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| self.project = nn.Linear(self.hidden_size, self.output_dim, bias=False) | |||
| self.linear = nn.Linear(self.input_dim, self.hidden_size) | |||
| self.project.weight = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| self.linear.weight = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| output = expect_kaldi_matrix(instr) | |||
| if output is None: | |||
| raise Exception('UniDeepFsmn format error for parsing matrix') | |||
| instr, mat = output | |||
| mat = np.squeeze(mat) | |||
| self.linear.bias = th.nn.Parameter( | |||
| th.from_numpy(mat).type(th.FloatTensor)) | |||
| return instr | |||
| @@ -0,0 +1,394 @@ | |||
| import torch | |||
| import torch.nn.functional as F | |||
| from .modulation_loss import (GaborSTRFConv, MelScale, | |||
| ModulationDomainLossModule) | |||
| EPS = 1e-8 | |||
| def compute_mask(mixed_spec, clean_spec, mask_type='psmiam', clip=1): | |||
| ''' | |||
| stft: (batch, ..., 2) or complex(batch, ...) | |||
| y = x + n | |||
| ''' | |||
| if torch.is_complex(mixed_spec): | |||
| yr, yi = mixed_spec.real, mixed_spec.imag | |||
| else: | |||
| yr, yi = mixed_spec[..., 0], mixed_spec[..., 1] | |||
| if torch.is_complex(clean_spec): | |||
| xr, xi = clean_spec.real, clean_spec.imag | |||
| else: | |||
| xr, xi = clean_spec[..., 0], clean_spec[..., 1] | |||
| if mask_type == 'iam': | |||
| ymag = torch.sqrt(yr**2 + yi**2) | |||
| xmag = torch.sqrt(xr**2 + xi**2) | |||
| iam = xmag / (ymag + EPS) | |||
| return torch.clamp(iam, 0, 1) | |||
| elif mask_type == 'psm': | |||
| ypow = yr**2 + yi**2 | |||
| psm = (xr * yr + xi * yi) / (ypow + EPS) | |||
| return torch.clamp(psm, 0, 1) | |||
| elif mask_type == 'psmiam': | |||
| ypow = yr**2 + yi**2 | |||
| psm = (xr * yr + xi * yi) / (ypow + EPS) | |||
| ymag = torch.sqrt(yr**2 + yi**2) | |||
| xmag = torch.sqrt(xr**2 + xi**2) | |||
| iam = xmag / (ymag + EPS) | |||
| psmiam = psm * iam | |||
| return torch.clamp(psmiam, 0, 1) | |||
| elif mask_type == 'crm': | |||
| ypow = yr**2 + yi**2 | |||
| mr = (xr * yr + xi * yi) / (ypow + EPS) | |||
| mi = (xi * yr - xr * yi) / (ypow + EPS) | |||
| mr = torch.clamp(mr, -clip, clip) | |||
| mi = torch.clamp(mi, -clip, clip) | |||
| return mr, mi | |||
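# Usage sketch (shapes are illustrative): masks follow the spectrogram's
# (batch, bins, frames) layout; all types are clipped, e.g. [0, 1] for 'psm'.
#   mixed = torch.randn(2, 257, 100, 2)    # (batch, bins, frames, re/im)
#   clean = torch.randn(2, 257, 100, 2)
#   psm = compute_mask(mixed, clean, mask_type='psm')      # (2, 257, 100)
#   mr, mi = compute_mask(mixed, clean, mask_type='crm')   # clipped to [-clip, clip]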
| def energy_vad(spec, | |||
| thdhigh=320 * 600 * 600 * 2, | |||
| thdlow=320 * 300 * 300 * 2, | |||
| int16=True): | |||
| ''' | |||
| energy based vad should be accurate enough | |||
| spec: (batch, bins, frames, 2) | |||
| returns (batch, frames) | |||
| ''' | |||
| energy = torch.sum(spec[..., 0]**2 + spec[..., 1]**2, dim=1) | |||
# use a float tensor so the 0.5 "uncertain" label below is not cast to bool
vad = (energy > thdhigh).float()
| idx = torch.logical_and(vad == 0, energy > thdlow) | |||
| vad[idx] = 0.5 | |||
| return vad | |||
| def modulation_loss_init(n_fft): | |||
| gabor_strf_parameters = torch.load( | |||
| './network/gabor_strf_parameters.pt')['state_dict'] | |||
| gabor_modulation_kernels = GaborSTRFConv(supn=30, supk=30, nkern=60) | |||
| gabor_modulation_kernels.load_state_dict(gabor_strf_parameters) | |||
| modulation_loss_module = ModulationDomainLossModule( | |||
| gabor_modulation_kernels.eval()) | |||
| for param in modulation_loss_module.parameters(): | |||
| param.requires_grad = False | |||
| stft2mel = MelScale( | |||
| n_mels=80, sample_rate=16000, n_stft=n_fft // 2 + 1).cuda() | |||
| return modulation_loss_module, stft2mel | |||
| def mask_loss_function( | |||
| loss_func='psm_loss', | |||
| loss_type='mse', # ['mse', 'mae', 'comb'] | |||
| mask_type='psmiam', | |||
| use_mod_loss=False, | |||
| use_wav2vec_loss=False, | |||
| n_fft=640, | |||
| hop_length=320, | |||
| EPS=1e-8, | |||
| weight=None): | |||
| if weight is not None: | |||
| print(f'Use loss weight: {weight}') | |||
| winlen = n_fft | |||
| window = torch.hamming_window(winlen, periodic=False) | |||
| def stft(x, return_complex=False): | |||
| # returns [batch, bins, frames, 2] | |||
| return torch.stft( | |||
| x, | |||
| n_fft, | |||
| hop_length, | |||
| winlen, | |||
| window=window.to(x.device), | |||
| center=False, | |||
| return_complex=return_complex) | |||
| def istft(x, slen): | |||
| return torch.istft( | |||
| x, | |||
| n_fft, | |||
| hop_length, | |||
| winlen, | |||
| window=window.to(x.device), | |||
| center=False, | |||
| length=slen) | |||
| def mask_loss(targets, masks, nframes): | |||
| ''' [Batch, Time, Frequency] | |||
| ''' | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(targets) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| masks = masks * mask_for_loss | |||
| targets = targets * mask_for_loss | |||
| if weight is None: | |||
| alpha = 1 | |||
| else: # for aec ST | |||
| alpha = weight - targets | |||
| if loss_type == 'mse': | |||
| loss = 0.5 * torch.sum(alpha * torch.pow(targets - masks, 2)) | |||
| elif loss_type == 'mae': | |||
| loss = torch.sum(alpha * torch.abs(targets - masks)) | |||
| else: # mse(mask), mae(mask) approx 1:2 | |||
| loss = 0.5 * torch.sum(alpha * torch.pow(targets - masks, 2) | |||
| + 0.1 * alpha * torch.abs(targets - masks)) | |||
| loss /= torch.sum(nframes) | |||
| return loss | |||
| def spectrum_loss(targets, spec, nframes): | |||
| ''' [Batch, Time, Frequency, 2] | |||
| ''' | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(targets[..., 0]) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| xr = spec[..., 0] * mask_for_loss | |||
| xi = spec[..., 1] * mask_for_loss | |||
| yr = targets[..., 0] * mask_for_loss | |||
| yi = targets[..., 1] * mask_for_loss | |||
| xmag = torch.sqrt(spec[..., 0]**2 + spec[..., 1]**2) * mask_for_loss | |||
| ymag = torch.sqrt(targets[..., 0]**2 | |||
| + targets[..., 1]**2) * mask_for_loss | |||
| loss1 = torch.sum(torch.pow(xr - yr, 2) + torch.pow(xi - yi, 2)) | |||
| loss2 = torch.sum(torch.pow(xmag - ymag, 2)) | |||
| loss = (loss1 + loss2) / torch.sum(nframes) | |||
| return loss | |||
| def sa_loss_dlen(mixed, clean, masks, nframes): | |||
| yspec = stft(mixed).permute([0, 2, 1, 3]) / 32768 | |||
| xspec = stft(clean).permute([0, 2, 1, 3]) / 32768 | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(xspec[..., 0]) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| emag = ((yspec[..., 0]**2 + yspec[..., 1]**2)**0.15) * (masks**0.3) | |||
| xmag = (xspec[..., 0]**2 + xspec[..., 1]**2)**0.15 | |||
| emag = emag * mask_for_loss | |||
| xmag = xmag * mask_for_loss | |||
| loss = torch.sum(torch.pow(emag - xmag, 2)) / torch.sum(nframes) | |||
| return loss | |||
| def psm_vad_loss_dlen(mixed, clean, masks, nframes, subtask=None): | |||
| mixed_spec = stft(mixed) | |||
| clean_spec = stft(clean) | |||
| targets = compute_mask(mixed_spec, clean_spec, mask_type) | |||
| # [B, T, F] | |||
| targets = targets.permute(0, 2, 1) | |||
| loss = mask_loss(targets, masks, nframes) | |||
| if subtask is not None: | |||
| vadtargets = energy_vad(clean_spec) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(targets[:, :, 0]) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:] = 0 | |||
| subtask = subtask[:, :, 0] * mask_for_loss | |||
| vadtargets = vadtargets * mask_for_loss | |||
| loss_vad = F.binary_cross_entropy(subtask, vadtargets) | |||
| return loss + loss_vad | |||
| return loss | |||
| def modulation_loss(mixed, clean, masks, nframes, subtask=None): | |||
| mixed_spec = stft(mixed, True) | |||
| clean_spec = stft(clean, True) | |||
| enhanced_mag = torch.abs(mixed_spec) | |||
| clean_mag = torch.abs(clean_spec) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(clean_mag) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, :, num:] = 0 | |||
| clean_mag = clean_mag * mask_for_loss | |||
| enhanced_mag = enhanced_mag * mask_for_loss * masks.permute([0, 2, 1]) | |||
| # Covert to log-mel representation | |||
| # (B,T,#mel_channels) | |||
| clean_log_mel = torch.log( | |||
| torch.transpose(stft2mel(clean_mag**2), 2, 1) + 1e-8) | |||
| enhanced_log_mel = torch.log( | |||
| torch.transpose(stft2mel(enhanced_mag**2), 2, 1) + 1e-8) | |||
| alpha = compute_mask(mixed_spec, clean_spec, mask_type) | |||
| alpha = alpha.permute(0, 2, 1) | |||
| loss = 0.05 * modulation_loss_module(enhanced_log_mel, clean_log_mel, | |||
| alpha) | |||
| loss2 = psm_vad_loss_dlen(mixed, clean, masks, nframes, subtask) | |||
| # print(loss.item(), loss2.item()) #approx 1:4 | |||
| loss = loss + loss2 | |||
| return loss | |||
| def wav2vec_loss(mixed, clean, masks, nframes, subtask=None): | |||
| mixed /= 32768 | |||
| clean /= 32768 | |||
| mixed_spec = stft(mixed) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(masks) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| masks_est = masks * mask_for_loss | |||
| estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) | |||
| est_clean = istft(estimate, clean.shape[1]) | |||
| loss = wav2vec_loss_module(est_clean, clean) | |||
| return loss | |||
| def sisdr_loss_dlen(mixed, | |||
| clean, | |||
| masks, | |||
| nframes, | |||
| subtask=None, | |||
| zero_mean=True): | |||
| mixed_spec = stft(mixed) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(masks) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| masks_est = masks * mask_for_loss | |||
| estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) | |||
| est_clean = istft(estimate, clean.shape[1]) | |||
| flen = min(clean.shape[1], est_clean.shape[1]) | |||
| clean = clean[:, :flen] | |||
| est_clean = est_clean[:, :flen] | |||
| # follow asteroid/losses/sdr.py | |||
| if zero_mean: | |||
| clean = clean - torch.mean(clean, dim=1, keepdim=True) | |||
| est_clean = est_clean - torch.mean(est_clean, dim=1, keepdim=True) | |||
| dot = torch.sum(est_clean * clean, dim=1, keepdim=True) | |||
| s_clean_energy = torch.sum(clean**2, dim=1, keepdim=True) + EPS | |||
| scaled_clean = dot * clean / s_clean_energy | |||
| e_noise = est_clean - scaled_clean | |||
| # [batch] | |||
| sisdr = torch.sum( | |||
| scaled_clean**2, dim=1) / ( | |||
| torch.sum(e_noise**2, dim=1) + EPS) | |||
| sisdr = -10 * torch.log10(sisdr + EPS) | |||
| loss = sisdr.mean() | |||
| return loss | |||
| def sisdr_freq_loss_dlen(mixed, clean, masks, nframes, subtask=None): | |||
| mixed_spec = stft(mixed) | |||
| clean_spec = stft(clean) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(masks) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| masks_est = masks * mask_for_loss | |||
| estimate = mixed_spec * masks_est.permute([0, 2, 1]).unsqueeze(3) | |||
| dot_real = estimate[..., 0] * clean_spec[..., 0] + \ | |||
| estimate[..., 1] * clean_spec[..., 1] | |||
| dot_imag = estimate[..., 0] * clean_spec[..., 1] - \ | |||
| estimate[..., 1] * clean_spec[..., 0] | |||
| dot = torch.cat([dot_real.unsqueeze(3), dot_imag.unsqueeze(3)], dim=-1) | |||
| s_clean_energy = clean_spec[..., 0] ** 2 + \ | |||
| clean_spec[..., 1] ** 2 + EPS | |||
| scaled_clean = dot * clean_spec / s_clean_energy.unsqueeze(3) | |||
| e_noise = estimate - scaled_clean | |||
| # [batch] | |||
| scaled_clean_energy = torch.sum( | |||
| scaled_clean[..., 0]**2 + scaled_clean[..., 1]**2, dim=1) | |||
| e_noise_energy = torch.sum( | |||
| e_noise[..., 0]**2 + e_noise[..., 1]**2, dim=1) | |||
| sisdr = torch.sum( | |||
| scaled_clean_energy, dim=1) / ( | |||
| torch.sum(e_noise_energy, dim=1) + EPS) | |||
| sisdr = -10 * torch.log10(sisdr + EPS) | |||
| loss = sisdr.mean() | |||
| return loss | |||
| def crm_loss_dlen(mixed, clean, masks, nframes, subtask=None): | |||
| mixed_spec = stft(mixed).permute([0, 2, 1, 3]) | |||
| clean_spec = stft(clean).permute([0, 2, 1, 3]) | |||
| mixed_spec = mixed_spec / 32768 | |||
| clean_spec = clean_spec / 32768 | |||
| tgt_mr, tgt_mi = compute_mask(mixed_spec, clean_spec, mask_type='crm') | |||
| D = int(masks.shape[2] / 2) | |||
| with torch.no_grad(): | |||
| mask_for_loss = torch.ones_like(clean_spec[..., 0]) | |||
| for idx, num in enumerate(nframes): | |||
| mask_for_loss[idx, num:, :] = 0 | |||
| mr = masks[..., :D] * mask_for_loss | |||
| mi = masks[..., D:] * mask_for_loss | |||
| tgt_mr = tgt_mr * mask_for_loss | |||
| tgt_mi = tgt_mi * mask_for_loss | |||
| if weight is None: | |||
| alpha = 1 | |||
| else: | |||
| alpha = weight - tgt_mr | |||
| # signal approximation | |||
| yr = mixed_spec[..., 0] | |||
| yi = mixed_spec[..., 1] | |||
| loss1 = torch.sum(alpha * torch.pow((mr * yr - mi * yi) - clean_spec[..., 0], 2)) \ | |||
| + torch.sum(alpha * torch.pow((mr * yi + mi * yr) - clean_spec[..., 1], 2)) | |||
| # mask approximation | |||
| loss2 = torch.sum(alpha * torch.pow(mr - tgt_mr, 2)) \ | |||
| + torch.sum(alpha * torch.pow(mi - tgt_mi, 2)) | |||
| loss = 0.5 * (loss1 + loss2) / torch.sum(nframes) | |||
| return loss | |||
| def crm_miso_loss_dlen(mixed, clean, masks, nframes): | |||
| return crm_loss_dlen(mixed[..., 0], clean[..., 0], masks, nframes) | |||
| def mimo_loss_dlen(mixed, clean, masks, nframes): | |||
| chs = mixed.shape[-1] | |||
| D = masks.shape[2] // chs | |||
| loss = psm_vad_loss_dlen(mixed[..., 0], clean[..., 0], masks[..., :D], | |||
| nframes) | |||
| for ch in range(1, chs): | |||
| loss1 = psm_vad_loss_dlen(mixed[..., ch], clean[..., ch], | |||
| masks[..., ch * D:ch * D + D], nframes) | |||
| loss = loss + loss1 | |||
| return loss / chs | |||
| def spec_loss_dlen(mixed, clean, spec, nframes): | |||
| clean_spec = stft(clean).permute([0, 2, 1, 3]) | |||
| clean_spec = clean_spec / 32768 | |||
| D = spec.shape[2] // 2 | |||
| spec_est = torch.cat([spec[..., :D, None], spec[..., D:, None]], | |||
| dim=-1) | |||
| loss = spectrum_loss(clean_spec, spec_est, nframes) | |||
| return loss | |||
| if loss_func == 'psm_vad_loss_dlen': | |||
| return psm_vad_loss_dlen | |||
| elif loss_func == 'sisdr_loss_dlen': | |||
| return sisdr_loss_dlen | |||
| elif loss_func == 'sisdr_freq_loss_dlen': | |||
| return sisdr_freq_loss_dlen | |||
| elif loss_func == 'crm_loss_dlen': | |||
| return crm_loss_dlen | |||
| elif loss_func == 'modulation_loss': | |||
| return modulation_loss | |||
| elif loss_func == 'wav2vec_loss': | |||
| return wav2vec_loss | |||
| elif loss_func == 'mimo_loss_dlen': | |||
| return mimo_loss_dlen | |||
| elif loss_func == 'spec_loss_dlen': | |||
| return spec_loss_dlen | |||
| elif loss_func == 'sa_loss_dlen': | |||
| return sa_loss_dlen | |||
| else: | |||
print(f'Unknown loss function: {loss_func}')
| return None | |||
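# Usage sketch (waveform shapes are assumptions): the factory returns a loss
# closure taking time-domain signals plus the network's predicted masks.
#   loss_fn = mask_loss_function('psm_vad_loss_dlen', n_fft=640, hop_length=320)
#   # mixed/clean: (batch, samples); masks: (batch, frames, bins); nframes: (batch,)
#   loss = loss_fn(mixed, clean, masks, nframes)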
| @@ -0,0 +1,248 @@ | |||
| import math | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from torchaudio.transforms import MelScale | |||
| class ModulationDomainLossModule(torch.nn.Module): | |||
| """Modulation-domain loss function developed in [1] for supervised speech enhancement | |||
| In our paper, we used the gabor-based STRF kernels as the modulation kernels and used the log-mel spectrogram | |||
| as the input spectrogram representation. | |||
| Specific parameter details are in the paper and in the example below | |||
| Parameters | |||
| ---------- | |||
| modulation_kernels: nn.Module | |||
| Differentiable module that transforms a spectrogram representation to the modulation domain | |||
| modulation_domain = modulation_kernels(input_tf_representation) | |||
| Input Spectrogram representation (B, T, F) ---> |(M) modulation_kernels|--->Modulation Domain(B, M, T', F') | |||
| norm: boolean | |||
| Normalizes the modulation domain representation to be 0 mean across time | |||
[1] T. Vuong, Y. Xia, and R. M. Stern, “A modulation-domain loss for neural-network-based real-time
| speech enhancement” | |||
| Accepted ICASSP 2021, https://arxiv.org/abs/2102.07330 | |||
| """ | |||
| def __init__(self, modulation_kernels, norm=True): | |||
| super(ModulationDomainLossModule, self).__init__() | |||
| self.modulation_kernels = modulation_kernels | |||
self.mse = nn.MSELoss(reduction='none')
| self.norm = norm | |||
| def forward(self, enhanced_spect, clean_spect, weight=None): | |||
| """Calculate modulation-domain loss | |||
| Args: | |||
| enhanced_spect (Tensor): spectrogram representation of enhanced signal (B, #frames, #freq_channels). | |||
| clean_spect (Tensor): spectrogram representation of clean ground-truth signal (B, #frames, #freq_channels). | |||
| Returns: | |||
| Tensor: Modulation-domain loss value. | |||
| """ | |||
| clean_mod = self.modulation_kernels(clean_spect) | |||
| enhanced_mod = self.modulation_kernels(enhanced_spect) | |||
| if self.norm: | |||
| mean_clean_mod = torch.mean(clean_mod, dim=2) | |||
| mean_enhanced_mod = torch.mean(enhanced_mod, dim=2) | |||
| clean_mod = clean_mod - mean_clean_mod.unsqueeze(2) | |||
| enhanced_mod = enhanced_mod - mean_enhanced_mod.unsqueeze(2) | |||
| if weight is None: | |||
| alpha = 1 | |||
| else: # TF-mask weight | |||
| alpha = 1 + torch.sum(weight, dim=-1, keepdim=True).unsqueeze(1) | |||
| mod_mse_loss = self.mse(enhanced_mod, clean_mod) * alpha | |||
| mod_mse_loss = torch.mean( | |||
| torch.sum(mod_mse_loss, dim=(1, 2, 3)) | |||
| / torch.sum(clean_mod**2, dim=(1, 2, 3))) | |||
| return mod_mse_loss | |||
| class ModulationDomainNCCLossModule(torch.nn.Module): | |||
| """Modulation-domain loss function developed in [1] for supervised speech enhancement | |||
# Based on: Speech Intelligibility Prediction Using Spectro-Temporal Modulation Analysis
| In our paper, we used the gabor-based STRF kernels as the modulation kernels and used the log-mel spectrogram | |||
| as the input spectrogram representation. | |||
| Specific parameter details are in the paper and in the example below | |||
| Parameters | |||
| ---------- | |||
| modulation_kernels: nn.Module | |||
| Differentiable module that transforms a spectrogram representation to the modulation domain | |||
| modulation_domain = modulation_kernels(input_tf_representation) | |||
| Input Spectrogram representation(B, T, F) --- (M) modulation_kernels---> Modulation Domain(B, M, T', F') | |||
| [1] | |||
| """ | |||
| def __init__(self, modulation_kernels): | |||
| super(ModulationDomainNCCLossModule, self).__init__() | |||
| self.modulation_kernels = modulation_kernels | |||
self.mse = nn.MSELoss(reduction='none')
| def forward(self, enhanced_spect, clean_spect): | |||
| """Calculate modulation-domain loss | |||
| Args: | |||
| enhanced_spect (Tensor): spectrogram representation of enhanced signal (B, #frames, #freq_channels). | |||
| clean_spect (Tensor): spectrogram representation of clean ground-truth signal (B, #frames, #freq_channels). | |||
| Returns: | |||
| Tensor: Modulation-domain loss value. | |||
| """ | |||
| clean_mod = self.modulation_kernels(clean_spect) | |||
| enhanced_mod = self.modulation_kernels(enhanced_spect) | |||
| mean_clean_mod = torch.mean(clean_mod, dim=2) | |||
| mean_enhanced_mod = torch.mean(enhanced_mod, dim=2) | |||
| normalized_clean = clean_mod - mean_clean_mod.unsqueeze(2) | |||
| normalized_enhanced = enhanced_mod - mean_enhanced_mod.unsqueeze(2) | |||
| inner_product = torch.sum( | |||
| normalized_clean * normalized_enhanced, dim=2) | |||
| normalized_denom = (torch.sum( | |||
| normalized_clean * normalized_clean, dim=2))**.5 * (torch.sum( | |||
| normalized_enhanced * normalized_enhanced, dim=2))**.5 | |||
| ncc = inner_product / normalized_denom | |||
| mod_mse_loss = torch.mean((ncc - 1.0)**2) | |||
| return mod_mse_loss | |||
| class GaborSTRFConv(nn.Module): | |||
| """Gabor-STRF-based cross-correlation kernel.""" | |||
| def __init__(self, | |||
| supn, | |||
| supk, | |||
| nkern, | |||
| rates=None, | |||
| scales=None, | |||
| norm_strf=True, | |||
| real_only=False): | |||
| """Instantiate a Gabor-based STRF convolution layer. | |||
| Parameters | |||
| ---------- | |||
| supn: int | |||
| Time support in number of frames. Also the window length. | |||
| supk: int | |||
| Frequency support in number of channels. Also the window length. | |||
| nkern: int | |||
| Number of kernels, each with a learnable rate and scale. | |||
| rates: list of float, None | |||
| Initial values for temporal modulation. | |||
| scales: list of float, None | |||
| Initial values for spectral modulation. | |||
| norm_strf: bool | |||
| If True, normalize each STRF kernel to unit energy. | |||
| real_only: bool | |||
| If True, use nkern REAL Gabor-STRF kernels. | |||
| If False, use nkern//2 REAL and nkern//2 IMAGINARY Gabor-STRF kernels. | |||
| """ | |||
| super(GaborSTRFConv, self).__init__() | |||
| self.numN = supn | |||
| self.numK = supk | |||
| self.numKern = nkern | |||
| self.real_only = real_only | |||
| self.norm_strf = norm_strf | |||
| if not real_only: | |||
| nkern = nkern // 2 | |||
| if supk % 2 == 0: # force odd number | |||
| supk += 1 | |||
| self.supk = torch.arange(supk, dtype=torch.float32) | |||
| if supn % 2 == 0: # force odd number | |||
| supn += 1 | |||
| self.supn = torch.arange(supn, dtype=self.supk.dtype) | |||
| self.padding = (supn // 2, supk // 2) | |||
| # Set up learnable parameters | |||
| # for param in (rates, scales): | |||
| # assert (not param) or len(param) == nkern | |||
| if not rates: | |||
| rates = torch.rand(nkern) * math.pi / 2.0 | |||
| if not scales: | |||
| scales = (torch.rand(nkern) * 2.0 - 1.0) * math.pi / 2.0 | |||
| self.rates_ = nn.Parameter(torch.Tensor(rates)) | |||
| self.scales_ = nn.Parameter(torch.Tensor(scales)) | |||
| def strfs(self): | |||
| """Make STRFs using the current parameters.""" | |||
| if self.supn.device != self.rates_.device: # for first run | |||
| self.supn = self.supn.to(self.rates_.device) | |||
| self.supk = self.supk.to(self.rates_.device) | |||
| n0, k0 = self.padding | |||
| nwind = .5 - .5 * \ | |||
| torch.cos(2 * math.pi * (self.supn + 1) / (len(self.supn) + 1)) | |||
| kwind = .5 - .5 * \ | |||
| torch.cos(2 * math.pi * (self.supk + 1) / (len(self.supk) + 1)) | |||
| new_wind = torch.matmul((nwind).unsqueeze(-1), (kwind).unsqueeze(0)) | |||
| n_n_0 = self.supn - n0 | |||
| k_k_0 = self.supk - k0 | |||
| # broadcast the time and frequency offsets over the 2-D support | |||
| n_mult = n_n_0.unsqueeze(1).expand(-1, len(self.supk)) | |||
| k_mult = k_k_0.unsqueeze(0).expand(len(self.supn), -1) | |||
| inside = self.rates_.unsqueeze(1).unsqueeze( | |||
| 1) * n_mult + self.scales_.unsqueeze(1).unsqueeze(1) * k_mult | |||
| real_strf = torch.cos(inside) * new_wind.unsqueeze(0) | |||
| if self.real_only: | |||
| final_strf = real_strf | |||
| else: | |||
| imag_strf = torch.sin(inside) * new_wind.unsqueeze(0) | |||
| final_strf = torch.cat([real_strf, imag_strf], dim=0) | |||
| if self.norm_strf: | |||
| final_strf = final_strf / (torch.sum( | |||
| final_strf**2, dim=(1, 2)).unsqueeze(1).unsqueeze(2))**.5 | |||
| return final_strf | |||
| def forward(self, sigspec): | |||
| """Forward pass a batch of (real) spectra [Batch x Time x Frequency].""" | |||
| if len(sigspec.shape) == 2:  # expand batch dimension for a single example | |||
| sigspec = sigspec.unsqueeze(0) | |||
| strfs = self.strfs().unsqueeze(1).type_as(sigspec) | |||
| out = F.conv2d(sigspec.unsqueeze(1), strfs, padding=self.padding) | |||
| return out | |||
| def __repr__(self): | |||
| """Gabor filter""" | |||
| report = """ | |||
| +++++ Gabor Filter Kernels [{}], supn[{}], supk[{}] real only [{}] norm strf [{}] +++++ | |||
| """.format(self.numKern, self.numN, self.numK, self.real_only, | |||
| self.norm_strf) | |||
| return report | |||
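| # Shape sketch (illustrative; the numbers are assumptions): even supports are | |||
| # forced odd, and the symmetric conv padding preserves the input T x F grid: | |||
| #   conv = GaborSTRFConv(supn=20, supk=5, nkern=8) | |||
| #   conv(torch.rand(4, 100, 40)).shape  # -> torch.Size([4, 8, 100, 40]) | |||
| #   conv.strfs().shape                  # -> torch.Size([8, 21, 5]) | |||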
| @@ -0,0 +1,483 @@ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from ..layers.activations import RectifiedLinear, Sigmoid | |||
| from ..layers.affine_transform import AffineTransform | |||
| from ..layers.deep_fsmn import DeepFsmn | |||
| from ..layers.uni_deep_fsmn import Conv2d, UniDeepFsmn | |||
| class MaskNet(nn.Module): | |||
| def __init__(self, | |||
| indim, | |||
| outdim, | |||
| layers=9, | |||
| hidden_dim=128, | |||
| hidden_dim2=None, | |||
| lorder=20, | |||
| rorder=0, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| crm=False, | |||
| vad=False, | |||
| linearout=False): | |||
| super(MaskNet, self).__init__() | |||
| self.linear1 = AffineTransform(indim, hidden_dim) | |||
| self.relu = RectifiedLinear(hidden_dim, hidden_dim) | |||
| if hidden_dim2 is None: | |||
| hidden_dim2 = hidden_dim | |||
| if rorder == 0: | |||
| repeats = [ | |||
| UniDeepFsmn( | |||
| hidden_dim, | |||
| hidden_dim, | |||
| lorder, | |||
| hidden_dim2, | |||
| dilation=dilation, | |||
| layer_norm=layer_norm, | |||
| dropout=dropout) for i in range(layers) | |||
| ] | |||
| else: | |||
| repeats = [ | |||
| DeepFsmn( | |||
| hidden_dim, | |||
| hidden_dim, | |||
| lorder, | |||
| rorder, | |||
| hidden_dim2, | |||
| layer_norm=layer_norm, | |||
| dropout=dropout) for i in range(layers) | |||
| ] | |||
| self.deepfsmn = nn.Sequential(*repeats) | |||
| self.linear2 = AffineTransform(hidden_dim, outdim) | |||
| self.crm = crm | |||
| if self.crm: | |||
| self.sig = nn.Tanh() | |||
| else: | |||
| self.sig = Sigmoid(outdim, outdim) | |||
| self.vad = vad | |||
| if self.vad: | |||
| self.linear3 = AffineTransform(hidden_dim, 1) | |||
| self.layers = layers | |||
| self.linearout = linearout | |||
| if self.linearout and self.vad: | |||
| print('Warning: linearout together with vad is not supported') | |||
| def forward(self, feat, ctl=None): | |||
| x1 = self.linear1(feat) | |||
| x2 = self.relu(x1) | |||
| if ctl is not None: | |||
| ctl = min(ctl, self.layers - 1) | |||
| for i in range(ctl): | |||
| x2 = self.deepfsmn[i](x2) | |||
| mask = self.sig(self.linear2(x2)) | |||
| if self.vad: | |||
| vad = torch.sigmoid(self.linear3(x2)) | |||
| return mask, vad | |||
| else: | |||
| return mask | |||
| x3 = self.deepfsmn(x2) | |||
| if self.linearout: | |||
| return self.linear2(x3) | |||
| mask = self.sig(self.linear2(x3)) | |||
| if self.vad: | |||
| vad = torch.sigmoid(self.linear3(x3)) | |||
| return mask, vad | |||
| else: | |||
| return mask | |||
| def to_kaldi_nnet(self): | |||
| re_str = '' | |||
| re_str += '<Nnet>\n' | |||
| re_str += self.linear1.to_kaldi_nnet() | |||
| re_str += self.relu.to_kaldi_nnet() | |||
| for dfsmn in self.deepfsmn: | |||
| re_str += dfsmn.to_kaldi_nnet() | |||
| re_str += self.linear2.to_kaldi_nnet() | |||
| re_str += self.sig.to_kaldi_nnet() | |||
| re_str += '</Nnet>\n' | |||
| return re_str | |||
| def to_raw_nnet(self, fid): | |||
| self.linear1.to_raw_nnet(fid) | |||
| for dfsmn in self.deepfsmn: | |||
| dfsmn.to_raw_nnet(fid) | |||
| self.linear2.to_raw_nnet(fid) | |||
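| # Usage sketch (illustrative; dimensions are assumptions): a MaskNet mapping | |||
| # 120-dim features to a 321-bin mask, with ctl optionally truncating the | |||
| # FSMN stack for early-exit inference: | |||
| #   net = MaskNet(indim=120, outdim=321, layers=9, hidden_dim=128) | |||
| #   mask = net(torch.rand(1, 50, 120))              # (1, 50, 321), in [0, 1] | |||
| #   mask_early = net(torch.rand(1, 50, 120), ctl=4) # only first 4 FSMN layers | |||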
| class StageNet(nn.Module): | |||
| def __init__(self, | |||
| indim, | |||
| outdim, | |||
| layers=9, | |||
| layers2=6, | |||
| hidden_dim=128, | |||
| lorder=20, | |||
| rorder=0, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| crm=False, | |||
| vad=False, | |||
| linearout=False): | |||
| super(StageNet, self).__init__() | |||
| self.stage1 = nn.ModuleList() | |||
| self.stage2 = nn.ModuleList() | |||
| layer = nn.Sequential(nn.Linear(indim, hidden_dim), nn.ReLU()) | |||
| self.stage1.append(layer) | |||
| for i in range(layers): | |||
| layer = UniDeepFsmn( | |||
| hidden_dim, | |||
| hidden_dim, | |||
| lorder, | |||
| hidden_dim, | |||
| layer_norm=layer_norm, | |||
| dropout=dropout) | |||
| self.stage1.append(layer) | |||
| layer = nn.Sequential(nn.Linear(hidden_dim, 321), nn.Sigmoid()) | |||
| self.stage1.append(layer) | |||
| # stage2 | |||
| layer = nn.Sequential(nn.Linear(321 + indim, hidden_dim), nn.ReLU()) | |||
| self.stage2.append(layer) | |||
| for i in range(layers2): | |||
| layer = UniDeepFsmn( | |||
| hidden_dim, | |||
| hidden_dim, | |||
| lorder, | |||
| hidden_dim, | |||
| layer_norm=layer_norm, | |||
| dropout=dropout) | |||
| self.stage2.append(layer) | |||
| layer = nn.Sequential( | |||
| nn.Linear(hidden_dim, outdim), | |||
| nn.Sigmoid() if not crm else nn.Tanh()) | |||
| self.stage2.append(layer) | |||
| self.crm = crm | |||
| self.vad = vad | |||
| self.linearout = linearout | |||
| self.window = torch.hamming_window(640, periodic=False)  # moved to the input device at use time | |||
| self.freezed = False | |||
| def freeze(self): | |||
| if not self.freezed: | |||
| for param in self.stage1.parameters(): | |||
| param.requires_grad = False | |||
| self.freezed = True | |||
| print('froze stage1 parameters') | |||
| def forward(self, feat, mixture, ctl=None): | |||
| if ctl == 'off': | |||
| x = feat | |||
| for i in range(len(self.stage1)): | |||
| x = self.stage1[i](x) | |||
| return x | |||
| else: | |||
| self.freeze() | |||
| x = feat | |||
| for i in range(len(self.stage1)): | |||
| x = self.stage1[i](x) | |||
| spec = torch.stft( | |||
| mixture / 32768, | |||
| 640, | |||
| 320, | |||
| 640, | |||
| self.window.to(mixture.device), | |||
| center=False, | |||
| return_complex=True) | |||
| spec = torch.view_as_real(spec).permute([0, 2, 1, 3]) | |||
| specmag = torch.sqrt(spec[..., 0]**2 + spec[..., 1]**2) | |||
| est = x * specmag | |||
| y = torch.cat([est, feat], dim=-1) | |||
| for i in range(len(self.stage2)): | |||
| y = self.stage2[i](y) | |||
| return y | |||
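| # Flow sketch (illustrative; dims are assumptions): with ctl == 'off' only | |||
| # stage1 runs and its mask is returned; otherwise stage1 is frozen, its mask | |||
| # is applied to the mixture magnitude spectrum from torch.stft, and the | |||
| # masked estimate is concatenated with the input features for stage2: | |||
| #   net = StageNet(indim=120, outdim=321) | |||
| #   refined = net(feat, mixture)  # feat (B, T, 120), mixture (B, samples) | |||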
| class Unet(nn.Module): | |||
| def __init__(self, | |||
| indim, | |||
| outdim, | |||
| layers=9, | |||
| dims=[256] * 4, | |||
| lorder=20, | |||
| rorder=0, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| crm=False, | |||
| vad=False, | |||
| linearout=False): | |||
| super(Unet, self).__init__() | |||
| self.linear1 = AffineTransform(indim, dims[0]) | |||
| self.relu = RectifiedLinear(dims[0], dims[0]) | |||
| self.encoder = nn.ModuleList() | |||
| self.decoder = nn.ModuleList() | |||
| for i in range(len(dims) - 1): | |||
| layer = nn.Sequential( | |||
| nn.Linear(dims[i], dims[i + 1]), nn.ReLU(), | |||
| nn.Linear(dims[i + 1], dims[i + 1], bias=False), | |||
| Conv2d( | |||
| dims[i + 1], | |||
| dims[i + 1], | |||
| lorder, | |||
| groups=dims[i + 1], | |||
| skip_connect=True)) | |||
| self.encoder.append(layer) | |||
| for i in range(len(dims) - 1, 0, -1): | |||
| layer = nn.Sequential( | |||
| nn.Linear(dims[i] * 2, dims[i - 1]), nn.ReLU(), | |||
| nn.Linear(dims[i - 1], dims[i - 1], bias=False), | |||
| Conv2d( | |||
| dims[i - 1], | |||
| dims[i - 1], | |||
| lorder, | |||
| groups=dims[i - 1], | |||
| skip_connect=True)) | |||
| self.decoder.append(layer) | |||
| self.tf = nn.ModuleList() | |||
| for i in range(layers - 2 * (len(dims) - 1)): | |||
| layer = nn.Sequential( | |||
| nn.Linear(dims[-1], dims[-1]), nn.ReLU(), | |||
| nn.Linear(dims[-1], dims[-1], bias=False), | |||
| Conv2d( | |||
| dims[-1], | |||
| dims[-1], | |||
| lorder, | |||
| groups=dims[-1], | |||
| skip_connect=True)) | |||
| self.tf.append(layer) | |||
| self.linear2 = AffineTransform(dims[0], outdim) | |||
| self.crm = crm | |||
| self.act = nn.Tanh() if self.crm else nn.Sigmoid() | |||
| self.vad = False | |||
| self.layers = layers | |||
| self.linearout = linearout | |||
| def forward(self, x, ctl=None): | |||
| x = self.linear1(x) | |||
| x = self.relu(x) | |||
| encoder_out = [] | |||
| for i in range(len(self.encoder)): | |||
| x = self.encoder[i](x) | |||
| encoder_out.append(x) | |||
| for i in range(len(self.tf)): | |||
| x = self.tf[i](x) | |||
| for i in range(len(self.decoder)): | |||
| x = torch.cat([x, encoder_out[-1 - i]], dim=-1) | |||
| x = self.decoder[i](x) | |||
| x = self.linear2(x) | |||
| if self.linearout: | |||
| return x | |||
| return self.act(x) | |||
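| # Shape note (illustrative): the encoder output at each depth is cached in | |||
| # encoder_out and concatenated (skip connection) with the decoder input at | |||
| # the mirrored depth, so each decoder layer consumes dims[i] * 2 features, | |||
| # matching the nn.Linear(dims[i] * 2, dims[i - 1]) definition above. | |||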
| class BranchNet(nn.Module): | |||
| def __init__(self, | |||
| indim, | |||
| outdim, | |||
| layers=9, | |||
| hidden_dim=256, | |||
| lorder=20, | |||
| rorder=0, | |||
| dilation=1, | |||
| layer_norm=False, | |||
| dropout=0, | |||
| crm=False, | |||
| vad=False, | |||
| linearout=False): | |||
| super(BranchNet, self).__init__() | |||
| self.linear1 = AffineTransform(indim, hidden_dim) | |||
| self.relu = RectifiedLinear(hidden_dim, hidden_dim) | |||
| self.convs = nn.ModuleList() | |||
| self.deepfsmn = nn.ModuleList() | |||
| self.FREQ = nn.ModuleList() | |||
| self.TIME = nn.ModuleList() | |||
| self.br1 = nn.ModuleList() | |||
| self.br2 = nn.ModuleList() | |||
| for i in range(layers): | |||
| ''' | |||
| layer = nn.Sequential( | |||
| nn.Linear(hidden_dim, hidden_dim), | |||
| nn.ReLU(), | |||
| nn.Linear(hidden_dim, hidden_dim, bias=False), | |||
| Conv2d(hidden_dim, hidden_dim, lorder, | |||
| groups=hidden_dim, skip_connect=True) | |||
| ) | |||
| self.deepfsmn.append(layer) | |||
| ''' | |||
| layer = nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.ReLU()) | |||
| self.FREQ.append(layer) | |||
| ''' | |||
| layer = nn.GRU(hidden_dim, hidden_dim, | |||
| batch_first=True, | |||
| bidirectional=False) | |||
| self.TIME.append(layer) | |||
| layer = nn.Sequential( | |||
| nn.Linear(hidden_dim, hidden_dim//2, bias=False), | |||
| Conv2d(hidden_dim//2, hidden_dim//2, lorder, | |||
| groups=hidden_dim//2, skip_connect=True) | |||
| ) | |||
| self.br1.append(layer) | |||
| layer = nn.GRU(hidden_dim, hidden_dim//2, | |||
| batch_first=True, | |||
| bidirectional=False) | |||
| self.br2.append(layer) | |||
| ''' | |||
| self.linear2 = AffineTransform(hidden_dim, outdim) | |||
| self.crm = crm | |||
| self.act = nn.Tanh() if self.crm else nn.Sigmoid() | |||
| self.vad = False | |||
| self.layers = layers | |||
| self.linearout = linearout | |||
| def forward(self, x, ctl=None): | |||
| return self.forward_branch(x) | |||
| def forward_sepconv(self, x): | |||
| x = torch.unsqueeze(x, 1) | |||
| for i in range(len(self.convs)): | |||
| x = self.convs[i](x) | |||
| x = F.relu(x) | |||
| B, C, H, W = x.shape | |||
| x = x.permute(0, 2, 1, 3) | |||
| x = torch.reshape(x, [B, H, C * W]) | |||
| x = self.linear1(x) | |||
| x = self.relu(x) | |||
| for i in range(self.layers): | |||
| x = self.deepfsmn[i](x) + x | |||
| x = self.linear2(x) | |||
| return self.act(x) | |||
| def forward_branch(self, x): | |||
| x = self.linear1(x) | |||
| x = self.relu(x) | |||
| for i in range(self.layers): | |||
| z = self.FREQ[i](x) | |||
| x = z + x | |||
| x = self.linear2(x) | |||
| if self.linearout: | |||
| return x | |||
| return self.act(x) | |||
| class TACNet(nn.Module): | |||
| ''' Transform-average-concatenate (TAC) module for ad-hoc (distributed) microphone arrays. | |||
| ''' | |||
| def __init__(self, | |||
| indim, | |||
| outdim, | |||
| layers=9, | |||
| hidden_dim=128, | |||
| lorder=20, | |||
| rorder=0, | |||
| crm=False, | |||
| vad=False, | |||
| linearout=False): | |||
| super(TACNet, self).__init__() | |||
| self.linear1 = AffineTransform(indim, hidden_dim) | |||
| self.relu = RectifiedLinear(hidden_dim, hidden_dim) | |||
| if rorder == 0: | |||
| repeats = [ | |||
| UniDeepFsmn(hidden_dim, hidden_dim, lorder, hidden_dim) | |||
| for i in range(layers) | |||
| ] | |||
| else: | |||
| repeats = [ | |||
| DeepFsmn(hidden_dim, hidden_dim, lorder, rorder, hidden_dim) | |||
| for i in range(layers) | |||
| ] | |||
| self.deepfsmn = nn.Sequential(*repeats) | |||
| self.ch_transform = nn.ModuleList([]) | |||
| self.ch_average = nn.ModuleList([]) | |||
| self.ch_concat = nn.ModuleList([]) | |||
| for i in range(layers): | |||
| self.ch_transform.append( | |||
| nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.PReLU())) | |||
| self.ch_average.append( | |||
| nn.Sequential(nn.Linear(hidden_dim, hidden_dim), nn.PReLU())) | |||
| self.ch_concat.append( | |||
| nn.Sequential( | |||
| nn.Linear(hidden_dim * 2, hidden_dim), nn.PReLU())) | |||
| self.linear2 = AffineTransform(hidden_dim, outdim) | |||
| self.crm = crm | |||
| if self.crm: | |||
| self.sig = nn.Tanh() | |||
| else: | |||
| self.sig = Sigmoid(outdim, outdim) | |||
| self.vad = vad | |||
| if self.vad: | |||
| self.linear3 = AffineTransform(hidden_dim, 1) | |||
| self.layers = layers | |||
| self.linearout = linearout | |||
| if self.linearout and self.vad: | |||
| print('Warning: linearout together with vad is not supported') | |||
| def forward(self, feat, ctl=None): | |||
| B, T, F = feat.shape | |||
| # assume 4ch | |||
| ch = 4 | |||
| zlist = [] | |||
| for c in range(ch): | |||
| z = self.linear1(feat[..., c * (F // 4):(c + 1) * (F // 4)]) | |||
| z = self.relu(z) | |||
| zlist.append(z) | |||
| for i in range(self.layers): | |||
| # forward | |||
| for c in range(ch): | |||
| zlist[c] = self.deepfsmn[i](zlist[c]) | |||
| # transform | |||
| olist = [] | |||
| for c in range(ch): | |||
| z = self.ch_transform[i](zlist[c]) | |||
| olist.append(z) | |||
| # average | |||
| avg = 0 | |||
| for c in range(ch): | |||
| avg = avg + olist[c] | |||
| avg = avg / ch | |||
| avg = self.ch_average[i](avg) | |||
| # concatenate | |||
| for c in range(ch): | |||
| tac = torch.cat([olist[c], avg], dim=-1) | |||
| tac = self.ch_concat[i](tac) | |||
| zlist[c] = zlist[c] + tac | |||
| for c in range(ch): | |||
| zlist[c] = self.sig(self.linear2(zlist[c])) | |||
| mask = torch.cat(zlist, dim=-1) | |||
| return mask | |||
| def to_kaldi_nnet(self): | |||
| pass | |||
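| # Usage sketch (illustrative; forward assumes 4 channels packed along the | |||
| # last feature axis, so F must equal 4 * indim): | |||
| #   net = TACNet(indim=120, outdim=321) | |||
| #   mask = net(torch.rand(1, 50, 480))  # -> (1, 50, 4 * 321) | |||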
| @@ -2,14 +2,13 @@ | |||
| import os.path as osp | |||
| from abc import ABC, abstractmethod | |||
| from typing import Dict, List, Tuple, Union | |||
| from typing import Dict, Union | |||
| from maas_hub.file_download import model_file_download | |||
| from maas_hub.snapshot_download import snapshot_download | |||
| from modelscope.models.builder import build_model | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import CONFIGFILE | |||
| from modelscope.utils.constant import ModelFile | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| Tensor = Union['torch.Tensor', 'tf.Tensor'] | |||
| @@ -21,16 +20,24 @@ class Model(ABC): | |||
| self.model_dir = model_dir | |||
| def __call__(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| return self.post_process(self.forward(input)) | |||
| return self.postprocess(self.forward(input)) | |||
| @abstractmethod | |||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| pass | |||
| def post_process(self, input: Dict[str, Tensor], | |||
| **kwargs) -> Dict[str, Tensor]: | |||
| # model specific postprocess, implementation is optional | |||
| # will be called in Pipeline and evaluation loop(in the future) | |||
| def postprocess(self, input: Dict[str, Tensor], | |||
| **kwargs) -> Dict[str, Tensor]: | |||
| """ Model specific postprocess and convert model output to | |||
| standard model outputs. | |||
| Args: | |||
| input: input data | |||
| Return: | |||
| dict of results: a dict containing outputs of model, each | |||
| output should have the standard output name. | |||
| """ | |||
| return input | |||
| @classmethod | |||
| @@ -47,7 +54,8 @@ class Model(ABC): | |||
| # raise ValueError( | |||
| # 'Remote model repo {model_name_or_path} does not exists') | |||
| cfg = Config.from_file(osp.join(local_model_dir, CONFIGFILE)) | |||
| cfg = Config.from_file( | |||
| osp.join(local_model_dir, ModelFile.CONFIGURATION)) | |||
| task_name = cfg.task | |||
| model_cfg = cfg.model | |||
| # TODO @wenmeng.zwm may should manually initialize model after model building | |||
| @@ -1,4 +1,6 @@ | |||
| from .sequence_classification_model import * # noqa F403 | |||
| from .bert_for_sequence_classification import * # noqa F403 | |||
| from .palm_for_text_generation import * # noqa F403 | |||
| from .sbert_for_sentence_similarity import * # noqa F403 | |||
| from .sbert_for_token_classification import * # noqa F403 | |||
| from .space.dialog_intent_prediction_model import * # noqa F403 | |||
| from .space.dialog_modeling_model import * # noqa F403 | |||
| from .text_generation_model import * # noqa F403 | |||
| @@ -1,5 +1,7 @@ | |||
| import os | |||
| from typing import Any, Dict | |||
| import json | |||
| import numpy as np | |||
| from modelscope.utils.constant import Tasks | |||
| @@ -34,6 +36,11 @@ class BertForSequenceClassification(Model): | |||
| ('token_type_ids', torch.LongTensor)], | |||
| output_keys=['predictions', 'probabilities', 'logits']) | |||
| self.label_path = os.path.join(self.model_dir, 'label_mapping.json') | |||
| with open(self.label_path) as f: | |||
| self.label_mapping = json.load(f) | |||
| self.id2label = {idx: name for name, idx in self.label_mapping.items()} | |||
| def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: | |||
| """return the result by the model | |||
| @@ -50,3 +57,13 @@ class BertForSequenceClassification(Model): | |||
| } | |||
| """ | |||
| return self.model.predict(input) | |||
| def postprocess(self, inputs: Dict[str, np.ndarray], | |||
| **kwargs) -> Dict[str, np.ndarray]: | |||
| # N x num_classes | |||
| probs = inputs['probabilities'] | |||
| result = { | |||
| 'probs': probs, | |||
| } | |||
| return result | |||
| @@ -0,0 +1,43 @@ | |||
| from typing import Dict | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Model, Tensor | |||
| from ..builder import MODELS | |||
| __all__ = ['PalmForTextGeneration'] | |||
| @MODELS.register_module(Tasks.text_generation, module_name=r'palm2.0') | |||
| class PalmForTextGeneration(Model): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| """initialize the text generation model from the `model_dir` path. | |||
| Args: | |||
| model_dir (str): the model path. | |||
| model_cls (Optional[Any], optional): model loader, if None, use the | |||
| default loader to load model weights, by default None. | |||
| """ | |||
| super().__init__(model_dir, *args, **kwargs) | |||
| self.model_dir = model_dir | |||
| from sofa.models.palm_v2 import PalmForConditionalGeneration, Translator | |||
| model = PalmForConditionalGeneration.from_pretrained(model_dir) | |||
| self.tokenizer = model.tokenizer | |||
| self.generator = Translator(model) | |||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| """return the result by the model | |||
| Args: | |||
| input (Dict[str, Tensor]): the preprocessed data | |||
| Returns: | |||
| Dict[str, Tensor]: results | |||
| Example: | |||
| { | |||
| 'predictions': Tensor([[1377, 4959, 2785, 6392...])]), # tokens need to be decoded by the tokenizer | |||
| } | |||
| """ | |||
| return self.generator(**input) | |||
| @@ -0,0 +1,88 @@ | |||
| import os | |||
| from typing import Any, Dict | |||
| import json | |||
| import numpy as np | |||
| import torch | |||
| from sofa import SbertModel | |||
| from sofa.models.sbert.modeling_sbert import SbertPreTrainedModel | |||
| from torch import nn | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Model, Tensor | |||
| from ..builder import MODELS | |||
| __all__ = ['SbertForSentenceSimilarity'] | |||
| class SbertTextClassifier(SbertPreTrainedModel): | |||
| def __init__(self, config): | |||
| super().__init__(config) | |||
| self.num_labels = config.num_labels | |||
| self.config = config | |||
| self.encoder = SbertModel(config, add_pooling_layer=True) | |||
| self.dropout = nn.Dropout(config.hidden_dropout_prob) | |||
| self.classifier = nn.Linear(config.hidden_size, config.num_labels) | |||
| def forward(self, input_ids=None, token_type_ids=None): | |||
| outputs = self.encoder( | |||
| input_ids, | |||
| token_type_ids=token_type_ids, | |||
| return_dict=None, | |||
| ) | |||
| pooled_output = outputs[1] | |||
| pooled_output = self.dropout(pooled_output) | |||
| logits = self.classifier(pooled_output) | |||
| return logits | |||
| @MODELS.register_module( | |||
| Tasks.sentence_similarity, | |||
| module_name=r'sbert-base-chinese-sentence-similarity') | |||
| class SbertForSentenceSimilarity(Model): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| """initialize the sentence similarity model from the `model_dir` path. | |||
| Args: | |||
| model_dir (str): the model path. | |||
| model_cls (Optional[Any], optional): model loader, if None, use the | |||
| default loader to load model weights, by default None. | |||
| """ | |||
| super().__init__(model_dir, *args, **kwargs) | |||
| self.model_dir = model_dir | |||
| self.model = SbertTextClassifier.from_pretrained( | |||
| model_dir, num_labels=2) | |||
| self.model.eval() | |||
| self.label_path = os.path.join(self.model_dir, 'label_mapping.json') | |||
| with open(self.label_path) as f: | |||
| self.label_mapping = json.load(f) | |||
| self.id2label = {idx: name for name, idx in self.label_mapping.items()} | |||
| def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: | |||
| """return the result by the model | |||
| Args: | |||
| input (Dict[str, Any]): the preprocessed data | |||
| Returns: | |||
| Dict[str, np.ndarray]: results | |||
| Example: | |||
| { | |||
| 'predictions': array([1]), # label: 0-negative, 1-positive | |||
| 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32), | |||
| 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # raw, unnormalized scores | |||
| } | |||
| """ | |||
| input_ids = torch.tensor(input['input_ids'], dtype=torch.long) | |||
| token_type_ids = torch.tensor( | |||
| input['token_type_ids'], dtype=torch.long) | |||
| with torch.no_grad(): | |||
| logits = self.model(input_ids, token_type_ids) | |||
| probs = logits.softmax(-1).numpy() | |||
| pred = logits.argmax(-1).numpy() | |||
| logits = logits.numpy() | |||
| res = {'predictions': pred, 'probabilities': probs, 'logits': logits} | |||
| return res | |||
| @@ -0,0 +1,56 @@ | |||
| from typing import Any, Dict, Union | |||
| import numpy as np | |||
| import torch | |||
| from sofa import SbertConfig, SbertForTokenClassification | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Model, Tensor | |||
| from ..builder import MODELS | |||
| __all__ = ['StructBertForTokenClassification'] | |||
| @MODELS.register_module( | |||
| Tasks.word_segmentation, | |||
| module_name=r'structbert-chinese-word-segmentation') | |||
| class StructBertForTokenClassification(Model): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| """initialize the word segmentation model from the `model_dir` path. | |||
| Args: | |||
| model_dir (str): the model path. | |||
| model_cls (Optional[Any], optional): model loader, if None, use the | |||
| default loader to load model weights, by default None. | |||
| """ | |||
| super().__init__(model_dir, *args, **kwargs) | |||
| self.model_dir = model_dir | |||
| self.model = SbertForTokenClassification.from_pretrained( | |||
| self.model_dir) | |||
| self.config = SbertConfig.from_pretrained(self.model_dir) | |||
| def forward(self, input: Dict[str, | |||
| Any]) -> Dict[str, Union[str, np.ndarray]]: | |||
| """return the result by the model | |||
| Args: | |||
| input (Dict[str, Any]): the preprocessed data | |||
| Returns: | |||
| Dict[str, Union[str,np.ndarray]]: results | |||
| Example: | |||
| { | |||
| 'predictions': array([1,4]), # token label ids | |||
| 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # raw, unnormalized scores | |||
| 'text': '今天', | |||
| } | |||
| """ | |||
| input_ids = torch.tensor(input['input_ids']).unsqueeze(0) | |||
| output = self.model(input_ids) | |||
| logits = output.logits | |||
| pred = torch.argmax(logits[0], dim=-1) | |||
| pred = pred.numpy() | |||
| rst = {'predictions': pred, 'logits': logits, 'text': input['text']} | |||
| return rst | |||
| @@ -1,52 +0,0 @@ | |||
| from typing import Any, Dict | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Model, Tensor | |||
| from ..builder import MODELS | |||
| __all__ = ['PalmForTextGenerationModel'] | |||
| @MODELS.register_module(Tasks.text_generation, module_name=r'palm') | |||
| class PalmForTextGenerationModel(Model): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| """initialize the text generation model from the `model_dir` path. | |||
| Args: | |||
| model_dir (str): the model path. | |||
| model_cls (Optional[Any], optional): model loader, if None, use the | |||
| default loader to load model weights, by default None. | |||
| """ | |||
| from sofa import PalmTokenizer | |||
| super().__init__(model_dir, *args, **kwargs) | |||
| self.model_dir = model_dir | |||
| from sofa.models.palm import PalmForConditionalGeneration, TextGenerator | |||
| tokenizer = kwargs.pop('tokenizer', | |||
| PalmTokenizer.from_pretrained(model_dir)) | |||
| model = PalmForConditionalGeneration.from_pretrained(model_dir) | |||
| self.generator = TextGenerator(model, tokenizer) | |||
| def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: | |||
| """return the result by the model | |||
| Args: | |||
| input (Dict[str, Any]): the preprocessed data | |||
| Returns: | |||
| Dict[str, np.ndarray]: results | |||
| Example: | |||
| { | |||
| 'predictions': array([1]), # lable 0-negative 1-positive | |||
| 'probabilities': array([[0.11491239, 0.8850876 ]], dtype=float32), | |||
| 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value | |||
| } | |||
| """ | |||
| encoder_inputs = [ | |||
| input['input_ids'], input['token_type_ids'], | |||
| input['attention_mask'] | |||
| ] | |||
| return self.generator(encoder_inputs) | |||
| @@ -1,4 +1,4 @@ | |||
| from .audio import * # noqa F403 | |||
| from .audio import LinearAECPipeline | |||
| from .base import Pipeline | |||
| from .builder import pipeline | |||
| from .cv import * # noqa F403 | |||
| @@ -0,0 +1 @@ | |||
| from .linear_aec_pipeline import LinearAECPipeline | |||
| @@ -0,0 +1,160 @@ | |||
| import importlib | |||
| import os | |||
| from typing import Any, Dict | |||
| import numpy as np | |||
| import scipy.io.wavfile as wav | |||
| import torch | |||
| import yaml | |||
| from modelscope.preprocessors.audio import LinearAECAndFbank | |||
| from modelscope.utils.constant import ModelFile, Tasks | |||
| from ..base import Pipeline | |||
| from ..builder import PIPELINES | |||
| FEATURE_MVN = 'feature.DEY.mvn.txt' | |||
| CONFIG_YAML = 'dey_mini.yaml' | |||
| def initialize_config(module_cfg): | |||
| r"""According to config items, load specific module dynamically with params. | |||
| 1. Load the module corresponding to the "module" param. | |||
| 2. Call function (or instantiate class) corresponding to the "main" param. | |||
| 3. Send the param (in "args") into the function (or class) when calling ( or instantiating). | |||
| Args: | |||
| module_cfg (dict): config items, eg: | |||
| { | |||
| "module": "models.model", | |||
| "main": "Model", | |||
| "args": {...} | |||
| } | |||
| Returns: | |||
| the module loaded. | |||
| """ | |||
| module = importlib.import_module(module_cfg['module']) | |||
| return getattr(module, module_cfg['main'])(**module_cfg['args']) | |||
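| # Example (illustrative; the module and class names are assumptions): | |||
| #   cfg = {'module': 'models.model', 'main': 'Model', 'args': {'indim': 120}} | |||
| #   net = initialize_config(cfg)  # equivalent to models.model.Model(indim=120) | |||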
| @PIPELINES.register_module( | |||
| Tasks.speech_signal_process, module_name=r'speech_dfsmn_aec_psm_16k') | |||
| class LinearAECPipeline(Pipeline): | |||
| r"""AEC Inference Pipeline only support 16000 sample rate. | |||
| When invoke the class with pipeline.__call__(), you should provide two params: | |||
| Dict[str, Any] | |||
| the path of wav files,eg:{ | |||
| "nearend_mic": "/your/data/near_end_mic_audio.wav", | |||
| "farend_speech": "/your/data/far_end_speech_audio.wav"} | |||
| output_path (str, optional): "/your/output/audio_after_aec.wav" | |||
| the file path to write generate audio. | |||
| """ | |||
| def __init__(self, model): | |||
| r""" | |||
| Args: | |||
| model: model id on modelscope hub. | |||
| """ | |||
| super().__init__(model=model) | |||
| self.use_cuda = torch.cuda.is_available() | |||
| with open( | |||
| os.path.join(self.model, CONFIG_YAML), encoding='utf-8') as f: | |||
| self.config = yaml.full_load(f.read()) | |||
| self.config['io']['mvn'] = os.path.join(self.model, FEATURE_MVN) | |||
| self._init_model() | |||
| self.preprocessor = LinearAECAndFbank(self.config['io']) | |||
| n_fft = self.config['loss']['args']['n_fft'] | |||
| hop_length = self.config['loss']['args']['hop_length'] | |||
| winlen = n_fft | |||
| window = torch.hamming_window(winlen, periodic=False) | |||
| def stft(x): | |||
| return torch.stft( | |||
| x, | |||
| n_fft, | |||
| hop_length, | |||
| winlen, | |||
| center=False, | |||
| window=window.to(x.device), | |||
| return_complex=False) | |||
| def istft(x, slen): | |||
| return torch.istft( | |||
| x, | |||
| n_fft, | |||
| hop_length, | |||
| winlen, | |||
| window=window.to(x.device), | |||
| center=False, | |||
| length=slen) | |||
| self.stft = stft | |||
| self.istft = istft | |||
| def _init_model(self): | |||
| checkpoint = torch.load( | |||
| os.path.join(self.model, ModelFile.TORCH_MODEL_BIN_FILE), | |||
| map_location='cpu') | |||
| self.model = initialize_config(self.config['nnet']) | |||
| if self.use_cuda: | |||
| self.model = self.model.cuda() | |||
| self.model.load_state_dict(checkpoint) | |||
| def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| r"""The AEC process. | |||
| Args: | |||
| inputs: dict={'feature': Tensor, 'base': Tensor} | |||
| 'feature' feature of input audio. | |||
| 'base' the base audio to mask. | |||
| Returns: | |||
| dict: | |||
| { | |||
| 'output_pcm': generated audio array | |||
| } | |||
| """ | |||
| output_data = self._process(inputs['feature'], inputs['base']) | |||
| return {'output_pcm': output_data} | |||
| def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | |||
| r"""The post process. Will save audio to file, if the output_path is given. | |||
| Args: | |||
| inputs: dict: | |||
| { | |||
| 'output_pcm': generated audio array | |||
| } | |||
| kwargs: accept 'output_path' which is the path to write generated audio | |||
| Returns: | |||
| dict: | |||
| { | |||
| 'output_pcm': generated audio array | |||
| } | |||
| """ | |||
| if 'output_path' in kwargs: | |||
| wav.write(kwargs['output_path'], self.preprocessor.SAMPLE_RATE, | |||
| inputs['output_pcm'].astype(np.int16)) | |||
| inputs['output_pcm'] = inputs['output_pcm'] / 32768.0 | |||
| return inputs | |||
| def _process(self, fbanks, mixture): | |||
| if self.use_cuda: | |||
| fbanks = fbanks.cuda() | |||
| mixture = mixture.cuda() | |||
| if self.model.vad: | |||
| with torch.no_grad(): | |||
| masks, vad = self.model(fbanks.unsqueeze(0)) | |||
| masks = masks.permute([2, 1, 0]) | |||
| else: | |||
| with torch.no_grad(): | |||
| masks = self.model(fbanks.unsqueeze(0)) | |||
| masks = masks.permute([2, 1, 0]) | |||
| spectrum = self.stft(mixture) | |||
| masked_spec = spectrum * masks | |||
| masked_sig = self.istft(masked_spec, len(mixture)).cpu().numpy() | |||
| return masked_sig | |||
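| # Usage sketch (illustrative; the model id is an assumption and the file | |||
| # paths are placeholders): | |||
| #   from modelscope.pipelines import pipeline | |||
| #   aec = pipeline(Tasks.speech_signal_process, model='damo/speech_dfsmn_aec_psm_16k') | |||
| #   result = aec({'nearend_mic': 'near.wav', 'farend_speech': 'far.wav'}, | |||
| #                output_path='out.wav')  # result['output_pcm'] is float in [-1, 1] | |||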
| @@ -12,10 +12,11 @@ from modelscope.pydatasets import PyDataset | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| from modelscope.utils.logger import get_logger | |||
| from .outputs import TASK_OUTPUTS | |||
| from .util import is_model_name | |||
| Tensor = Union['torch.Tensor', 'tf.Tensor'] | |||
| Input = Union[str, PyDataset, Dict, 'PIL.Image.Image', 'numpy.ndarray'] | |||
| Input = Union[str, tuple, dict, PyDataset, 'PIL.Image.Image', 'numpy.ndarray'] | |||
| InputModel = Union[str, Model] | |||
| output_keys = [ | |||
| @@ -106,8 +107,25 @@ class Pipeline(ABC): | |||
| out = self.preprocess(input, **post_kwargs) | |||
| out = self.forward(out) | |||
| out = self.postprocess(out, **post_kwargs) | |||
| self._check_output(out) | |||
| return out | |||
| def _check_output(self, input): | |||
| # this attribute is dynamically attached by registry | |||
| # when cls is registered in registry using task name | |||
| task_name = self.group_key | |||
| if task_name not in TASK_OUTPUTS: | |||
| logger.warning(f'output keys for task {task_name} are not defined') | |||
| return | |||
| output_keys = TASK_OUTPUTS[task_name] | |||
| missing_keys = [] | |||
| for k in output_keys: | |||
| if k not in input: | |||
| missing_keys.append(k) | |||
| if len(missing_keys) > 0: | |||
| raise ValueError(f'expected output keys are {output_keys}, ' | |||
| f'those {missing_keys} are missing') | |||
| def preprocess(self, inputs: Input) -> Dict[str, Any]: | |||
| """ Provide default implementation based on preprocess_cfg and user can reimplement it | |||
| """ | |||
| @@ -125,4 +143,14 @@ class Pipeline(ABC): | |||
| @abstractmethod | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| """ If current pipeline support model reuse, common postprocess | |||
| code should be write here. | |||
| Args: | |||
| inputs: input data | |||
| Return: | |||
| dict of results: a dict containing outputs of model, each | |||
| output should have the standard output name. | |||
| """ | |||
| raise NotImplementedError('postprocess') | |||
| @@ -3,24 +3,27 @@ | |||
| import os.path as osp | |||
| from typing import List, Union | |||
| import json | |||
| from maas_hub.file_download import model_file_download | |||
| from modelscope.models.base import Model | |||
| from modelscope.utils.config import Config, ConfigDict | |||
| from modelscope.utils.constant import CONFIGFILE, Tasks | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.registry import Registry, build_from_cfg | |||
| from .base import Pipeline | |||
| from .util import is_model_name | |||
| PIPELINES = Registry('pipelines') | |||
| DEFAULT_MODEL_FOR_PIPELINE = { | |||
| # TaskName: (pipeline_module_name, model_repo) | |||
| Tasks.image_matting: ('image-matting', 'damo/image-matting-person'), | |||
| Tasks.word_segmentation: | |||
| ('structbert-chinese-word-segmentation', | |||
| 'damo/nlp_structbert_word-segmentation_chinese-base'), | |||
| Tasks.sentence_similarity: | |||
| ('sbert-base-chinese-sentence-similarity', | |||
| 'damo/nlp_structbert_sentence-similarity_chinese-base'), | |||
| Tasks.image_matting: ('image-matting', 'damo/cv_unet_image-matting'), | |||
| Tasks.text_classification: | |||
| ('bert-sentiment-analysis', 'damo/bert-base-sst2'), | |||
| Tasks.text_generation: ('palm', 'damo/nlp_palm_text-generation_chinese'), | |||
| Tasks.text_generation: ('palm2.0', | |||
| 'damo/nlp_palm2.0_text-generation_chinese-base'), | |||
| Tasks.image_captioning: ('ofa', None), | |||
| Tasks.image_generation: | |||
| ('person-image-cartoon', | |||
| @@ -1,5 +1,5 @@ | |||
| import os.path as osp | |||
| from typing import Any, Dict, List, Tuple, Union | |||
| from typing import Any, Dict | |||
| import cv2 | |||
| import numpy as np | |||
| @@ -7,7 +7,7 @@ import PIL | |||
| from modelscope.pipelines.base import Input | |||
| from modelscope.preprocessors import load_image | |||
| from modelscope.utils.constant import TF_GRAPH_FILE, Tasks | |||
| from modelscope.utils.constant import ModelFile, Tasks | |||
| from modelscope.utils.logger import get_logger | |||
| from ..base import Pipeline | |||
| from ..builder import PIPELINES | |||
| @@ -24,7 +24,7 @@ class ImageMattingPipeline(Pipeline): | |||
| import tensorflow as tf | |||
| if tf.__version__ >= '2.0': | |||
| tf = tf.compat.v1 | |||
| model_path = osp.join(self.model, TF_GRAPH_FILE) | |||
| model_path = osp.join(self.model, ModelFile.TF_GRAPH_FILE) | |||
| config = tf.ConfigProto(allow_soft_placement=True) | |||
| config.gpu_options.allow_growth = True | |||
| @@ -1 +1 @@ | |||
| from .image_captioning import ImageCaptionPipeline | |||
| from .image_caption_pipeline import ImageCaptionPipeline | |||
| @@ -84,8 +84,11 @@ class ImageCaptionPipeline(Pipeline): | |||
| s = torch.cat([s, self.eos_item]) | |||
| return s | |||
| patch_image = self.patch_resize_transform( | |||
| load_image(input)).unsqueeze(0) | |||
| if isinstance(input, Image.Image): | |||
| patch_image = self.patch_resize_transform(input).unsqueeze(0) | |||
| else: | |||
| patch_image = self.patch_resize_transform( | |||
| load_image(input)).unsqueeze(0) | |||
| patch_mask = torch.tensor([True]) | |||
| text = 'what does the image describe?' | |||
| src_text = encode_text( | |||
| @@ -1,4 +1,6 @@ | |||
| from .sentence_similarity_pipeline import * # noqa F403 | |||
| from .sequence_classification_pipeline import * # noqa F403 | |||
| from .space.dialog_intent_prediction_pipeline import * # noqa F403 | |||
| from .space.dialog_modeling_pipeline import * # noqa F403 | |||
| from .text_generation_pipeline import * # noqa F403 | |||
| from .word_segmentation_pipeline import * # noqa F403 | |||
| @@ -0,0 +1,62 @@ | |||
| from typing import Any, Dict, Union | |||
| import numpy as np | |||
| from modelscope.models.nlp import SbertForSentenceSimilarity | |||
| from modelscope.preprocessors import SequenceClassificationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from ...models import Model | |||
| from ..base import Input, Pipeline | |||
| from ..builder import PIPELINES | |||
| __all__ = ['SentenceSimilarityPipeline'] | |||
| @PIPELINES.register_module( | |||
| Tasks.sentence_similarity, | |||
| module_name=r'sbert-base-chinese-sentence-similarity') | |||
| class SentenceSimilarityPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[SbertForSentenceSimilarity, str], | |||
| preprocessor: SequenceClassificationPreprocessor = None, | |||
| **kwargs): | |||
| """use `model` and `preprocessor` to create a nlp sentence similarity pipeline for prediction | |||
| Args: | |||
| model (SbertForSentenceSimilarity): a model instance | |||
| preprocessor (SequenceClassificationPreprocessor): a preprocessor instance | |||
| """ | |||
| assert isinstance(model, str) or isinstance(model, SbertForSentenceSimilarity), \ | |||
| 'model must be a single str or SbertForSentenceSimilarity' | |||
| sc_model = model if isinstance( | |||
| model, | |||
| SbertForSentenceSimilarity) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = SequenceClassificationPreprocessor( | |||
| sc_model.model_dir, | |||
| first_sequence='first_sequence', | |||
| second_sequence='second_sequence') | |||
| super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) | |||
| assert hasattr(self.model, 'id2label'), \ | |||
| 'id2label map should be initialized in the init function.' | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| Args: | |||
| inputs (Dict[str, Any]): input data dict | |||
| Returns: | |||
| Dict[str, str]: the prediction results | |||
| """ | |||
| probs = inputs['probabilities'][0] | |||
| cls_ids = np.argsort(-probs)  # all classes, sorted by descending probability | |||
| probs = probs[cls_ids].tolist() | |||
| cls_names = [self.model.id2label[cid] for cid in cls_ids] | |||
| b = 0 | |||
| return {'scores': probs[b], 'labels': cls_names[b]} | |||
| @@ -1,8 +1,5 @@ | |||
| import os | |||
| import uuid | |||
| from typing import Any, Dict, Union | |||
| import json | |||
| import numpy as np | |||
| from modelscope.models.nlp import BertForSequenceClassification | |||
| @@ -41,50 +38,29 @@ class SequenceClassificationPipeline(Pipeline): | |||
| second_sequence=None) | |||
| super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) | |||
| from easynlp.utils import io | |||
| self.label_path = os.path.join(sc_model.model_dir, | |||
| 'label_mapping.json') | |||
| with io.open(self.label_path) as f: | |||
| self.label_mapping = json.load(f) | |||
| self.label_id_to_name = { | |||
| idx: name | |||
| for name, idx in self.label_mapping.items() | |||
| } | |||
| assert hasattr(self.model, 'id2label'), \ | |||
| 'id2label map should be initialized in the init function.' | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| def postprocess(self, | |||
| inputs: Dict[str, Any], | |||
| topk: int = 5) -> Dict[str, str]: | |||
| """process the prediction results | |||
| Args: | |||
| inputs (Dict[str, Any]): _description_ | |||
| inputs (Dict[str, Any]): input data dict | |||
| topk (int): return topk classification result. | |||
| Returns: | |||
| Dict[str, str]: the prediction results | |||
| """ | |||
| # NxC np.ndarray | |||
| probs = inputs['probs'][0] | |||
| num_classes = probs.shape[0] | |||
| topk = min(topk, num_classes) | |||
| top_indices = np.argpartition(probs, -topk)[-topk:] | |||
| cls_ids = top_indices[np.argsort(probs[top_indices])] | |||
| probs = probs[cls_ids].tolist() | |||
| probs = inputs['probabilities'] | |||
| logits = inputs['logits'] | |||
| predictions = np.argsort(-probs, axis=-1) | |||
| preds = predictions[0] | |||
| b = 0 | |||
| new_result = list() | |||
| for pred in preds: | |||
| new_result.append({ | |||
| 'pred': self.label_id_to_name[pred], | |||
| 'prob': float(probs[b][pred]), | |||
| 'logit': float(logits[b][pred]) | |||
| }) | |||
| new_results = list() | |||
| new_results.append({ | |||
| 'id': | |||
| inputs['id'][b] if 'id' in inputs else str(uuid.uuid4()), | |||
| 'output': | |||
| new_result, | |||
| 'predictions': | |||
| new_result[0]['pred'], | |||
| 'probabilities': | |||
| ','.join([str(t) for t in inputs['probabilities'][b]]), | |||
| 'logits': | |||
| ','.join([str(t) for t in inputs['logits'][b]]) | |||
| }) | |||
| cls_names = [self.model.id2label[cid] for cid in cls_ids] | |||
| return new_results[0] | |||
| return {'scores': probs, 'labels': cls_names} | |||
| @@ -1,7 +1,7 @@ | |||
| from typing import Dict, Optional, Union | |||
| from modelscope.models import Model | |||
| from modelscope.models.nlp import PalmForTextGenerationModel | |||
| from modelscope.models.nlp import PalmForTextGeneration | |||
| from modelscope.preprocessors import TextGenerationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Pipeline, Tensor | |||
| @@ -10,11 +10,11 @@ from ..builder import PIPELINES | |||
| __all__ = ['TextGenerationPipeline'] | |||
| @PIPELINES.register_module(Tasks.text_generation, module_name=r'palm') | |||
| @PIPELINES.register_module(Tasks.text_generation, module_name=r'palm2.0') | |||
| class TextGenerationPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[PalmForTextGenerationModel, str], | |||
| model: Union[PalmForTextGeneration, str], | |||
| preprocessor: Optional[TextGenerationPreprocessor] = None, | |||
| **kwargs): | |||
| """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction | |||
| @@ -23,16 +23,16 @@ class TextGenerationPipeline(Pipeline): | |||
| model (PalmForTextGeneration): a model instance | |||
| preprocessor (TextGenerationPreprocessor): a preprocessor instance | |||
| """ | |||
| sc_model = model if isinstance( | |||
| model, | |||
| PalmForTextGenerationModel) else Model.from_pretrained(model) | |||
| model = model if isinstance( | |||
| model, PalmForTextGeneration) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = TextGenerationPreprocessor( | |||
| sc_model.model_dir, | |||
| model.model_dir, | |||
| model.tokenizer, | |||
| first_sequence='sentence', | |||
| second_sequence=None) | |||
| super().__init__(model=sc_model, preprocessor=preprocessor, **kwargs) | |||
| self.tokenizer = preprocessor.tokenizer | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.tokenizer = model.tokenizer | |||
| def postprocess(self, inputs: Dict[str, Tensor]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| @@ -43,17 +43,20 @@ class TextGenerationPipeline(Pipeline): | |||
| Returns: | |||
| Dict[str, str]: the prediction results | |||
| """ | |||
| replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''), | |||
| ('[unused1]', ''), (r' +', ' '), ('[SEP]', ''), | |||
| ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', '')) | |||
| replace_tokens_roberta = ((r' +', ' '), ('<mask>', '<q>'), ('<pad>', | |||
| ''), | |||
| ('<s>', ''), ('</s>', ''), ('<unk>', ' ')) | |||
| vocab_size = len(self.tokenizer.vocab) | |||
| pred_list = inputs['predictions'] | |||
| pred_ids = pred_list[0][0].cpu().numpy().tolist() | |||
| for j in range(len(pred_ids)): | |||
| if pred_ids[j] >= vocab_size: | |||
| pred_ids[j] = 100 | |||
| pred = self.tokenizer.convert_ids_to_tokens(pred_ids) | |||
| pred_string = ''.join(pred).replace( | |||
| '##', | |||
| '').split('[SEP]')[0].replace('[CLS]', | |||
| '').replace('[SEP]', | |||
| '').replace('[UNK]', '') | |||
| return {'pred_string': pred_string} | |||
| pred_string = self.tokenizer.decode(pred_ids) | |||
| for _old, _new in replace_tokens_bert: | |||
| pred_string = pred_string.replace(_old, _new) | |||
| pred_string = pred_string.strip() | |||
| for _old, _new in replace_tokens_roberta: | |||
| pred_string = pred_string.replace(_old, _new) | |||
| pred_string = pred_string.strip() | |||
| return {'text': pred_string} | |||
| @@ -0,0 +1,69 @@ | |||
| from typing import Any, Dict, Optional, Union | |||
| from modelscope.models import Model | |||
| from modelscope.models.nlp import StructBertForTokenClassification | |||
| from modelscope.preprocessors import TokenClassifcationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from ..base import Pipeline, Tensor | |||
| from ..builder import PIPELINES | |||
| __all__ = ['WordSegmentationPipeline'] | |||
| @PIPELINES.register_module( | |||
| Tasks.word_segmentation, | |||
| module_name=r'structbert-chinese-word-segmentation') | |||
| class WordSegmentationPipeline(Pipeline): | |||
| def __init__(self, | |||
| model: Union[StructBertForTokenClassification, str], | |||
| preprocessor: Optional[TokenClassifcationPreprocessor] = None, | |||
| **kwargs): | |||
| """use `model` and `preprocessor` to create a nlp word segmentation pipeline for prediction | |||
| Args: | |||
| model (StructBertForTokenClassification): a model instance | |||
| preprocessor (TokenClassifcationPreprocessor): a preprocessor instance | |||
| """ | |||
| model = model if isinstance( | |||
| model, | |||
| StructBertForTokenClassification) else Model.from_pretrained(model) | |||
| if preprocessor is None: | |||
| preprocessor = TokenClassifcationPreprocessor(model.model_dir) | |||
| super().__init__(model=model, preprocessor=preprocessor, **kwargs) | |||
| self.tokenizer = preprocessor.tokenizer | |||
| self.config = model.config | |||
| self.id2label = self.config.id2label | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, str]: | |||
| """process the prediction results | |||
| Args: | |||
| inputs (Dict[str, Any]): input data dict | |||
| Returns: | |||
| Dict[str, str]: the prediction results | |||
| """ | |||
| pred_list = inputs['predictions'] | |||
| labels = [] | |||
| for pre in pred_list: | |||
| labels.append(self.id2label[pre]) | |||
| labels = labels[1:-1] | |||
| chunks = [] | |||
| chunk = '' | |||
| assert len(inputs['text']) == len(labels) | |||
| for token, label in zip(inputs['text'], labels): | |||
| if label[0] == 'B' or label[0] == 'I': | |||
| chunk += token | |||
| else: | |||
| chunk += token | |||
| chunks.append(chunk) | |||
| chunk = '' | |||
| if chunk: | |||
| chunks.append(chunk) | |||
| seg_result = ' '.join(chunks) | |||
| rst = { | |||
| 'output': seg_result, | |||
| } | |||
| return rst | |||
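| # Worked example (illustrative; a BIES tagging scheme is assumed): after the | |||
| # [CLS]/[SEP] positions are stripped by labels[1:-1], text '今天天气不错' with | |||
| # labels ['B', 'E', 'B', 'E', 'B', 'E'] yields {'output': '今天 天气 不错'}, | |||
| # since 'B'/'I' extend the current chunk and any other tag closes it. | |||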
| @@ -0,0 +1,117 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from modelscope.utils.constant import Tasks | |||
| TASK_OUTPUTS = { | |||
| # ============ vision tasks =================== | |||
| # image classification result for single sample | |||
| # { | |||
| # "labels": ["dog", "horse", "cow", "cat"], | |||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||
| # } | |||
| Tasks.image_classification: ['scores', 'labels'], | |||
| Tasks.image_tagging: ['scores', 'labels'], | |||
| # object detection result for single sample | |||
| # { | |||
| # "boxes": [ | |||
| # [x1, y1, x2, y2], | |||
| # [x1, y1, x2, y2], | |||
| # [x1, y1, x2, y2], | |||
| # ], | |||
| # "labels": ["dog", "horse", "cow", "cat"], | |||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||
| # } | |||
| Tasks.object_detection: ['scores', 'labels', 'boxes'], | |||
| # instance segmentation result for single sample | |||
| # { | |||
| # "masks": [ | |||
| # np.array in bgr channel order | |||
| # ], | |||
| # "labels": ["dog", "horse", "cow", "cat"], | |||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||
| # } | |||
| Tasks.image_segmentation: ['scores', 'labels', 'masks'], | |||
| # image generation/editing/matting result for single sample | |||
| # { | |||
| # "output_png": np.array with shape(h, w, 4) | |||
| # for matting or (h, w, 3) for general purpose | |||
| # } | |||
| Tasks.image_editing: ['output_png'], | |||
| Tasks.image_matting: ['output_png'], | |||
| Tasks.image_generation: ['output_png'], | |||
| # pose estimation result for single sample | |||
| # { | |||
| # "poses": np.array with shape [num_pose, num_keypoint, 3], | |||
| # each keypoint is a array [x, y, score] | |||
| # "boxes": np.array with shape [num_pose, 4], each box is | |||
| # [x1, y1, x2, y2] | |||
| # } | |||
| Tasks.pose_estimation: ['poses', 'boxes'], | |||
| # ============ nlp tasks =================== | |||
| # text classification result for single sample | |||
| # { | |||
| # "labels": ["happy", "sad", "calm", "angry"], | |||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||
| # } | |||
| Tasks.text_classification: ['scores', 'labels'], | |||
| # text generation result for single sample | |||
| # { | |||
| # "text": "this is text generated by a model." | |||
| # } | |||
| Tasks.text_generation: ['text'], | |||
| # word segmentation result for single sample | |||
| # { | |||
| # "output": "今天 天气 不错 , 适合 出去 游玩" | |||
| # } | |||
| Tasks.word_segmentation: ['output'], | |||
| # sentence similarity result for single sample | |||
| # { | |||
| # "labels": "1", | |||
| # "scores": 0.9 | |||
| # } | |||
| Tasks.sentence_similarity: ['scores', 'labels'], | |||
| # ============ audio tasks =================== | |||
| # audio processed for single file in PCM format | |||
| # { | |||
| # "output_pcm": np.array with shape(samples,) and dtype float32 | |||
| # } | |||
| Tasks.speech_signal_process: ['output_pcm'], | |||
| # ============ multi-modal tasks =================== | |||
| # image caption result for single sample | |||
| # { | |||
| # "caption": "this is an image caption text." | |||
| # } | |||
| Tasks.image_captioning: ['caption'], | |||
| # visual grounding result for single sample | |||
| # { | |||
| # "boxes": [ | |||
| # [x1, y1, x2, y2], | |||
| # [x1, y1, x2, y2], | |||
| # [x1, y1, x2, y2], | |||
| # ], | |||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||
| # } | |||
| Tasks.visual_grounding: ['boxes', 'scores'], | |||
| # text_to_image result for a single sample | |||
| # { | |||
| # "image": np.ndarray with shape [height, width, 3] | |||
| # } | |||
| Tasks.text_to_image_synthesis: ['image'] | |||
| } | |||
| @@ -1,12 +1,23 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| import os.path as osp | |||
| from typing import List, Union | |||
| import json | |||
| from maas_hub.file_download import model_file_download | |||
| from modelscope.utils.constant import CONFIGFILE | |||
| from modelscope.utils.config import Config | |||
| from modelscope.utils.constant import ModelFile | |||
| from modelscope.utils.logger import get_logger | |||
| logger = get_logger() | |||
| def is_config_has_model(cfg_file): | |||
| try: | |||
| cfg = Config.from_file(cfg_file) | |||
| return hasattr(cfg, 'model') | |||
| except Exception as e: | |||
| logger.error(f'parsing config file {cfg_file} failed: {e}') | |||
| return False | |||
| def is_model_name(model: Union[str, List]): | |||
| @@ -15,24 +26,17 @@ def is_model_name(model: Union[str, List]): | |||
| def is_model_name_impl(model): | |||
| if osp.exists(model): | |||
| if osp.exists(osp.join(model, CONFIGFILE)): | |||
| return True | |||
| cfg_file = osp.join(model, ModelFile.CONFIGURATION) | |||
| if osp.exists(cfg_file): | |||
| return is_config_has_model(cfg_file) | |||
| else: | |||
| return False | |||
| else: | |||
| # try: | |||
| # cfg_file = model_file_download(model, CONFIGFILE) | |||
| # except Exception: | |||
| # cfg_file = None | |||
| # TODO @wenmeng.zwm use exception instead of | |||
| # following tricky logic | |||
| cfg_file = model_file_download(model, CONFIGFILE) | |||
| with open(cfg_file, 'r') as infile: | |||
| cfg = json.load(infile) | |||
| if 'Code' in cfg: | |||
| try: | |||
| cfg_file = model_file_download(model, ModelFile.CONFIGURATION) | |||
| return is_config_has_model(cfg_file) | |||
| except Exception: | |||
| return False | |||
| else: | |||
| return True | |||
| if isinstance(model, str): | |||
| return is_model_name_impl(model) | |||
| @@ -1,10 +1,10 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| from .audio import LinearAECAndFbank | |||
| from .base import Preprocessor | |||
| from .builder import PREPROCESSORS, build_preprocessor | |||
| from .common import Compose | |||
| from .image import LoadImage, load_image | |||
| from .nlp import * # noqa F403 | |||
| from .nlp import TextGenerationPreprocessor | |||
| from .space.dialog_intent_prediction_preprocessor import * # noqa F403 | |||
| from .space.dialog_modeling_preprocessor import * # noqa F403 | |||
| @@ -0,0 +1,230 @@ | |||
| import ctypes | |||
| import os | |||
| from typing import Any, Dict | |||
| import numpy as np | |||
| import scipy.io.wavfile as wav | |||
| import torch | |||
| import torchaudio.compliance.kaldi as kaldi | |||
| from numpy.ctypeslib import ndpointer | |||
| from modelscope.utils.constant import Fields | |||
| from .builder import PREPROCESSORS | |||
| def load_wav(path): | |||
| samp_rate, data = wav.read(path) | |||
| return np.float32(data), samp_rate | |||
| def load_library(libaec): | |||
| libaec_in_cwd = os.path.join('.', libaec) | |||
| if os.path.exists(libaec_in_cwd): | |||
| libaec = libaec_in_cwd | |||
| mitaec = ctypes.cdll.LoadLibrary(libaec) | |||
| fe_process = mitaec.fe_process_inst | |||
| fe_process.argtypes = [ | |||
| ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), | |||
| ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), ctypes.c_int, | |||
| ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), | |||
| ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), | |||
| ndpointer(ctypes.c_float, flags='C_CONTIGUOUS') | |||
| ] | |||
| return fe_process | |||
| def do_linear_aec(fe_process, mic, ref, int16range=True): | |||
| mic = np.float32(mic) | |||
| ref = np.float32(ref) | |||
| if len(mic) > len(ref): | |||
| mic = mic[:len(ref)] | |||
| out_mic = np.zeros_like(mic) | |||
| out_linear = np.zeros_like(mic) | |||
| out_echo = np.zeros_like(mic) | |||
| out_ref = np.zeros_like(mic) | |||
| if int16range: | |||
| mic /= 32768 | |||
| ref /= 32768 | |||
| fe_process(mic, ref, len(mic), out_mic, out_linear, out_echo) | |||
| # out_ref not in use here | |||
| if int16range: | |||
| out_mic *= 32768 | |||
| out_linear *= 32768 | |||
| out_echo *= 32768 | |||
| return out_mic, out_ref, out_linear, out_echo | |||
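| The C function writes its results in place into the preallocated float32 buffers, which is | |||
| why do_linear_aec zero-fills four arrays before the call. A sketch of driving it directly, | |||
| assuming libmitaec_pyio.so is present in the working directory (the AEC test below | |||
| downloads it there): | |||
| | |||
| import numpy as np | |||
| | |||
| fe_process = load_library('libmitaec_pyio.so') | |||
| # one second of fake int16-range audio at 16 kHz | |||
| mic = np.random.randint(-32768, 32767, size=16000).astype(np.float32) | |||
| ref = np.random.randint(-32768, 32767, size=16000).astype(np.float32) | |||
| out_mic, out_ref, out_linear, out_echo = do_linear_aec(fe_process, mic, ref) | |||
| print(out_linear.shape)  # same length as the (possibly truncated) mic signal | |||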
| def load_kaldi_feature_transform(filename): | |||
| fp = open(filename, 'r') | |||
| all_str = fp.read() | |||
| pos1 = all_str.find('AddShift') | |||
| pos2 = all_str.find('[', pos1) | |||
| pos3 = all_str.find(']', pos2) | |||
| mean = np.fromstring(all_str[pos2 + 1:pos3], dtype=np.float32, sep=' ') | |||
| pos1 = all_str.find('Rescale') | |||
| pos2 = all_str.find('[', pos1) | |||
| pos3 = all_str.find(']', pos2) | |||
| scale = np.fromstring(all_str[pos2 + 1:pos3], dtype=np.float32, sep=' ') | |||
| fp.close() | |||
| return mean, scale | |||
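| The parser above is a plain string scan, not a full Kaldi nnet reader: it only extracts | |||
| the first AddShift and Rescale vectors from a text-format feature transform. A rough | |||
| illustration of input it can parse (the exact file layout is an assumption based on the | |||
| scan logic; real files come from Kaldi tooling): | |||
| | |||
| sample = '<AddShift> 3 3\n[ -1.0 -2.0 -3.0 ]\n<Rescale> 3 3\n[ 0.5 0.5 0.5 ]\n' | |||
| with open('fake.mvn', 'w') as f: | |||
|     f.write(sample) | |||
| mean, scale = load_kaldi_feature_transform('fake.mvn') | |||
| print(mean, scale)  # [-1. -2. -3.] [0.5 0.5 0.5] | |||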
| class Feature: | |||
| r"""Extract feat from one utterance. | |||
| """ | |||
| def __init__(self, | |||
| fbank_config, | |||
| feat_type='spec', | |||
| mvn_file=None, | |||
| cuda=False): | |||
| r""" | |||
| Args: | |||
| fbank_config (dict): arguments forwarded to kaldi.fbank; frame_length, frame_shift (ms) and sample_frequency (Hz) are also used to derive the STFT size | |||
| feat_type (str): | |||
| raw: do nothing | |||
| fbank: use kaldi.fbank | |||
| spec: Real/Imag spectrogram | |||
| logpow: log(1+|x|^2) power spectrogram | |||
| mvn_file (str): the path of the data file for mean-variance normalization | |||
| cuda (bool): if True, move the window and mvn tensors to GPU | |||
| """ | |||
| self.fbank_config = fbank_config | |||
| self.feat_type = feat_type | |||
| self.n_fft = fbank_config['frame_length'] * fbank_config[ | |||
| 'sample_frequency'] // 1000 | |||
| self.hop_length = fbank_config['frame_shift'] * fbank_config[ | |||
| 'sample_frequency'] // 1000 | |||
| self.window = torch.hamming_window(self.n_fft, periodic=False) | |||
| self.mvn = False | |||
| if mvn_file is not None and os.path.exists(mvn_file): | |||
| print(f'loading mvn file: {mvn_file}') | |||
| shift, scale = load_kaldi_feature_transform(mvn_file) | |||
| self.shift = torch.from_numpy(shift) | |||
| self.scale = torch.from_numpy(scale) | |||
| self.mvn = True | |||
| if cuda: | |||
| self.window = self.window.cuda() | |||
| if self.mvn: | |||
| self.shift = self.shift.cuda() | |||
| self.scale = self.scale.cuda() | |||
| def compute(self, utt): | |||
| r""" | |||
| Args: | |||
| utt (Tensor): waveform in the [-32768, 32767] int16 value range | |||
| Returns: | |||
| feature tensor of shape [..., T, F] | |||
| """ | |||
| if self.feat_type == 'raw': | |||
| return utt | |||
| elif self.feat_type == 'fbank': | |||
| if len(utt.shape) == 1: | |||
| utt = utt.unsqueeze(0) | |||
| feat = kaldi.fbank(utt, **self.fbank_config) | |||
| elif self.feat_type == 'spec': | |||
| spec = torch.stft( | |||
| utt / 32768, | |||
| self.n_fft, | |||
| self.hop_length, | |||
| self.n_fft, | |||
| self.window, | |||
| center=False, | |||
| return_complex=True) | |||
| feat = torch.cat([spec.real, spec.imag], dim=-2).permute(-1, -2) | |||
| elif self.feat_type == 'logpow': | |||
| spec = torch.stft( | |||
| utt, | |||
| self.n_fft, | |||
| self.hop_length, | |||
| self.n_fft, | |||
| self.window, | |||
| center=False, | |||
| return_complex=True) | |||
| abspow = torch.abs(spec)**2 | |||
| feat = torch.log(1 + abspow).permute(-1, -2) | |||
| return feat | |||
| def normalize(self, feat): | |||
| if self.mvn: | |||
| feat = feat + self.shift | |||
| feat = feat * self.scale | |||
| return feat | |||
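| A minimal use of Feature with an assumed fbank_config; only frame_length and frame_shift | |||
| (in ms) and sample_frequency (in Hz) are read by the constructor itself, while the whole | |||
| dict is forwarded to kaldi.fbank when feat_type is 'fbank': | |||
| | |||
| import torch | |||
| | |||
| fbank_config = dict(frame_length=25, frame_shift=10, sample_frequency=16000) | |||
| feature = Feature(fbank_config, feat_type='spec') | |||
| utt = torch.randn(16000) * 10000  # fake 1 s utterance, roughly int16 range | |||
| feat = feature.compute(utt)       # [T, 2 * (n_fft // 2 + 1)] real/imag stacked | |||
| feat = feature.normalize(feat)    # no-op here: no mvn file was given | |||
| print(feat.shape) | |||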
| @PREPROCESSORS.register_module(Fields.audio) | |||
| class LinearAECAndFbank: | |||
| SAMPLE_RATE = 16000 | |||
| def __init__(self, io_config): | |||
| self.trunc_length = 7200 * self.SAMPLE_RATE | |||
| self.linear_aec_delay = io_config['linear_aec_delay'] | |||
| self.feature = Feature(io_config['fbank_config'], | |||
| io_config['feat_type'], io_config['mvn']) | |||
| self.mitaec = load_library(io_config['mitaec_library']) | |||
| self.mask_on_mic = io_config['mask_on'] == 'nearend_mic' | |||
| def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: | |||
| """ linear filtering the near end mic and far end audio, then extract the feature | |||
| :param data: dict with two keys and correspond audios: "nearend_mic" and "farend_speech" | |||
| :return: dict with two keys and Tensor values: "base" linear filtered audio,and "feature" | |||
| """ | |||
| # read files | |||
| nearend_mic, fs = load_wav(data['nearend_mic']) | |||
| assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' | |||
| farend_speech, fs = load_wav(data['farend_speech']) | |||
| assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' | |||
| if 'nearend_speech' in data: | |||
| nearend_speech, fs = load_wav(data['nearend_speech']) | |||
| assert fs == self.SAMPLE_RATE, f'The sample rate should be {self.SAMPLE_RATE}' | |||
| else: | |||
| nearend_speech = np.zeros_like(nearend_mic) | |||
| out_mic, out_ref, out_linear, out_echo = do_linear_aec( | |||
| self.mitaec, nearend_mic, farend_speech) | |||
| # compensate for the linear AEC delay (e.g. 20 ms) by delaying the target speech | |||
| extra_zeros = np.zeros([int(self.linear_aec_delay * fs)]) | |||
| nearend_speech = np.concatenate([extra_zeros, nearend_speech]) | |||
| # truncate files to the same length | |||
| flen = min( | |||
| len(out_mic), len(out_ref), len(out_linear), len(out_echo), | |||
| len(nearend_speech)) | |||
| fstart = 0 | |||
| flen = min(flen, self.trunc_length) | |||
| nearend_mic, out_ref, out_linear, out_echo, nearend_speech = ( | |||
| out_mic[fstart:flen], out_ref[fstart:flen], | |||
| out_linear[fstart:flen], out_echo[fstart:flen], | |||
| nearend_speech[fstart:flen]) | |||
| # extract features (frames, [mic, linear, ref, aes?]) | |||
| feat = torch.FloatTensor() | |||
| nearend_mic = torch.from_numpy(np.float32(nearend_mic)) | |||
| fbank_nearend_mic = self.feature.compute(nearend_mic) | |||
| feat = torch.cat([feat, fbank_nearend_mic], dim=1) | |||
| out_linear = torch.from_numpy(np.float32(out_linear)) | |||
| fbank_out_linear = self.feature.compute(out_linear) | |||
| feat = torch.cat([feat, fbank_out_linear], dim=1) | |||
| out_echo = torch.from_numpy(np.float32(out_echo)) | |||
| fbank_out_echo = self.feature.compute(out_echo) | |||
| feat = torch.cat([feat, fbank_out_echo], dim=1) | |||
| # feature transform | |||
| feat = self.feature.normalize(feat) | |||
| # prepare target | |||
| if nearend_speech is not None: | |||
| nearend_speech = torch.from_numpy(np.float32(nearend_speech)) | |||
| if self.mask_on_mic: | |||
| base = nearend_mic | |||
| else: | |||
| base = out_linear | |||
| out_data = {'base': base, 'target': nearend_speech, 'feature': feat} | |||
| return out_data | |||
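| Driving the preprocessor outside a pipeline takes an io_config with the keys the | |||
| constructor reads; the values below are illustrative assumptions, in practice they come | |||
| from the model's configuration.json: | |||
| | |||
| io_config = { | |||
|     'linear_aec_delay': 0.02,  # seconds | |||
|     'fbank_config': dict(frame_length=25, frame_shift=10, | |||
|                          sample_frequency=16000, num_mel_bins=80), | |||
|     'feat_type': 'fbank', | |||
|     'mvn': None,  # skip mean-variance normalization | |||
|     'mitaec_library': 'libmitaec_pyio.so', | |||
|     'mask_on': 'nearend_mic', | |||
| } | |||
| preprocessor = LinearAECAndFbank(io_config) | |||
| out = preprocessor({'nearend_mic': 'nearend_mic.wav', | |||
|                     'farend_speech': 'farend_speech.wav'}) | |||
| print(out['feature'].shape, out['base'].shape) | |||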
| @@ -9,7 +9,7 @@ from modelscope.utils.constant import Fields | |||
| from .builder import PREPROCESSORS | |||
| @PREPROCESSORS.register_module(Fields.image) | |||
| @PREPROCESSORS.register_module(Fields.cv) | |||
| class LoadImage: | |||
| """Load an image from file or url. | |||
| Added or updated keys are "filename", "img", "img_shape", | |||
| @@ -11,8 +11,8 @@ from .base import Preprocessor | |||
| from .builder import PREPROCESSORS | |||
| __all__ = [ | |||
| 'Tokenize', | |||
| 'SequenceClassificationPreprocessor', | |||
| 'Tokenize', 'SequenceClassificationPreprocessor', | |||
| 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor' | |||
| ] | |||
| @@ -31,7 +31,7 @@ class Tokenize(Preprocessor): | |||
| @PREPROCESSORS.register_module( | |||
| Fields.nlp, module_name=r'bert-sentiment-analysis') | |||
| Fields.nlp, module_name=r'bert-sequence-classification') | |||
| class SequenceClassificationPreprocessor(Preprocessor): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| @@ -51,21 +51,42 @@ class SequenceClassificationPreprocessor(Preprocessor): | |||
| self.sequence_length = kwargs.pop('sequence_length', 128) | |||
| self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir) | |||
| print(f'this is the tokenizer {self.tokenizer}') | |||
| @type_assert(object, str) | |||
| def __call__(self, data: str) -> Dict[str, Any]: | |||
| @type_assert(object, (str, tuple)) | |||
| def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]: | |||
| """process the raw input data | |||
| Args: | |||
| data (str): a sentence | |||
| Example: | |||
| 'you are so handsome.' | |||
| data (str or tuple): | |||
| sentence1 (str): a sentence | |||
| Example: | |||
| 'you are so handsome.' | |||
| or | |||
| (sentence1, sentence2) | |||
| sentence1 (str): a sentence | |||
| Example: | |||
| 'you are so handsome.' | |||
| sentence2 (str): a sentence | |||
| Example: | |||
| 'you are so beautiful.' | |||
| Returns: | |||
| Dict[str, Any]: the preprocessed data | |||
| """ | |||
| new_data = {self.first_sequence: data} | |||
| if not isinstance(data, tuple): | |||
| data = ( | |||
| data, | |||
| None, | |||
| ) | |||
| sentence1, sentence2 = data | |||
| new_data = { | |||
| self.first_sequence: sentence1, | |||
| self.second_sequence: sentence2 | |||
| } | |||
| # preprocess the data for the model input | |||
| rst = { | |||
| @@ -94,17 +115,15 @@ class SequenceClassificationPreprocessor(Preprocessor): | |||
| return rst | |||
| @PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm') | |||
| @PREPROCESSORS.register_module(Fields.nlp, module_name=r'palm2.0') | |||
| class TextGenerationPreprocessor(Preprocessor): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| def __init__(self, model_dir: str, tokenizer, *args, **kwargs): | |||
| """preprocess the data using the vocab.txt from the `model_dir` path | |||
| Args: | |||
| model_dir (str): model path | |||
| """ | |||
| from sofa import PalmTokenizer | |||
| super().__init__(*args, **kwargs) | |||
| self.model_dir: str = model_dir | |||
| @@ -113,7 +132,7 @@ class TextGenerationPreprocessor(Preprocessor): | |||
| self.second_sequence: str = kwargs.pop('second_sequence', | |||
| 'second_sequence') | |||
| self.sequence_length: int = kwargs.pop('sequence_length', 128) | |||
| self.tokenizer = PalmTokenizer.from_pretrained(model_dir) | |||
| self.tokenizer = tokenizer | |||
| @type_assert(object, str) | |||
| def __call__(self, data: str) -> Dict[str, Any]: | |||
| @@ -132,7 +151,7 @@ class TextGenerationPreprocessor(Preprocessor): | |||
| new_data = {self.first_sequence: data} | |||
| # preprocess the data for the model input | |||
| rst = {'input_ids': [], 'attention_mask': [], 'token_type_ids': []} | |||
| rst = {'input_ids': [], 'attention_mask': []} | |||
| max_seq_length = self.sequence_length | |||
| @@ -147,6 +166,53 @@ class TextGenerationPreprocessor(Preprocessor): | |||
| rst['input_ids'].append(feature['input_ids']) | |||
| rst['attention_mask'].append(feature['attention_mask']) | |||
| rst['token_type_ids'].append(feature['token_type_ids']) | |||
| return {k: torch.tensor(v) for k, v in rst.items()} | |||
| @PREPROCESSORS.register_module( | |||
| Fields.nlp, module_name=r'bert-token-classification') | |||
| class TokenClassifcationPreprocessor(Preprocessor): | |||
| def __init__(self, model_dir: str, *args, **kwargs): | |||
| """preprocess the data via the vocab.txt from the `model_dir` path | |||
| Args: | |||
| model_dir (str): model path | |||
| """ | |||
| super().__init__(*args, **kwargs) | |||
| from sofa import SbertTokenizer | |||
| self.model_dir: str = model_dir | |||
| self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir) | |||
| @type_assert(object, str) | |||
| def __call__(self, data: str) -> Dict[str, Any]: | |||
| """process the raw input data | |||
| Args: | |||
| data (str): a sentence | |||
| Example: | |||
| 'you are so handsome.' | |||
| Returns: | |||
| Dict[str, Any]: the preprocessed data | |||
| """ | |||
| # preprocess the data for the model input | |||
| text = data.replace(' ', '').strip() | |||
| tokens = [] | |||
| for token in text: | |||
| token = self.tokenizer.tokenize(token) | |||
| tokens.extend(token) | |||
| input_ids = self.tokenizer.convert_tokens_to_ids(tokens) | |||
| input_ids = self.tokenizer.build_inputs_with_special_tokens(input_ids) | |||
| attention_mask = [1] * len(input_ids) | |||
| token_type_ids = [0] * len(input_ids) | |||
| return { | |||
| 'text': text, | |||
| 'input_ids': input_ids, | |||
| 'attention_mask': attention_mask, | |||
| 'token_type_ids': token_type_ids | |||
| } | |||
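| The tokenization is character level: spaces are stripped and each remaining character is | |||
| tokenized on its own, so input_ids typically grows by one per character plus the special | |||
| tokens. A sketch, with a hypothetical model directory that holds a StructBERT vocab for | |||
| SbertTokenizer: | |||
| | |||
| preprocessor = TokenClassifcationPreprocessor('/path/to/model_dir')  # hypothetical path | |||
| out = preprocessor('今天天气不错') | |||
| print(len(out['input_ids']))      # typically 6 characters + 2 special tokens = 8 | |||
| print(out['attention_mask'][:3])  # [1, 1, 1] | |||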
| @@ -74,17 +74,17 @@ class Config: | |||
| {'c': [1, 2, 3], 'd': 'dd'} | |||
| >>> cfg.b.d | |||
| 'dd' | |||
| >>> cfg = Config.from_file('configs/examples/config.json') | |||
| >>> cfg = Config.from_file('configs/examples/configuration.json') | |||
| >>> cfg.filename | |||
| 'configs/examples/config.json' | |||
| 'configs/examples/configuration.json' | |||
| >>> cfg.b | |||
| {'c': [1, 2, 3], 'd': 'dd'} | |||
| >>> cfg = Config.from_file('configs/examples/config.py') | |||
| >>> cfg = Config.from_file('configs/examples/configuration.py') | |||
| >>> cfg.filename | |||
| "configs/examples/config.py" | |||
| >>> cfg = Config.from_file('configs/examples/config.yaml') | |||
| "configs/examples/configuration.py" | |||
| >>> cfg = Config.from_file('configs/examples/configuration.yaml') | |||
| >>> cfg.filename | |||
| "configs/examples/config.yaml" | |||
| "configs/examples/configuration.yaml" | |||
| """ | |||
| @staticmethod | |||
| @@ -4,8 +4,8 @@ | |||
| class Fields(object): | |||
| """ Names for different application fields | |||
| """ | |||
| image = 'image' | |||
| video = 'video' | |||
| # image = 'image' | |||
| # video = 'video' | |||
| cv = 'cv' | |||
| nlp = 'nlp' | |||
| audio = 'audio' | |||
| @@ -30,7 +30,9 @@ class Tasks(object): | |||
| image_matting = 'image-matting' | |||
| # nlp tasks | |||
| word_segmentation = 'word-segmentation' | |||
| sentiment_analysis = 'sentiment-analysis' | |||
| sentence_similarity = 'sentence-similarity' | |||
| text_classification = 'text-classification' | |||
| relation_extraction = 'relation-extraction' | |||
| zero_shot = 'zero-shot' | |||
| @@ -52,7 +54,7 @@ class Tasks(object): | |||
| text_to_speech = 'text-to-speech' | |||
| speech_signal_process = 'speech-signal-process' | |||
| # multi-media | |||
| # multi-modal tasks | |||
| image_captioning = 'image-captioning' | |||
| visual_grounding = 'visual-grounding' | |||
| text_to_image_synthesis = 'text-to-image-synthesis' | |||
| @@ -73,16 +75,16 @@ class Hubs(object): | |||
| huggingface = 'huggingface' | |||
| # configuration filename | |||
| # in order to avoid conflict with huggingface | |||
| # config file we use maas_config instead | |||
| CONFIGFILE = 'maas_config.json' | |||
| class ModelFile(object): | |||
| CONFIGURATION = 'configuration.json' | |||
| README = 'README.md' | |||
| TF_SAVED_MODEL_FILE = 'saved_model.pb' | |||
| TF_GRAPH_FILE = 'tf_graph.pb' | |||
| TF_CHECKPOINT_FOLDER = 'tf_ckpts' | |||
| TF_CKPT_PREFIX = 'ckpt-' | |||
| TORCH_MODEL_FILE = 'pytorch_model.pt' | |||
| TORCH_MODEL_BIN_FILE = 'pytorch_model.bin' | |||
| README_FILE = 'README.md' | |||
| TF_SAVED_MODEL_FILE = 'saved_model.pb' | |||
| TF_GRAPH_FILE = 'tf_graph.pb' | |||
| TF_CHECKPOINT_FOLDER = 'tf_ckpts' | |||
| TF_CHECKPOINT_FILE = 'checkpoint' | |||
| TORCH_MODEL_FILE = 'pytorch_model.bin' | |||
| TENSORFLOW = 'tensorflow' | |||
| PYTORCH = 'pytorch' | |||
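| These constants are meant to be joined onto a model directory rather than hard-coded, as | |||
| the image-matting test below now does with ModelFile.TF_GRAPH_FILE. A small sketch: | |||
| | |||
| import os.path as osp | |||
| from modelscope.utils.constant import ModelFile | |||
| | |||
| def config_path(model_dir: str) -> str: | |||
|     # configuration.json replaces the old maas_config.json | |||
|     return osp.join(model_dir, ModelFile.CONFIGURATION) | |||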
| @@ -1,7 +1,6 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import inspect | |||
| from email.policy import default | |||
| from modelscope.utils.logger import get_logger | |||
| @@ -70,6 +69,7 @@ class Registry(object): | |||
| f'{self._name}[{group_key}]') | |||
| self._modules[group_key][module_name] = module_cls | |||
| module_cls.group_key = group_key | |||
| if module_name in self._modules[default_group]: | |||
| if id(self._modules[default_group][module_name]) == id(module_cls): | |||
| @@ -0,0 +1,20 @@ | |||
| #!/usr/bin/env python | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import os | |||
| TEST_LEVEL = 2 | |||
| TEST_LEVEL_STR = 'TEST_LEVEL' | |||
| def test_level(): | |||
| global TEST_LEVEL | |||
| if TEST_LEVEL_STR in os.environ: | |||
| TEST_LEVEL = int(os.environ[TEST_LEVEL_STR]) | |||
| return TEST_LEVEL | |||
| def set_test_level(level: int): | |||
| global TEST_LEVEL | |||
| TEST_LEVEL = level | |||
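| test_level() re-reads TEST_LEVEL from the environment on every call, so either the env var | |||
| or the --level flag added to tests/run.py below selects which gated cases run. A minimal | |||
| gated test mirroring the pattern the suites in this change use: | |||
| | |||
| import unittest | |||
| from modelscope.utils.test_utils import test_level | |||
| | |||
| class DemoTest(unittest.TestCase): | |||
| | |||
|     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
|     def test_p1_case(self): | |||
|         self.assertTrue(True) | |||
| | |||
| # run as: TEST_LEVEL=1 python -m unittest demo_test | |||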
| @@ -1,6 +1,7 @@ | |||
| docutils==0.16.0 | |||
| recommonmark | |||
| sphinx==4.0.2 | |||
| sphinx-book-theme | |||
| sphinx-copybutton | |||
| sphinx_markdown_tables | |||
| sphinx_rtd_theme==0.5.2 | |||
| @@ -1 +1 @@ | |||
| https://alinlp.alibaba-inc.com/pypi/sofa-1.0.1.3-py3-none-any.whl | |||
| https://alinlp.alibaba-inc.com/pypi/sofa-1.0.2-py3-none-any.whl | |||
| @@ -1,12 +1,13 @@ | |||
| addict | |||
| datasets | |||
| easydict | |||
| https://maashub.oss-cn-hangzhou.aliyuncs.com/releases/maas_hub-0.1.0.dev0-py2.py3-none-any.whl | |||
| https://mindscope.oss-cn-hangzhou.aliyuncs.com/sdklib/maas_hub-0.2.4.dev0-py3-none-any.whl | |||
| numpy | |||
| opencv-python-headless | |||
| Pillow | |||
| Pillow>=6.2.0 | |||
| pyyaml | |||
| requests | |||
| scipy | |||
| tokenizers<=0.10.3 | |||
| transformers<=4.16.2 | |||
| yapf | |||
| @@ -11,6 +11,7 @@ default_section = THIRDPARTY | |||
| BASED_ON_STYLE = pep8 | |||
| BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true | |||
| SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true | |||
| SPLIT_BEFORE_ARITHMETIC_OPERATOR = true | |||
| [codespell] | |||
| skip = *.ipynb | |||
| @@ -20,5 +21,5 @@ ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids | |||
| [flake8] | |||
| select = B,C,E,F,P,T4,W,B9 | |||
| max-line-length = 120 | |||
| ignore = F401,F821 | |||
| ignore = F401,F821,W503 | |||
| exclude = docs/src,*.pyi,.git | |||
| @@ -35,9 +35,10 @@ class CustomPipelineTest(unittest.TestCase): | |||
| CustomPipeline1() | |||
| def test_custom(self): | |||
| dummy_task = 'dummy-task' | |||
| @PIPELINES.register_module( | |||
| group_key=Tasks.image_tagging, module_name='custom-image') | |||
| group_key=dummy_task, module_name='custom-image') | |||
| class CustomImagePipeline(Pipeline): | |||
| def __init__(self, | |||
| @@ -67,32 +68,28 @@ class CustomPipelineTest(unittest.TestCase): | |||
| outputs['filename'] = inputs['url'] | |||
| img = inputs['img'] | |||
| new_image = img.resize((img.width // 2, img.height // 2)) | |||
| outputs['resize_image'] = np.array(new_image) | |||
| outputs['dummy_result'] = 'dummy_result' | |||
| outputs['output_png'] = np.array(new_image) | |||
| return outputs | |||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | |||
| return inputs | |||
| self.assertTrue('custom-image' in PIPELINES.modules[default_group]) | |||
| add_default_pipeline_info(Tasks.image_tagging, 'custom-image') | |||
| add_default_pipeline_info(dummy_task, 'custom-image', overwrite=True) | |||
| pipe = pipeline(pipeline_name='custom-image') | |||
| pipe2 = pipeline(Tasks.image_tagging) | |||
| pipe2 = pipeline(dummy_task) | |||
| self.assertTrue(type(pipe) is type(pipe2)) | |||
| img_url = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.' \ | |||
| 'aliyuncs.com/data/test/images/image1.jpg' | |||
| img_url = 'data/test/images/image1.jpg' | |||
| output = pipe(img_url) | |||
| self.assertEqual(output['filename'], img_url) | |||
| self.assertEqual(output['resize_image'].shape, (318, 512, 3)) | |||
| self.assertEqual(output['dummy_result'], 'dummy_result') | |||
| self.assertEqual(output['output_png'].shape, (318, 512, 3)) | |||
| outputs = pipe([img_url for i in range(4)]) | |||
| self.assertEqual(len(outputs), 4) | |||
| for out in outputs: | |||
| self.assertEqual(out['filename'], img_url) | |||
| self.assertEqual(out['resize_image'].shape, (318, 512, 3)) | |||
| self.assertEqual(out['dummy_result'], 'dummy_result') | |||
| self.assertEqual(out['output_png'].shape, (318, 512, 3)) | |||
| if __name__ == '__main__': | |||
| @@ -7,11 +7,12 @@ import unittest | |||
| from modelscope.fileio import File | |||
| from modelscope.pipelines import pipeline | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.test_utils import test_level | |||
| class ImageCaptionTest(unittest.TestCase): | |||
| @unittest.skip('skip long test') | |||
| @unittest.skip('skip before model is restored in model hub') | |||
| def test_run(self): | |||
| model = 'https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt' | |||
| @@ -26,9 +27,7 @@ class ImageCaptionTest(unittest.TestCase): | |||
| img_captioning = pipeline( | |||
| Tasks.image_captioning, model=ofile.name, bpe_dir=bpe_dir) | |||
| result = img_captioning( | |||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' | |||
| ) | |||
| result = img_captioning('data/test/images/image_matting.png') | |||
| print(result['caption']) | |||
| @@ -9,14 +9,15 @@ import cv2 | |||
| from modelscope.fileio import File | |||
| from modelscope.pipelines import pipeline | |||
| from modelscope.pydatasets import PyDataset | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.constant import ModelFile, Tasks | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| from modelscope.utils.test_utils import test_level | |||
| class ImageMattingTest(unittest.TestCase): | |||
| def setUp(self) -> None: | |||
| self.model_id = 'damo/cv_unet_image-matting_damo' | |||
| self.model_id = 'damo/cv_unet_image-matting' | |||
| # switch to False if downloading every time is not desired | |||
| purge_cache = True | |||
| if purge_cache: | |||
| @@ -28,20 +29,17 @@ class ImageMattingTest(unittest.TestCase): | |||
| model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \ | |||
| '.com/data/test/maas/image_matting/matting_person.pb' | |||
| with tempfile.TemporaryDirectory() as tmp_dir: | |||
| model_file = osp.join(tmp_dir, 'matting_person.pb') | |||
| model_file = osp.join(tmp_dir, ModelFile.TF_GRAPH_FILE) | |||
| with open(model_file, 'wb') as ofile: | |||
| ofile.write(File.read(model_path)) | |||
| img_matting = pipeline(Tasks.image_matting, model=tmp_dir) | |||
| result = img_matting( | |||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' | |||
| ) | |||
| result = img_matting('data/test/images/image_matting.png') | |||
| cv2.imwrite('result.png', result['output_png']) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_dataset(self): | |||
| input_location = [ | |||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' | |||
| ] | |||
| input_location = ['data/test/images/image_matting.png'] | |||
| # alternatively: | |||
| # input_location = '/dir/to/images' | |||
| @@ -52,21 +50,19 @@ class ImageMattingTest(unittest.TestCase): | |||
| cv2.imwrite('result.png', next(result)['output_png']) | |||
| print(f'Output written to {osp.abspath("result.png")}') | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_modelhub(self): | |||
| img_matting = pipeline(Tasks.image_matting, model=self.model_id) | |||
| result = img_matting( | |||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' | |||
| ) | |||
| result = img_matting('data/test/images/image_matting.png') | |||
| cv2.imwrite('result.png', result['output_png']) | |||
| print(f'Output written to {osp.abspath("result.png")}') | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_modelhub_default_model(self): | |||
| img_matting = pipeline(Tasks.image_matting) | |||
| result = img_matting( | |||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' | |||
| ) | |||
| result = img_matting('data/test/images/image_matting.png') | |||
| cv2.imwrite('result.png', result['output_png']) | |||
| print(f'Output written to {osp.abspath("result.png")}') | |||
| @@ -8,6 +8,7 @@ import cv2 | |||
| from modelscope.pipelines import pipeline | |||
| from modelscope.pipelines.base import Pipeline | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.test_utils import test_level | |||
| class ImageCartoonTest(unittest.TestCase): | |||
| @@ -36,10 +37,12 @@ class ImageCartoonTest(unittest.TestCase): | |||
| img_cartoon = pipeline(Tasks.image_generation, model=model_dir) | |||
| self.pipeline_inference(img_cartoon, self.test_image) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_modelhub(self): | |||
| img_cartoon = pipeline(Tasks.image_generation, model=self.model_id) | |||
| self.pipeline_inference(img_cartoon, self.test_image) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_modelhub_default_model(self): | |||
| img_cartoon = pipeline(Tasks.image_generation) | |||
| self.pipeline_inference(img_cartoon, self.test_image) | |||
| @@ -0,0 +1,67 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import shutil | |||
| import unittest | |||
| from maas_hub.snapshot_download import snapshot_download | |||
| from modelscope.models import Model | |||
| from modelscope.models.nlp import SbertForSentenceSimilarity | |||
| from modelscope.pipelines import SentenceSimilarityPipeline, pipeline | |||
| from modelscope.preprocessors import SequenceClassificationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| from modelscope.utils.test_utils import test_level | |||
| class SentenceSimilarityTest(unittest.TestCase): | |||
| model_id = 'damo/nlp_structbert_sentence-similarity_chinese-base' | |||
| sentence1 = '今天气温比昨天高么?' | |||
| sentence2 = '今天湿度比昨天高么?' | |||
| def setUp(self) -> None: | |||
| # switch to False if downloading every time is not desired | |||
| purge_cache = True | |||
| if purge_cache: | |||
| shutil.rmtree( | |||
| get_model_cache_dir(self.model_id), ignore_errors=True) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run(self): | |||
| cache_path = snapshot_download(self.model_id) | |||
| tokenizer = SequenceClassificationPreprocessor(cache_path) | |||
| model = SbertForSentenceSimilarity(cache_path, tokenizer=tokenizer) | |||
| pipeline1 = SentenceSimilarityPipeline(model, preprocessor=tokenizer) | |||
| pipeline2 = pipeline( | |||
| Tasks.sentence_similarity, model=model, preprocessor=tokenizer) | |||
| print('test1') | |||
| print(f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' | |||
| f'pipeline1:{pipeline1(input=(self.sentence1, self.sentence2))}') | |||
| print() | |||
| print( | |||
| f'sentence1: {self.sentence1}\nsentence2: {self.sentence2}\n' | |||
| f'pipeline2: {pipeline2(input=(self.sentence1, self.sentence2))}') | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_from_modelhub(self): | |||
| model = Model.from_pretrained(self.model_id) | |||
| tokenizer = SequenceClassificationPreprocessor(model.model_dir) | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.sentence_similarity, | |||
| model=model, | |||
| preprocessor=tokenizer) | |||
| print(pipeline_ins(input=(self.sentence1, self.sentence2))) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_name(self): | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.sentence_similarity, model=self.model_id) | |||
| print(pipeline_ins(input=(self.sentence1, self.sentence2))) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_default_model(self): | |||
| pipeline_ins = pipeline(task=Tasks.sentence_similarity) | |||
| print(pipeline_ins(input=(self.sentence1, self.sentence2))) | |||
| if __name__ == '__main__': | |||
| unittest.main() | |||
| @@ -0,0 +1,56 @@ | |||
| import os.path | |||
| import shutil | |||
| import unittest | |||
| from modelscope.fileio import File | |||
| from modelscope.pipelines import pipeline | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| NEAREND_MIC_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/nearend_mic.wav' | |||
| FAREND_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/AEC/sample_audio/farend_speech.wav' | |||
| NEAREND_MIC_FILE = 'nearend_mic.wav' | |||
| FAREND_SPEECH_FILE = 'farend_speech.wav' | |||
| AEC_LIB_URL = 'http://isv-data.oss-cn-hangzhou.aliyuncs.com/ics%2FMaaS%2FAEC%2Flib%2Flibmitaec_pyio.so' \ | |||
| '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D' | |||
| AEC_LIB_FILE = 'libmitaec_pyio.so' | |||
| def download(remote_path, local_path): | |||
| local_dir = os.path.dirname(local_path) | |||
| if len(local_dir) > 0: | |||
| if not os.path.exists(local_dir): | |||
| os.makedirs(local_dir) | |||
| with open(local_path, 'wb') as ofile: | |||
| ofile.write(File.read(remote_path)) | |||
| class SpeechSignalProcessTest(unittest.TestCase): | |||
| def setUp(self) -> None: | |||
| self.model_id = 'damo/speech_dfsmn_aec_psm_16k' | |||
| # switch to False if downloading every time is not desired | |||
| purge_cache = True | |||
| if purge_cache: | |||
| shutil.rmtree( | |||
| get_model_cache_dir(self.model_id), ignore_errors=True) | |||
| # A temporary hack to provide the C++ lib. Download it first. | |||
| download(AEC_LIB_URL, AEC_LIB_FILE) | |||
| def test_run(self): | |||
| download(NEAREND_MIC_URL, NEAREND_MIC_FILE) | |||
| download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) | |||
| input = { | |||
| 'nearend_mic': NEAREND_MIC_FILE, | |||
| 'farend_speech': FAREND_SPEECH_FILE | |||
| } | |||
| aec = pipeline( | |||
| Tasks.speech_signal_process, | |||
| model=self.model_id, | |||
| pipeline_name=r'speech_dfsmn_aec_psm_16k') | |||
| aec(input, output_path='output.wav') | |||
| if __name__ == '__main__': | |||
| unittest.main() | |||
| @@ -12,6 +12,7 @@ from modelscope.preprocessors import SequenceClassificationPreprocessor | |||
| from modelscope.pydatasets import PyDataset | |||
| from modelscope.utils.constant import Hubs, Tasks | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| from modelscope.utils.test_utils import test_level | |||
| class SequenceClassificationTest(unittest.TestCase): | |||
| @@ -43,6 +44,7 @@ class SequenceClassificationTest(unittest.TestCase): | |||
| break | |||
| print(r) | |||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||
| def test_run(self): | |||
| model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \ | |||
| '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip' | |||
| @@ -67,6 +69,7 @@ class SequenceClassificationTest(unittest.TestCase): | |||
| Tasks.text_classification, model=model, preprocessor=preprocessor) | |||
| print(pipeline2('Hello world!')) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_from_modelhub(self): | |||
| model = Model.from_pretrained(self.model_id) | |||
| preprocessor = SequenceClassificationPreprocessor( | |||
| @@ -77,6 +80,7 @@ class SequenceClassificationTest(unittest.TestCase): | |||
| preprocessor=preprocessor) | |||
| self.predict(pipeline_ins) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_name(self): | |||
| text_classification = pipeline( | |||
| task=Tasks.text_classification, model=self.model_id) | |||
| @@ -85,6 +89,7 @@ class SequenceClassificationTest(unittest.TestCase): | |||
| 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) | |||
| self.printDataset(result) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_default_model(self): | |||
| text_classification = pipeline(task=Tasks.text_classification) | |||
| result = text_classification( | |||
| @@ -92,6 +97,7 @@ class SequenceClassificationTest(unittest.TestCase): | |||
| 'glue', name='sst2', target='sentence', hub=Hubs.huggingface)) | |||
| self.printDataset(result) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_dataset(self): | |||
| model = Model.from_pretrained(self.model_id) | |||
| preprocessor = SequenceClassificationPreprocessor( | |||
| @@ -4,47 +4,75 @@ import unittest | |||
| from maas_hub.snapshot_download import snapshot_download | |||
| from modelscope.models import Model | |||
| from modelscope.models.nlp import PalmForTextGenerationModel | |||
| from modelscope.models.nlp import PalmForTextGeneration | |||
| from modelscope.pipelines import TextGenerationPipeline, pipeline | |||
| from modelscope.preprocessors import TextGenerationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.test_utils import test_level | |||
| class TextGenerationTest(unittest.TestCase): | |||
| model_id = 'damo/nlp_palm_text-generation_chinese' | |||
| input1 = "今日天气类型='晴'&温度变化趋势='大幅上升'&最低气温='28℃'&最高气温='31℃'&体感='湿热'" | |||
| input2 = "今日天气类型='多云'&体感='舒适'&最低气温='26℃'&最高气温='30℃'" | |||
| model_id_zh = 'damo/nlp_palm2.0_text-generation_chinese-base' | |||
| model_id_en = 'damo/nlp_palm2.0_text-generation_english-base' | |||
| input_zh = """ | |||
| 本文总结了十个可穿戴产品的设计原则,而这些原则,同样也是笔者认为是这个行业最吸引人的地方: | |||
| 1.为人们解决重复性问题;2.从人开始,而不是从机器开始;3.要引起注意,但不要刻意;4.提升用户能力,而不是取代 | |||
| """ | |||
| input_en = """ | |||
| The Director of Public Prosecutions who let off Lord Janner over alleged child sex abuse started | |||
| her career at a legal chambers when the disgraced Labour peer was a top QC there . Alison Saunders , | |||
| 54 , sparked outrage last week when she decided the 86-year-old should not face a string of charges | |||
| of paedophilia against nine children because he has dementia . Today , newly-released documents | |||
| revealed damning evidence that abuse was covered up by police and social workers for more than 20 years . | |||
| And now it has emerged Mrs Saunders ' law career got off to a flying start when she secured her | |||
| pupillage -- a barrister 's training contract at 1 Garden Court Chambers in London in 1983 . | |||
| """ | |||
| @unittest.skip('skip temporarily to save test time') | |||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||
| def test_run(self): | |||
| cache_path = snapshot_download(self.model_id) | |||
| preprocessor = TextGenerationPreprocessor( | |||
| cache_path, first_sequence='sentence', second_sequence=None) | |||
| model = PalmForTextGenerationModel( | |||
| cache_path, tokenizer=preprocessor.tokenizer) | |||
| pipeline1 = TextGenerationPipeline(model, preprocessor) | |||
| pipeline2 = pipeline( | |||
| Tasks.text_generation, model=model, preprocessor=preprocessor) | |||
| print(f'input: {self.input1}\npipeline1: {pipeline1(self.input1)}') | |||
| print() | |||
| print(f'input: {self.input2}\npipeline2: {pipeline2(self.input2)}') | |||
| for model_id, input in ((self.model_id_zh, self.input_zh), | |||
| (self.model_id_en, self.input_en)): | |||
| cache_path = snapshot_download(model_id) | |||
| model = PalmForTextGeneration(cache_path) | |||
| preprocessor = TextGenerationPreprocessor( | |||
| cache_path, | |||
| model.tokenizer, | |||
| first_sequence='sentence', | |||
| second_sequence=None) | |||
| pipeline1 = TextGenerationPipeline(model, preprocessor) | |||
| pipeline2 = pipeline( | |||
| Tasks.text_generation, model=model, preprocessor=preprocessor) | |||
| print( | |||
| f'pipeline1: {pipeline1(input)}\npipeline2: {pipeline2(input)}' | |||
| ) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_model_from_modelhub(self): | |||
| model = Model.from_pretrained(self.model_id) | |||
| preprocessor = TextGenerationPreprocessor( | |||
| model.model_dir, first_sequence='sentence', second_sequence=None) | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.text_generation, model=model, preprocessor=preprocessor) | |||
| print(pipeline_ins(self.input1)) | |||
| for model_id, input in ((self.model_id_zh, self.input_zh), | |||
| (self.model_id_en, self.input_en)): | |||
| model = Model.from_pretrained(model_id) | |||
| preprocessor = TextGenerationPreprocessor( | |||
| model.model_dir, | |||
| model.tokenizer, | |||
| first_sequence='sentence', | |||
| second_sequence=None) | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.text_generation, | |||
| model=model, | |||
| preprocessor=preprocessor) | |||
| print(pipeline_ins(input)) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_model_name(self): | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.text_generation, model=self.model_id) | |||
| print(pipeline_ins(self.input2)) | |||
| for model_id, input in ((self.model_id_zh, self.input_zh), | |||
| (self.model_id_en, self.input_en)): | |||
| pipeline_ins = pipeline(task=Tasks.text_generation, model=model_id) | |||
| print(pipeline_ins(input)) | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_run_with_default_model(self): | |||
| pipeline_ins = pipeline(task=Tasks.text_generation) | |||
| print(pipeline_ins(self.input2)) | |||
| print(pipeline_ins(self.input_zh)) | |||
| if __name__ == '__main__': | |||
| @@ -0,0 +1,62 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import shutil | |||
| import unittest | |||
| from maas_hub.snapshot_download import snapshot_download | |||
| from modelscope.models import Model | |||
| from modelscope.models.nlp import StructBertForTokenClassification | |||
| from modelscope.pipelines import WordSegmentationPipeline, pipeline | |||
| from modelscope.preprocessors import TokenClassifcationPreprocessor | |||
| from modelscope.utils.constant import Tasks | |||
| from modelscope.utils.hub import get_model_cache_dir | |||
| from modelscope.utils.test_utils import test_level | |||
| class WordSegmentationTest(unittest.TestCase): | |||
| model_id = 'damo/nlp_structbert_word-segmentation_chinese-base' | |||
| sentence = '今天天气不错,适合出去游玩' | |||
| def setUp(self) -> None: | |||
| # switch to False if downloading every time is not desired | |||
| purge_cache = True | |||
| if purge_cache: | |||
| shutil.rmtree( | |||
| get_model_cache_dir(self.model_id), ignore_errors=True) | |||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||
| def test_run_by_direct_model_download(self): | |||
| cache_path = snapshot_download(self.model_id) | |||
| tokenizer = TokenClassifcationPreprocessor(cache_path) | |||
| model = StructBertForTokenClassification( | |||
| cache_path, tokenizer=tokenizer) | |||
| pipeline1 = WordSegmentationPipeline(model, preprocessor=tokenizer) | |||
| pipeline2 = pipeline( | |||
| Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||
| print(f'sentence: {self.sentence}\n' | |||
| f'pipeline1:{pipeline1(input=self.sentence)}') | |||
| print() | |||
| print(f'pipeline2: {pipeline2(input=self.sentence)}') | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_from_modelhub(self): | |||
| model = Model.from_pretrained(self.model_id) | |||
| tokenizer = TokenClassifcationPreprocessor(model.model_dir) | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.word_segmentation, model=model, preprocessor=tokenizer) | |||
| print(pipeline_ins(input=self.sentence)) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_model_name(self): | |||
| pipeline_ins = pipeline( | |||
| task=Tasks.word_segmentation, model=self.model_id) | |||
| print(pipeline_ins(input=self.sentence)) | |||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||
| def test_run_with_default_model(self): | |||
| pipeline_ins = pipeline(task=Tasks.word_segmentation) | |||
| print(pipeline_ins(input=self.sentence)) | |||
| if __name__ == '__main__': | |||
| unittest.main() | |||
| @@ -0,0 +1,20 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import unittest | |||
| import PIL | |||
| from modelscope.preprocessors import load_image | |||
| from modelscope.utils.logger import get_logger | |||
| class ImagePreprocessorTest(unittest.TestCase): | |||
| def test_load(self): | |||
| img = load_image('data/test/images/image_matting.png') | |||
| self.assertTrue(isinstance(img, PIL.Image.Image)) | |||
| self.assertEqual(img.size, (948, 533)) | |||
| if __name__ == '__main__': | |||
| unittest.main() | |||
| @@ -7,6 +7,11 @@ import sys | |||
| import unittest | |||
| from fnmatch import fnmatch | |||
| from modelscope.utils.logger import get_logger | |||
| from modelscope.utils.test_utils import set_test_level, test_level | |||
| logger = get_logger() | |||
| def gather_test_cases(test_dir, pattern, list_tests): | |||
| case_list = [] | |||
| @@ -49,5 +54,9 @@ if __name__ == '__main__': | |||
| '--pattern', default='test_*.py', help='test file pattern') | |||
| parser.add_argument( | |||
| '--test_dir', default='tests', help='directory to be tested') | |||
| parser.add_argument( | |||
| '--level', default=0, help='2 -- all, 1 -- p1, 0 -- p0') | |||
| args = parser.parse_args() | |||
| set_test_level(args.level) | |||
| logger.info(f'TEST LEVEL: {test_level()}') | |||
| main(args) | |||
| @@ -1,11 +1,8 @@ | |||
| # Copyright (c) Alibaba, Inc. and its affiliates. | |||
| import argparse | |||
| import os.path as osp | |||
| import tempfile | |||
| import unittest | |||
| from pathlib import Path | |||
| from modelscope.fileio import dump, load | |||
| from modelscope.utils.config import Config | |||
| obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}} | |||
| @@ -14,25 +11,25 @@ obj = {'a': 1, 'b': {'c': [1, 2, 3], 'd': 'dd'}} | |||
| class ConfigTest(unittest.TestCase): | |||
| def test_json(self): | |||
| config_file = 'configs/examples/config.json' | |||
| config_file = 'configs/examples/configuration.json' | |||
| cfg = Config.from_file(config_file) | |||
| self.assertEqual(cfg.a, 1) | |||
| self.assertEqual(cfg.b, obj['b']) | |||
| def test_yaml(self): | |||
| config_file = 'configs/examples/config.yaml' | |||
| config_file = 'configs/examples/configuration.yaml' | |||
| cfg = Config.from_file(config_file) | |||
| self.assertEqual(cfg.a, 1) | |||
| self.assertEqual(cfg.b, obj['b']) | |||
| def test_py(self): | |||
| config_file = 'configs/examples/config.py' | |||
| config_file = 'configs/examples/configuration.py' | |||
| cfg = Config.from_file(config_file) | |||
| self.assertEqual(cfg.a, 1) | |||
| self.assertEqual(cfg.b, obj['b']) | |||
| def test_dump(self): | |||
| config_file = 'configs/examples/config.py' | |||
| config_file = 'configs/examples/configuration.py' | |||
| cfg = Config.from_file(config_file) | |||
| self.assertEqual(cfg.a, 1) | |||
| self.assertEqual(cfg.b, obj['b']) | |||
| @@ -53,7 +50,7 @@ class ConfigTest(unittest.TestCase): | |||
| self.assertEqual(yaml_str, infile.read()) | |||
| def test_to_dict(self): | |||
| config_file = 'configs/examples/config.json' | |||
| config_file = 'configs/examples/configuration.json' | |||
| cfg = Config.from_file(config_file) | |||
| d = cfg.to_dict() | |||
| print(d) | |||