From d4692b5ada73936b119daf440f9409f06524c04d Mon Sep 17 00:00:00 2001
From: "xixing.tj"
Date: Tue, 28 Jun 2022 14:03:01 +0800
Subject: [PATCH 1/9] [to #42322933]Merge branch 'master' into ocr/ocr_detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the ocr_detection unit test bug on the master branch

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9112290

* create ocr_detection task
* fix code check error
* fix code check error
* fix code check issue
* fix code check issue
* replace c++ nms with python version
* fix code check issue
* fix code check issue
* rename maas_lib
* merge master to ocr/ocr_detection
* add model_hub sup for ocr_detection
* fix bug
* replace c++ decoder with python version
* fix bug
* Merge branch 'master' into ocr/ocr_detection
* merge master
* fix code check
* update
* add requirements for ocr_detection
* fix model_hub fetch bug
* remove debug code
* Merge branch 'master' into ocr/ocr_detection
* add local test image for ocr_detection
* update requirements for model_hub
* Merge branch 'master' into ocr/ocr_detection
* fix bug for full case test
* remove ema for ocr_detection
* Merge branch 'master' into ocr/ocr_detection
* apply ocr_detection test case
* Merge branch 'master' into ocr/ocr_detection
* update slim dependency for ocr_detection
* add more test case for ocr_detection
* release tf graph before create
* recover ema for ocr_detection model
* fix code
* Merge branch 'master' into ocr/ocr_detection
* fix code
---
 .../pipelines/cv/ocr_detection_pipeline.py    | 94 ++++++++++---------
 .../model_resnet_mutex_v4_linewithchar.py     |  6 +-
 .../pipelines/cv/ocr_utils/resnet18_v1.py     |  6 +-
 .../pipelines/cv/ocr_utils/resnet_utils.py    |  6 +-
 tests/pipelines/test_ocr_detection.py         |  5 +
 5 files changed, 72 insertions(+), 45 deletions(-)

diff --git a/modelscope/pipelines/cv/ocr_detection_pipeline.py b/modelscope/pipelines/cv/ocr_detection_pipeline.py
index 0502fe36..4856b06b 100644
--- a/modelscope/pipelines/cv/ocr_detection_pipeline.py
+++ b/modelscope/pipelines/cv/ocr_detection_pipeline.py
@@ -8,7 +8,6 @@ import cv2
 import numpy as np
 import PIL
 import tensorflow as tf
-import tf_slim as slim
 
 from modelscope.metainfo import Pipelines
 from modelscope.pipelines.base import Input
@@ -19,6 +18,11 @@ from ..base import Pipeline
 from ..builder import PIPELINES
 from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils
 
+if tf.__version__ >= '2.0':
+    import tf_slim as slim
+else:
+    from tensorflow.contrib import slim
+
 if tf.__version__ >= '2.0':
     tf = tf.compat.v1
     tf.compat.v1.disable_eager_execution()
@@ -44,6 +48,7 @@ class OCRDetectionPipeline(Pipeline):
 
     def __init__(self, model: str):
         super().__init__(model=model)
+        tf.reset_default_graph()
         model_path = osp.join(
             osp.join(self.model, ModelFile.TF_CHECKPOINT_FOLDER),
             'checkpoint-80000')
@@ -51,51 +56,56 @@
         config = tf.ConfigProto(allow_soft_placement=True)
         config.gpu_options.allow_growth = True
         self._session = tf.Session(config=config)
-        global_step = tf.get_variable(
-            'global_step', [],
-            initializer=tf.constant_initializer(0),
-            dtype=tf.int64,
-            trainable=False)
-        variable_averages = tf.train.ExponentialMovingAverage(
-            0.997, global_step)
         self.input_images = tf.placeholder(
             tf.float32, shape=[1, 1024, 1024, 3], name='input_images')
         self.output = {}
 
-        # detector
-        detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector()
-        all_maps = detector.build_model(self.input_images, is_training=False)
-
-        # decode local predictions
-
all_nodes, all_links, all_reg = [], [], [] - for i, maps in enumerate(all_maps): - cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2] - reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE) - - cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2])) - - lnk_prob_pos = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, :2]) - lnk_prob_mut = tf.nn.softmax(tf.reshape(lnk_maps, [-1, 4])[:, 2:]) - lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1) - - all_nodes.append(cls_prob) - all_links.append(lnk_prob) - all_reg.append(reg_maps) - - # decode segments and links - image_size = tf.shape(self.input_images)[1:3] - segments, group_indices, segment_counts, _ = ops.decode_segments_links_python( - image_size, - all_nodes, - all_links, - all_reg, - anchor_sizes=list(detector.anchor_sizes)) - - # combine segments - combined_rboxes, combined_counts = ops.combine_segments_python( - segments, group_indices, segment_counts) - self.output['combined_rboxes'] = combined_rboxes - self.output['combined_counts'] = combined_counts + with tf.variable_scope('', reuse=tf.AUTO_REUSE): + global_step = tf.get_variable( + 'global_step', [], + initializer=tf.constant_initializer(0), + dtype=tf.int64, + trainable=False) + variable_averages = tf.train.ExponentialMovingAverage( + 0.997, global_step) + + # detector + detector = model_resnet_mutex_v4_linewithchar.SegLinkDetector() + all_maps = detector.build_model( + self.input_images, is_training=False) + + # decode local predictions + all_nodes, all_links, all_reg = [], [], [] + for i, maps in enumerate(all_maps): + cls_maps, lnk_maps, reg_maps = maps[0], maps[1], maps[2] + reg_maps = tf.multiply(reg_maps, OFFSET_VARIANCE) + + cls_prob = tf.nn.softmax(tf.reshape(cls_maps, [-1, 2])) + + lnk_prob_pos = tf.nn.softmax( + tf.reshape(lnk_maps, [-1, 4])[:, :2]) + lnk_prob_mut = tf.nn.softmax( + tf.reshape(lnk_maps, [-1, 4])[:, 2:]) + lnk_prob = tf.concat([lnk_prob_pos, lnk_prob_mut], axis=1) + + all_nodes.append(cls_prob) + all_links.append(lnk_prob) + all_reg.append(reg_maps) + + # decode segments and links + image_size = tf.shape(self.input_images)[1:3] + segments, group_indices, segment_counts, _ = ops.decode_segments_links_python( + image_size, + all_nodes, + all_links, + all_reg, + anchor_sizes=list(detector.anchor_sizes)) + + # combine segments + combined_rboxes, combined_counts = ops.combine_segments_python( + segments, group_indices, segment_counts) + self.output['combined_rboxes'] = combined_rboxes + self.output['combined_counts'] = combined_counts with self._session.as_default() as sess: logger.info(f'loading model from {model_path}') diff --git a/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py index 50b8ba02..d03ff405 100644 --- a/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py +++ b/modelscope/pipelines/cv/ocr_utils/model_resnet_mutex_v4_linewithchar.py @@ -1,8 +1,12 @@ import tensorflow as tf -import tf_slim as slim from . 
import ops, resnet18_v1, resnet_utils
 
+if tf.__version__ >= '2.0':
+    import tf_slim as slim
+else:
+    from tensorflow.contrib import slim
+
 if tf.__version__ >= '2.0':
     tf = tf.compat.v1
 
diff --git a/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
index 6371d4e5..7930c5a3 100644
--- a/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
+++ b/modelscope/pipelines/cv/ocr_utils/resnet18_v1.py
@@ -30,10 +30,14 @@ ResNet-101 for semantic segmentation into 21 classes:
    output_stride=16)
 """
 import tensorflow as tf
-import tf_slim as slim
 
 from . import resnet_utils
 
+if tf.__version__ >= '2.0':
+    import tf_slim as slim
+else:
+    from tensorflow.contrib import slim
+
 if tf.__version__ >= '2.0':
     tf = tf.compat.v1
 
diff --git a/modelscope/pipelines/cv/ocr_utils/resnet_utils.py b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py
index e0e240c8..0a9af224 100644
--- a/modelscope/pipelines/cv/ocr_utils/resnet_utils.py
+++ b/modelscope/pipelines/cv/ocr_utils/resnet_utils.py
@@ -19,7 +19,11 @@ implementation is more memory efficient.
 import collections
 
 import tensorflow as tf
-import tf_slim as slim
+
+if tf.__version__ >= '2.0':
+    import tf_slim as slim
+else:
+    from tensorflow.contrib import slim
 
 if tf.__version__ >= '2.0':
     tf = tf.compat.v1
 
diff --git a/tests/pipelines/test_ocr_detection.py b/tests/pipelines/test_ocr_detection.py
index 986961b7..d1ecd4e4 100644
--- a/tests/pipelines/test_ocr_detection.py
+++ b/tests/pipelines/test_ocr_detection.py
@@ -27,6 +27,11 @@ class OCRDetectionTest(unittest.TestCase):
         print('ocr detection results: ')
         print(result)
 
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run_with_model_from_modelhub(self):
+        ocr_detection = pipeline(Tasks.ocr_detection, model=self.model_id)
+        self.pipeline_inference(ocr_detection, self.test_image)
+
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_modelhub_default_model(self):
         ocr_detection = pipeline(Tasks.ocr_detection)

From 664de39b7908fba039edd5adb0b601f7535c70bb Mon Sep 17 00:00:00 2001
From: "yanheng.wyh"
Date: Tue, 28 Jun 2022 14:04:40 +0800
Subject: [PATCH 2/9] [to #42322933]animal recognition model

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9126742

* animal recognition model
* update codes
* delete file
* f
* pre commits
* revise
* fix last comment
* fix comments
* fix precommit
* fix comments
* Merge remote-tracking branch 'origin' into cv/animalRecog
* fix comments
---
 modelscope/metainfo.py                        |   1 +
 .../models/cv/animal_recognition/__init__.py  |   0
 .../models/cv/animal_recognition/resnet.py    | 430 ++++++++++++++++++
 .../models/cv/animal_recognition/splat.py     | 125 +++++
 modelscope/pipelines/cv/__init__.py           |   1 +
 .../pipelines/cv/animal_recog_pipeline.py     | 127 ++++++
 tests/pipelines/test_animal_recognation.py    |  20 +
 7 files changed, 704 insertions(+)
 create mode 100644 modelscope/models/cv/animal_recognition/__init__.py
 create mode 100644 modelscope/models/cv/animal_recognition/resnet.py
 create mode 100644 modelscope/models/cv/animal_recognition/splat.py
 create mode 100644 modelscope/pipelines/cv/animal_recog_pipeline.py
 create mode 100644 tests/pipelines/test_animal_recognation.py

diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 680fe2e8..9fad45e2 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -43,6 +43,7 @@ class Pipelines(object):
     person_image_cartoon = 'unet-person-image-cartoon'
     ocr_detection = 'resnet18-ocr-detection'
     action_recognition =
'TAdaConv_action-recognition' + animal_recognation = 'resnet101-animal_recog' # nlp tasks sentence_similarity = 'sentence-similarity' diff --git a/modelscope/models/cv/animal_recognition/__init__.py b/modelscope/models/cv/animal_recognition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/cv/animal_recognition/resnet.py b/modelscope/models/cv/animal_recognition/resnet.py new file mode 100644 index 00000000..1fd4b93e --- /dev/null +++ b/modelscope/models/cv/animal_recognition/resnet.py @@ -0,0 +1,430 @@ +import math + +import torch +import torch.nn as nn + +from .splat import SplAtConv2d + +__all__ = ['ResNet', 'Bottleneck'] + + +class DropBlock2D(object): + + def __init__(self, *args, **kwargs): + raise NotImplementedError + + +class GlobalAvgPool2d(nn.Module): + + def __init__(self): + """Global average pooling over the input's spatial dimensions""" + super(GlobalAvgPool2d, self).__init__() + + def forward(self, inputs): + return nn.functional.adaptive_avg_pool2d(inputs, + 1).view(inputs.size(0), -1) + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + radix=1, + cardinality=1, + bottleneck_width=64, + avd=False, + avd_first=False, + dilation=1, + is_first=False, + rectified_conv=False, + rectify_avg=False, + norm_layer=None, + dropblock_prob=0.0, + last_gamma=False): + super(Bottleneck, self).__init__() + group_width = int(planes * (bottleneck_width / 64.)) * cardinality + self.conv1 = nn.Conv2d( + inplanes, group_width, kernel_size=1, bias=False) + self.bn1 = norm_layer(group_width) + self.dropblock_prob = dropblock_prob + self.radix = radix + self.avd = avd and (stride > 1 or is_first) + self.avd_first = avd_first + + if self.avd: + self.avd_layer = nn.AvgPool2d(3, stride, padding=1) + stride = 1 + + if dropblock_prob > 0.0: + self.dropblock1 = DropBlock2D(dropblock_prob, 3) + if radix == 1: + self.dropblock2 = DropBlock2D(dropblock_prob, 3) + self.dropblock3 = DropBlock2D(dropblock_prob, 3) + + if radix >= 1: + self.conv2 = SplAtConv2d( + group_width, + group_width, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=cardinality, + bias=False, + radix=radix, + rectify=rectified_conv, + rectify_avg=rectify_avg, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + elif rectified_conv: + from rfconv import RFConv2d + self.conv2 = RFConv2d( + group_width, + group_width, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=cardinality, + bias=False, + average_mode=rectify_avg) + self.bn2 = norm_layer(group_width) + else: + self.conv2 = nn.Conv2d( + group_width, + group_width, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=cardinality, + bias=False) + self.bn2 = norm_layer(group_width) + + self.conv3 = nn.Conv2d( + group_width, planes * 4, kernel_size=1, bias=False) + self.bn3 = norm_layer(planes * 4) + + if last_gamma: + from torch.nn.init import zeros_ + zeros_(self.bn3.weight) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.dilation = dilation + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + if self.dropblock_prob > 0.0: + out = self.dropblock1(out) + out = self.relu(out) + + if self.avd and self.avd_first: + out = self.avd_layer(out) + + out = self.conv2(out) + if self.radix == 0: + out = self.bn2(out) + if self.dropblock_prob > 0.0: + out = self.dropblock2(out) + out = 
self.relu(out) + + if self.avd and not self.avd_first: + out = self.avd_layer(out) + + out = self.conv3(out) + out = self.bn3(out) + if self.dropblock_prob > 0.0: + out = self.dropblock3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, + block, + layers, + radix=1, + groups=1, + bottleneck_width=64, + num_classes=1000, + dilated=False, + dilation=1, + deep_stem=False, + stem_width=64, + avg_down=False, + rectified_conv=False, + rectify_avg=False, + avd=False, + avd_first=False, + final_drop=0.0, + dropblock_prob=0, + last_gamma=False, + norm_layer=nn.BatchNorm2d): + self.cardinality = groups + self.bottleneck_width = bottleneck_width + # ResNet-D params + self.inplanes = stem_width * 2 if deep_stem else 64 + self.avg_down = avg_down + self.last_gamma = last_gamma + # ResNeSt params + self.radix = radix + self.avd = avd + self.avd_first = avd_first + + super(ResNet, self).__init__() + self.rectified_conv = rectified_conv + self.rectify_avg = rectify_avg + if rectified_conv: + from rfconv import RFConv2d + conv_layer = RFConv2d + else: + conv_layer = nn.Conv2d + conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {} + if deep_stem: + self.conv1 = nn.Sequential( + conv_layer( + 3, + stem_width, + kernel_size=3, + stride=2, + padding=1, + bias=False, + **conv_kwargs), + norm_layer(stem_width), + nn.ReLU(inplace=True), + conv_layer( + stem_width, + stem_width, + kernel_size=3, + stride=1, + padding=1, + bias=False, + **conv_kwargs), + norm_layer(stem_width), + nn.ReLU(inplace=True), + conv_layer( + stem_width, + stem_width * 2, + kernel_size=3, + stride=1, + padding=1, + bias=False, + **conv_kwargs), + ) + else: + self.conv1 = conv_layer( + 3, + 64, + kernel_size=7, + stride=2, + padding=3, + bias=False, + **conv_kwargs) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer( + block, 64, layers[0], norm_layer=norm_layer, is_first=False) + self.layer2 = self._make_layer( + block, 128, layers[1], stride=2, norm_layer=norm_layer) + if dilated or dilation == 4: + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=1, + dilation=2, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=1, + dilation=4, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + elif dilation == 2: + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=2, + dilation=1, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=1, + dilation=2, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + else: + self.layer3 = self._make_layer( + block, + 256, + layers[2], + stride=2, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + self.layer4 = self._make_layer( + block, + 512, + layers[3], + stride=2, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob) + self.avgpool = GlobalAvgPool2d() + self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, norm_layer): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, + block, + planes, + blocks, + stride=1, + dilation=1, + norm_layer=None, + dropblock_prob=0.0, + is_first=True): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + down_layers = [] + if self.avg_down: + if dilation == 1: + down_layers.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + else: + down_layers.append( + nn.AvgPool2d( + kernel_size=1, + stride=1, + ceil_mode=True, + count_include_pad=False)) + down_layers.append( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=1, + bias=False)) + else: + down_layers.append( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False)) + down_layers.append(norm_layer(planes * block.expansion)) + downsample = nn.Sequential(*down_layers) + + layers = [] + if dilation == 1 or dilation == 2: + layers.append( + block( + self.inplanes, + planes, + stride, + downsample=downsample, + radix=self.radix, + cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, + avd_first=self.avd_first, + dilation=1, + is_first=is_first, + rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + elif dilation == 4: + layers.append( + block( + self.inplanes, + planes, + stride, + downsample=downsample, + radix=self.radix, + cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, + avd_first=self.avd_first, + dilation=2, + is_first=is_first, + rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + else: + raise RuntimeError('=> unknown dilation size: {}'.format(dilation)) + + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + radix=self.radix, + cardinality=self.cardinality, + bottleneck_width=self.bottleneck_width, + avd=self.avd, + avd_first=self.avd_first, + dilation=dilation, + rectified_conv=self.rectified_conv, + rectify_avg=self.rectify_avg, + norm_layer=norm_layer, + dropblock_prob=dropblock_prob, + last_gamma=self.last_gamma)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + if self.drop: + x = self.drop(x) + x = self.fc(x) + + return x diff --git a/modelscope/models/cv/animal_recognition/splat.py b/modelscope/models/cv/animal_recognition/splat.py new file mode 100644 index 00000000..b12bf154 --- /dev/null +++ b/modelscope/models/cv/animal_recognition/splat.py @@ -0,0 +1,125 @@ +"""Split-Attention""" + +import torch +import torch.nn.functional as F +from torch import nn +from torch.nn import BatchNorm2d, Conv2d, Linear, Module, ReLU +from torch.nn.modules.utils import _pair + +__all__ = ['SplAtConv2d'] + + +class SplAtConv2d(Module): + """Split-Attention Conv2d + """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=(1, 1), + padding=(0, 0), + dilation=(1, 1), + groups=1, + bias=True, + radix=2, + reduction_factor=4, + rectify=False, + rectify_avg=False, + norm_layer=None, + dropblock_prob=0.0, + 
**kwargs): + super(SplAtConv2d, self).__init__() + padding = _pair(padding) + self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) + self.rectify_avg = rectify_avg + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.cardinality = groups + self.channels = channels + self.dropblock_prob = dropblock_prob + if self.rectify: + from rfconv import RFConv2d + self.conv = RFConv2d( + in_channels, + channels * radix, + kernel_size, + stride, + padding, + dilation, + groups=groups * radix, + bias=bias, + average_mode=rectify_avg, + **kwargs) + else: + self.conv = Conv2d( + in_channels, + channels * radix, + kernel_size, + stride, + padding, + dilation, + groups=groups * radix, + bias=bias, + **kwargs) + self.use_bn = norm_layer is not None + if self.use_bn: + self.bn0 = norm_layer(channels * radix) + self.relu = ReLU(inplace=True) + self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) + if self.use_bn: + self.bn1 = norm_layer(inter_channels) + self.fc2 = Conv2d( + inter_channels, channels * radix, 1, groups=self.cardinality) + if dropblock_prob > 0.0: + self.dropblock = DropBlock2D(dropblock_prob, 3) + self.rsoftmax = rSoftMax(radix, groups) + + def forward(self, x): + x = self.conv(x) + if self.use_bn: + x = self.bn0(x) + if self.dropblock_prob > 0.0: + x = self.dropblock(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + if self.radix > 1: + splited = torch.split(x, rchannel // self.radix, dim=1) + gap = sum(splited) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + if self.use_bn: + gap = self.bn1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = torch.split(atten, rchannel // self.radix, dim=1) + out = sum([att * split for (att, split) in zip(attens, splited)]) + else: + out = atten * x + return out.contiguous() + + +class rSoftMax(nn.Module): + + def __init__(self, radix, cardinality): + super().__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x diff --git a/modelscope/pipelines/cv/__init__.py b/modelscope/pipelines/cv/__init__.py index 68d875ec..b046e076 100644 --- a/modelscope/pipelines/cv/__init__.py +++ b/modelscope/pipelines/cv/__init__.py @@ -1,4 +1,5 @@ from .action_recognition_pipeline import ActionRecognitionPipeline +from .animal_recog_pipeline import AnimalRecogPipeline from .image_cartoon_pipeline import ImageCartoonPipeline from .image_matting_pipeline import ImageMattingPipeline from .ocr_detection_pipeline import OCRDetectionPipeline diff --git a/modelscope/pipelines/cv/animal_recog_pipeline.py b/modelscope/pipelines/cv/animal_recog_pipeline.py new file mode 100644 index 00000000..eee9e844 --- /dev/null +++ b/modelscope/pipelines/cv/animal_recog_pipeline.py @@ -0,0 +1,127 @@ +import os.path as osp +import tempfile +from typing import Any, Dict + +import cv2 +import numpy as np +import torch +from PIL import Image +from torchvision import transforms + +from modelscope.fileio import File +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.metainfo import Pipelines +from modelscope.models.cv.animal_recognition import resnet +from modelscope.pipelines.base import Input +from modelscope.preprocessors import load_image 
+from modelscope.utils.constant import ModelFile, Tasks
+from modelscope.utils.logger import get_logger
+from ..base import Pipeline
+from ..builder import PIPELINES
+
+logger = get_logger()
+
+
+@PIPELINES.register_module(
+    Tasks.image_classification, module_name=Pipelines.animal_recognation)
+class AnimalRecogPipeline(Pipeline):
+
+    def __init__(self, model: str):
+        super().__init__(model=model)
+        import torch
+
+        def resnest101(**kwargs):
+            model = resnet.ResNet(
+                resnet.Bottleneck, [3, 4, 23, 3],
+                radix=2,
+                groups=1,
+                bottleneck_width=64,
+                deep_stem=True,
+                stem_width=64,
+                avg_down=True,
+                avd=True,
+                avd_first=False,
+                **kwargs)
+            return model
+
+        def filter_param(src_params, own_state):
+            copied_keys = []
+            for name, param in src_params.items():
+                if 'module.' == name[0:7]:
+                    name = name[7:]
+                if '.module.' not in list(own_state.keys())[0]:
+                    name = name.replace('.module.', '.')
+                if (name in own_state) and (own_state[name].shape
+                                            == param.shape):
+                    own_state[name].copy_(param)
+                    copied_keys.append(name)
+
+        def load_pretrained(model, src_params):
+            if 'state_dict' in src_params:
+                src_params = src_params['state_dict']
+            own_state = model.state_dict()
+            filter_param(src_params, own_state)
+            model.load_state_dict(own_state)
+
+        self.model = resnest101(num_classes=8288)
+        local_model_dir = model
+        if osp.exists(model):
+            local_model_dir = model
+        else:
+            local_model_dir = snapshot_download(model)
+        self.local_path = local_model_dir
+        src_params = torch.load(
+            osp.join(local_model_dir, 'pytorch_model.pt'), 'cpu')
+        load_pretrained(self.model, src_params)
+        logger.info('load model done')
+
+    def preprocess(self, input: Input) -> Dict[str, Any]:
+        if isinstance(input, str):
+            img = load_image(input)
+        elif isinstance(input, Image.Image):
+            img = input.convert('RGB')
+        elif isinstance(input, np.ndarray):
+            if len(input.shape) == 2:
+                input = cv2.cvtColor(input, cv2.COLOR_GRAY2BGR)
+            img = input[:, :, ::-1]
+            img = Image.fromarray(img.astype('uint8')).convert('RGB')
+        else:
+            raise TypeError(f'input should be either str, PIL.Image,'
+                            f' np.array, but got {type(input)}')
+
+        normalize = transforms.Normalize(
+            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        test_transforms = transforms.Compose([
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(), normalize
+        ])
+        img = test_transforms(img)
+        result = {'img': img}
+        return result
+
+    def forward(self, input: Dict[str, Any]) -> Dict[str, Any]:
+
+        def set_phase(model, is_train):
+            if is_train:
+                model.train()
+            else:
+                model.eval()
+
+        is_train = False
+        set_phase(self.model, is_train)
+        img = input['img']
+        input_img = torch.unsqueeze(img, 0)
+        outputs = self.model(input_img)
+        return {'outputs': outputs}
+
+    def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
+        with open(label_mapping_path, 'r') as f:
+            label_mapping = f.readlines()
+        score = torch.max(inputs['outputs'])
+        inputs = {
+            'scores': score.item(),
+            'labels': label_mapping[inputs['outputs'].argmax()].split('\t')[1]
+        }
+        return inputs
diff --git a/tests/pipelines/test_animal_recognation.py b/tests/pipelines/test_animal_recognation.py
new file mode 100644
index 00000000..d0f42dc3
--- /dev/null
+++ b/tests/pipelines/test_animal_recognation.py
@@ -0,0 +1,20 @@
+import unittest
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.test_utils import test_level
+
+
+class AnimalRecognitionTest(unittest.TestCase):
+
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+    def test_run(self):
+        animal_recog = pipeline(
+            Tasks.image_classification,
+            model='damo/cv_resnest101_animal_recognation')
+        result = animal_recog('data/test/images/image1.jpg')
+        print(result)
+
+
+if __name__ == '__main__':
+    unittest.main()

From a7c1cd0fc92ee0a3058cec8f4ccde1c0f641e982 Mon Sep 17 00:00:00 2001
From: "suluyan.sly"
Date: Tue, 28 Jun 2022 14:34:16 +0800
Subject: [PATCH 3/9] [to #42322933]feat: add nlp-chinese-bert-fill-mask-pipeline to maas_lib

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9155437
---
 .../models/nlp/masked_language_model.py       | 48 ++++++++++++-------
 .../pipelines/nlp/fill_mask_pipeline.py       | 36 ++++++++------
 modelscope/preprocessors/nlp.py               | 11 +++--
 tests/pipelines/test_fill_mask.py             | 36 +++++++++++++-
 4 files changed, 94 insertions(+), 37 deletions(-)

diff --git a/modelscope/models/nlp/masked_language_model.py b/modelscope/models/nlp/masked_language_model.py
index fd5f97e6..a760822b 100644
--- a/modelscope/models/nlp/masked_language_model.py
+++ b/modelscope/models/nlp/masked_language_model.py
@@ -2,24 +2,28 @@ from typing import Any, Dict, Optional, Union
 
 import numpy as np
 
-from modelscope.metainfo import Models
-from modelscope.utils.constant import Tasks
+from ...metainfo import Models
+from ...utils.constant import Tasks
 from ..base import Model, Tensor
 from ..builder import MODELS
 
-__all__ = ['StructBertForMaskedLM', 'VecoForMaskedLM']
+__all__ = ['BertForMaskedLM', 'StructBertForMaskedLM', 'VecoForMaskedLM']
 
 
-class AliceMindBaseForMaskedLM(Model):
+class MaskedLanguageModelBase(Model):
 
     def __init__(self, model_dir: str, *args, **kwargs):
-        from sofa.utils.backend import AutoConfig, AutoModelForMaskedLM
-
         self.model_dir = model_dir
         super().__init__(model_dir, *args, **kwargs)
+        self.model = self.build_model()
 
-        self.config = AutoConfig.from_pretrained(model_dir)
-        self.model = AutoModelForMaskedLM.from_pretrained(
-            model_dir, config=self.config)
+    def build_model(self):
+        raise NotImplementedError()
+
+    @property
+    def config(self):
+        if hasattr(self.model, 'config'):
+            return self.model.config
+        return None
 
     def forward(self, inputs: Dict[str, Tensor]) -> Dict[str, np.ndarray]:
         """return the result by the model
@@ -38,14 +42,24 @@
 
 
 @MODELS.register_module(Tasks.fill_mask, module_name=Models.structbert)
-class StructBertForMaskedLM(AliceMindBaseForMaskedLM):
-    # The StructBert for MaskedLM uses the same underlying model structure
-    # as the base model class.
-    pass
+class StructBertForMaskedLM(MaskedLanguageModelBase):
+
+    def build_model(self):
+        from sofa import SbertForMaskedLM
+        return SbertForMaskedLM.from_pretrained(self.model_dir)
 
 
 @MODELS.register_module(Tasks.fill_mask, module_name=Models.veco)
-class VecoForMaskedLM(AliceMindBaseForMaskedLM):
-    # The Veco for MaskedLM uses the same underlying model structure
-    # as the base model class.
- pass +class VecoForMaskedLM(MaskedLanguageModelBase): + + def build_model(self): + from sofa import VecoForMaskedLM + return VecoForMaskedLM.from_pretrained(self.model_dir) + + +@MODELS.register_module(Tasks.fill_mask, module_name=Models.bert) +class BertForMaskedLM(MaskedLanguageModelBase): + + def build_model(self): + from transformers import BertForMaskedLM + return BertForMaskedLM.from_pretrained(self.model_dir) diff --git a/modelscope/pipelines/nlp/fill_mask_pipeline.py b/modelscope/pipelines/nlp/fill_mask_pipeline.py index 863d9a6d..1567ef9d 100644 --- a/modelscope/pipelines/nlp/fill_mask_pipeline.py +++ b/modelscope/pipelines/nlp/fill_mask_pipeline.py @@ -1,32 +1,34 @@ +import os from typing import Dict, Optional, Union -from modelscope.metainfo import Pipelines -from modelscope.models import Model -from modelscope.models.nlp.masked_language_model import \ - AliceMindBaseForMaskedLM -from modelscope.preprocessors import FillMaskPreprocessor -from modelscope.utils.constant import Tasks +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp.masked_language_model import MaskedLanguageModelBase +from ...preprocessors import FillMaskPreprocessor +from ...utils.config import Config +from ...utils.constant import ModelFile, Tasks from ..base import Pipeline, Tensor from ..builder import PIPELINES __all__ = ['FillMaskPipeline'] +_type_map = {'veco': 'roberta', 'sbert': 'bert'} @PIPELINES.register_module(Tasks.fill_mask, module_name=Pipelines.fill_mask) class FillMaskPipeline(Pipeline): def __init__(self, - model: Union[AliceMindBaseForMaskedLM, str], + model: Union[MaskedLanguageModelBase, str], preprocessor: Optional[FillMaskPreprocessor] = None, **kwargs): """use `model` and `preprocessor` to create a nlp fill mask pipeline for prediction Args: - model (AliceMindBaseForMaskedLM): a model instance + model (MaskedLanguageModelBase): a model instance preprocessor (FillMaskPreprocessor): a preprocessor instance """ fill_mask_model = model if isinstance( - model, AliceMindBaseForMaskedLM) else Model.from_pretrained(model) + model, MaskedLanguageModelBase) else Model.from_pretrained(model) if preprocessor is None: preprocessor = FillMaskPreprocessor( fill_mask_model.model_dir, @@ -34,11 +36,13 @@ class FillMaskPipeline(Pipeline): second_sequence=None) super().__init__(model=model, preprocessor=preprocessor, **kwargs) self.preprocessor = preprocessor + self.config = Config.from_file( + os.path.join(fill_mask_model.model_dir, ModelFile.CONFIGURATION)) self.tokenizer = preprocessor.tokenizer - self.mask_id = {'veco': 250001, 'sbert': 103} + self.mask_id = {'roberta': 250001, 'bert': 103} self.rep_map = { - 'sbert': { + 'bert': { '[unused0]': '', '[PAD]': '', '[unused1]': '', @@ -48,7 +52,7 @@ class FillMaskPipeline(Pipeline): '[CLS]': '', '[UNK]': '' }, - 'veco': { + 'roberta': { r' +': ' ', '': '', '': '', @@ -72,7 +76,9 @@ class FillMaskPipeline(Pipeline): input_ids = inputs['input_ids'].detach().numpy() pred_ids = np.argmax(logits, axis=-1) model_type = self.model.config.model_type - rst_ids = np.where(input_ids == self.mask_id[model_type], pred_ids, + process_type = model_type if model_type in self.mask_id else _type_map[ + model_type] + rst_ids = np.where(input_ids == self.mask_id[process_type], pred_ids, input_ids) def rep_tokens(string, rep_map): @@ -82,12 +88,12 @@ class FillMaskPipeline(Pipeline): pred_strings = [] for ids in rst_ids: # batch - if self.model.config.vocab_size == 21128: # zh bert + if 'language' in self.config.model and 
self.config.model.language == 'zh': pred_string = self.tokenizer.convert_ids_to_tokens(ids) pred_string = ''.join(pred_string) else: pred_string = self.tokenizer.decode(ids) - pred_string = rep_tokens(pred_string, self.rep_map[model_type]) + pred_string = rep_tokens(pred_string, self.rep_map[process_type]) pred_strings.append(pred_string) return {'text': pred_strings} diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 3f98a081..4ed63f3c 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -192,14 +192,17 @@ class FillMaskPreprocessor(Preprocessor): model_dir (str): model path """ super().__init__(*args, **kwargs) - from sofa.utils.backend import AutoTokenizer self.model_dir = model_dir self.first_sequence: str = kwargs.pop('first_sequence', 'first_sequence') self.sequence_length = kwargs.pop('sequence_length', 128) - - self.tokenizer = AutoTokenizer.from_pretrained( - model_dir, use_fast=False) + try: + from transformers import AutoTokenizer + self.tokenizer = AutoTokenizer.from_pretrained(model_dir) + except KeyError: + from sofa.utils.backend import AutoTokenizer + self.tokenizer = AutoTokenizer.from_pretrained( + model_dir, use_fast=False) @type_assert(object, str) def __call__(self, data: str) -> Dict[str, Any]: diff --git a/tests/pipelines/test_fill_mask.py b/tests/pipelines/test_fill_mask.py index 49c5dc8a..d44ba4c8 100644 --- a/tests/pipelines/test_fill_mask.py +++ b/tests/pipelines/test_fill_mask.py @@ -3,7 +3,8 @@ import unittest from modelscope.hub.snapshot_download import snapshot_download from modelscope.models import Model -from modelscope.models.nlp import StructBertForMaskedLM, VecoForMaskedLM +from modelscope.models.nlp import (BertForMaskedLM, StructBertForMaskedLM, + VecoForMaskedLM) from modelscope.pipelines import FillMaskPipeline, pipeline from modelscope.preprocessors import FillMaskPreprocessor from modelscope.utils.constant import Tasks @@ -16,6 +17,7 @@ class FillMaskTest(unittest.TestCase): 'en': 'damo/nlp_structbert_fill-mask_english-large' } model_id_veco = 'damo/nlp_veco_fill-mask-large' + model_id_bert = 'damo/nlp_bert_fill-mask_chinese-base' ori_texts = { 'zh': @@ -69,6 +71,20 @@ class FillMaskTest(unittest.TestCase): f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n' ) + # zh bert + language = 'zh' + model_dir = snapshot_download(self.model_id_bert) + preprocessor = FillMaskPreprocessor( + model_dir, first_sequence='sentence', second_sequence=None) + model = BertForMaskedLM(model_dir) + pipeline1 = FillMaskPipeline(model, preprocessor) + pipeline2 = pipeline( + Tasks.fill_mask, model=model, preprocessor=preprocessor) + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language] + print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline1: ' + f'{pipeline1(test_input)}\npipeline2: {pipeline2(test_input)}\n') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_from_modelhub(self): # sbert @@ -97,6 +113,18 @@ class FillMaskTest(unittest.TestCase): print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' f'{pipeline_ins(test_input)}\n') + # zh bert + model = Model.from_pretrained(self.model_id_bert) + preprocessor = FillMaskPreprocessor( + model.model_dir, first_sequence='sentence', second_sequence=None) + pipeline_ins = pipeline( + Tasks.fill_mask, model=model, preprocessor=preprocessor) + language = 'zh' + ori_text = self.ori_texts[language] + test_input = self.test_inputs[language] + 
print(f'\nori_text: {ori_text}\ninput: {test_input}\npipeline: ' + f'{pipeline_ins(test_input)}\n') + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') def test_run_with_model_name(self): # veco @@ -115,6 +143,12 @@ class FillMaskTest(unittest.TestCase): f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' f'{pipeline_ins(self.test_inputs[language])}\n') + # bert + pipeline_ins = pipeline(task=Tasks.fill_mask, model=self.model_id_bert) + print( + f'\nori_text: {self.ori_texts[language]}\ninput: {self.test_inputs[language]}\npipeline: ' + f'{pipeline_ins(self.test_inputs[language])}\n') + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') def test_run_with_default_model(self): pipeline_ins = pipeline(task=Tasks.fill_mask) From 04b7eba285dae7026a08b0136ccea7ba31319f6b Mon Sep 17 00:00:00 2001 From: "bin.xue" Date: Tue, 28 Jun 2022 14:41:08 +0800 Subject: [PATCH 4/9] [to #42322933] Merge ANS pipeline into master Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9178339 * refactor: move aec models to audio/aec * refactor: move aec models to audio/aec * refactor: move aec models to audio/aec * refactor: move aec models to audio/aec * feat: add unittest for ANS pipeline * Merge branch 'master' into dev/ans * add new SoundFile to audio dependency * Merge branch 'master' into dev/ans * use ANS pipeline name from metainfo * Merge branch 'master' into dev/ans * chore: update docstring of ANS module * Merge branch 'master' into dev/ans * refactor: use names from metainfo * refactor: enable ans unittest * refactor: add more log message in unittest --- modelscope/metainfo.py | 2 + modelscope/models/__init__.py | 1 + .../models/audio/{layers => aec}/__init__.py | 0 .../audio/{network => aec/layers}/__init__.py | 0 .../audio/{ => aec}/layers/activations.py | 0 .../{ => aec}/layers/affine_transform.py | 0 .../audio/{ => aec}/layers/deep_fsmn.py | 0 .../audio/{ => aec}/layers/layer_base.py | 0 .../audio/{ => aec}/layers/uni_deep_fsmn.py | 0 .../models/audio/aec/network/__init__.py | 0 .../models/audio/{ => aec}/network/loss.py | 0 .../{ => aec}/network/modulation_loss.py | 0 .../models/audio/{ => aec}/network/se_net.py | 0 modelscope/models/audio/ans/__init__.py | 0 modelscope/models/audio/ans/complex_nn.py | 248 ++++++++++++++ modelscope/models/audio/ans/conv_stft.py | 112 +++++++ modelscope/models/audio/ans/frcrn.py | 309 ++++++++++++++++++ .../models/audio/ans/se_module_complex.py | 26 ++ modelscope/models/audio/ans/unet.py | 269 +++++++++++++++ modelscope/pipelines/__init__.py | 1 + modelscope/pipelines/audio/ans_pipeline.py | 117 +++++++ requirements/audio.txt | 1 + tests/pipelines/test_speech_signal_process.py | 32 +- 23 files changed, 1112 insertions(+), 6 deletions(-) rename modelscope/models/audio/{layers => aec}/__init__.py (100%) rename modelscope/models/audio/{network => aec/layers}/__init__.py (100%) rename modelscope/models/audio/{ => aec}/layers/activations.py (100%) rename modelscope/models/audio/{ => aec}/layers/affine_transform.py (100%) rename modelscope/models/audio/{ => aec}/layers/deep_fsmn.py (100%) rename modelscope/models/audio/{ => aec}/layers/layer_base.py (100%) rename modelscope/models/audio/{ => aec}/layers/uni_deep_fsmn.py (100%) create mode 100644 modelscope/models/audio/aec/network/__init__.py rename modelscope/models/audio/{ => aec}/network/loss.py (100%) rename modelscope/models/audio/{ => aec}/network/modulation_loss.py (100%) rename modelscope/models/audio/{ => 
aec}/network/se_net.py (100%) create mode 100644 modelscope/models/audio/ans/__init__.py create mode 100644 modelscope/models/audio/ans/complex_nn.py create mode 100644 modelscope/models/audio/ans/conv_stft.py create mode 100644 modelscope/models/audio/ans/frcrn.py create mode 100644 modelscope/models/audio/ans/se_module_complex.py create mode 100644 modelscope/models/audio/ans/unet.py create mode 100644 modelscope/pipelines/audio/ans_pipeline.py diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index 9fad45e2..eda590ac 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -21,6 +21,7 @@ class Models(object): sambert_hifi_16k = 'sambert-hifi-16k' generic_tts_frontend = 'generic-tts-frontend' hifigan16k = 'hifigan16k' + speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' kws_kwsbp = 'kws-kwsbp' # multi-modal models @@ -55,6 +56,7 @@ class Pipelines(object): # audio tasks sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts' speech_dfsmn_aec_psm_16k = 'speech-dfsmn-aec-psm-16k' + speech_frcrn_ans_cirm_16k = 'speech_frcrn_ans_cirm_16k' kws_kwsbp = 'kws-kwsbp' # multi-modal tasks diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py index ebf81c32..816c44e2 100644 --- a/modelscope/models/__init__.py +++ b/modelscope/models/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +from .audio.ans.frcrn import FRCRNModel from .audio.kws import GenericKeyWordSpotting from .audio.tts.am import SambertNetHifi16k from .audio.tts.vocoder import Hifigan16k diff --git a/modelscope/models/audio/layers/__init__.py b/modelscope/models/audio/aec/__init__.py similarity index 100% rename from modelscope/models/audio/layers/__init__.py rename to modelscope/models/audio/aec/__init__.py diff --git a/modelscope/models/audio/network/__init__.py b/modelscope/models/audio/aec/layers/__init__.py similarity index 100% rename from modelscope/models/audio/network/__init__.py rename to modelscope/models/audio/aec/layers/__init__.py diff --git a/modelscope/models/audio/layers/activations.py b/modelscope/models/audio/aec/layers/activations.py similarity index 100% rename from modelscope/models/audio/layers/activations.py rename to modelscope/models/audio/aec/layers/activations.py diff --git a/modelscope/models/audio/layers/affine_transform.py b/modelscope/models/audio/aec/layers/affine_transform.py similarity index 100% rename from modelscope/models/audio/layers/affine_transform.py rename to modelscope/models/audio/aec/layers/affine_transform.py diff --git a/modelscope/models/audio/layers/deep_fsmn.py b/modelscope/models/audio/aec/layers/deep_fsmn.py similarity index 100% rename from modelscope/models/audio/layers/deep_fsmn.py rename to modelscope/models/audio/aec/layers/deep_fsmn.py diff --git a/modelscope/models/audio/layers/layer_base.py b/modelscope/models/audio/aec/layers/layer_base.py similarity index 100% rename from modelscope/models/audio/layers/layer_base.py rename to modelscope/models/audio/aec/layers/layer_base.py diff --git a/modelscope/models/audio/layers/uni_deep_fsmn.py b/modelscope/models/audio/aec/layers/uni_deep_fsmn.py similarity index 100% rename from modelscope/models/audio/layers/uni_deep_fsmn.py rename to modelscope/models/audio/aec/layers/uni_deep_fsmn.py diff --git a/modelscope/models/audio/aec/network/__init__.py b/modelscope/models/audio/aec/network/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/audio/network/loss.py b/modelscope/models/audio/aec/network/loss.py similarity index 100% 
rename from modelscope/models/audio/network/loss.py rename to modelscope/models/audio/aec/network/loss.py diff --git a/modelscope/models/audio/network/modulation_loss.py b/modelscope/models/audio/aec/network/modulation_loss.py similarity index 100% rename from modelscope/models/audio/network/modulation_loss.py rename to modelscope/models/audio/aec/network/modulation_loss.py diff --git a/modelscope/models/audio/network/se_net.py b/modelscope/models/audio/aec/network/se_net.py similarity index 100% rename from modelscope/models/audio/network/se_net.py rename to modelscope/models/audio/aec/network/se_net.py diff --git a/modelscope/models/audio/ans/__init__.py b/modelscope/models/audio/ans/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modelscope/models/audio/ans/complex_nn.py b/modelscope/models/audio/ans/complex_nn.py new file mode 100644 index 00000000..69dec41e --- /dev/null +++ b/modelscope/models/audio/ans/complex_nn.py @@ -0,0 +1,248 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class UniDeepFsmn(nn.Module): + + def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None): + super(UniDeepFsmn, self).__init__() + + self.input_dim = input_dim + self.output_dim = output_dim + + if lorder is None: + return + + self.lorder = lorder + self.hidden_size = hidden_size + + self.linear = nn.Linear(input_dim, hidden_size) + + self.project = nn.Linear(hidden_size, output_dim, bias=False) + + self.conv1 = nn.Conv2d( + output_dim, + output_dim, [lorder, 1], [1, 1], + groups=output_dim, + bias=False) + + def forward(self, input): + r""" + + Args: + input: torch with shape: batch (b) x sequence(T) x feature (h) + + Returns: + batch (b) x channel (c) x sequence(T) x feature (h) + """ + f1 = F.relu(self.linear(input)) + + p1 = self.project(f1) + + x = torch.unsqueeze(p1, 1) + # x: batch (b) x channel (c) x sequence(T) x feature (h) + x_per = x.permute(0, 3, 2, 1) + # x_per: batch (b) x feature (h) x sequence(T) x channel (c) + y = F.pad(x_per, [0, 0, self.lorder - 1, 0]) + + out = x_per + self.conv1(y) + + out1 = out.permute(0, 3, 2, 1) + # out1: batch (b) x channel (c) x sequence(T) x feature (h) + return input + out1.squeeze() + + +class ComplexUniDeepFsmn(nn.Module): + + def __init__(self, nIn, nHidden=128, nOut=128): + super(ComplexUniDeepFsmn, self).__init__() + + self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) + self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) + self.fsmn_re_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden) + self.fsmn_im_L2 = UniDeepFsmn(nHidden, nOut, 20, nHidden) + + def forward(self, x): + r""" + + Args: + x: torch with shape [batch, channel, feature, sequence, 2], eg: [6, 256, 1, 106, 2] + + Returns: + [batch, feature, sequence, 2], eg: [6, 99, 1024, 2] + """ + # + b, c, h, T, d = x.size() + x = torch.reshape(x, (b, c * h, T, d)) + # x: [b,h,T,2], [6, 256, 106, 2] + x = torch.transpose(x, 1, 2) + # x: [b,T,h,2], [6, 106, 256, 2] + + real_L1 = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1]) + imaginary_L1 = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0]) + # GRU output: [99, 6, 128] + real = self.fsmn_re_L2(real_L1) - self.fsmn_im_L2(imaginary_L1) + imaginary = self.fsmn_re_L2(imaginary_L1) + self.fsmn_im_L2(real_L1) + # output: [b,T,h,2], [99, 6, 1024, 2] + output = torch.stack((real, imaginary), dim=-1) + + # output: [b,h,T,2], [6, 99, 1024, 2] + output = torch.transpose(output, 1, 2) + output = torch.reshape(output, (b, c, h, T, d)) + + return output + + +class 
ComplexUniDeepFsmn_L1(nn.Module): + + def __init__(self, nIn, nHidden=128, nOut=128): + super(ComplexUniDeepFsmn_L1, self).__init__() + self.fsmn_re_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) + self.fsmn_im_L1 = UniDeepFsmn(nIn, nHidden, 20, nHidden) + + def forward(self, x): + r""" + + Args: + x: torch with shape [batch, channel, feature, sequence, 2], eg: [6, 256, 1, 106, 2] + """ + b, c, h, T, d = x.size() + # x : [b,T,h,c,2] + x = torch.transpose(x, 1, 3) + x = torch.reshape(x, (b * T, h, c, d)) + + real = self.fsmn_re_L1(x[..., 0]) - self.fsmn_im_L1(x[..., 1]) + imaginary = self.fsmn_re_L1(x[..., 1]) + self.fsmn_im_L1(x[..., 0]) + # output: [b*T,h,c,2], [6*106, h, 256, 2] + output = torch.stack((real, imaginary), dim=-1) + + output = torch.reshape(output, (b, T, h, c, d)) + output = torch.transpose(output, 1, 3) + return output + + +class ComplexConv2d(nn.Module): + # https://github.com/litcoderr/ComplexCNN/blob/master/complexcnn/modules.py + def __init__(self, + in_channel, + out_channel, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + **kwargs): + super().__init__() + + # Model components + self.conv_re = nn.Conv2d( + in_channel, + out_channel, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + **kwargs) + self.conv_im = nn.Conv2d( + in_channel, + out_channel, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + **kwargs) + + def forward(self, x): + r""" + + Args: + x: torch with shape: [batch,channel,axis1,axis2,2] + """ + real = self.conv_re(x[..., 0]) - self.conv_im(x[..., 1]) + imaginary = self.conv_re(x[..., 1]) + self.conv_im(x[..., 0]) + output = torch.stack((real, imaginary), dim=-1) + return output + + +class ComplexConvTranspose2d(nn.Module): + + def __init__(self, + in_channel, + out_channel, + kernel_size, + stride=1, + padding=0, + output_padding=0, + dilation=1, + groups=1, + bias=True, + **kwargs): + super().__init__() + + # Model components + self.tconv_re = nn.ConvTranspose2d( + in_channel, + out_channel, + kernel_size=kernel_size, + stride=stride, + padding=padding, + output_padding=output_padding, + groups=groups, + bias=bias, + dilation=dilation, + **kwargs) + self.tconv_im = nn.ConvTranspose2d( + in_channel, + out_channel, + kernel_size=kernel_size, + stride=stride, + padding=padding, + output_padding=output_padding, + groups=groups, + bias=bias, + dilation=dilation, + **kwargs) + + def forward(self, x): # shpae of x : [batch,channel,axis1,axis2,2] + real = self.tconv_re(x[..., 0]) - self.tconv_im(x[..., 1]) + imaginary = self.tconv_re(x[..., 1]) + self.tconv_im(x[..., 0]) + output = torch.stack((real, imaginary), dim=-1) + return output + + +class ComplexBatchNorm2d(nn.Module): + + def __init__(self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + **kwargs): + super().__init__() + self.bn_re = nn.BatchNorm2d( + num_features=num_features, + momentum=momentum, + affine=affine, + eps=eps, + track_running_stats=track_running_stats, + **kwargs) + self.bn_im = nn.BatchNorm2d( + num_features=num_features, + momentum=momentum, + affine=affine, + eps=eps, + track_running_stats=track_running_stats, + **kwargs) + + def forward(self, x): + real = self.bn_re(x[..., 0]) + imag = self.bn_im(x[..., 1]) + output = torch.stack((real, imag), dim=-1) + return output diff --git a/modelscope/models/audio/ans/conv_stft.py b/modelscope/models/audio/ans/conv_stft.py new file mode 100644 index 
00000000..a47d7817
--- /dev/null
+++ b/modelscope/models/audio/ans/conv_stft.py
@@ -0,0 +1,112 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from scipy.signal import get_window
+
+
+def init_kernels(win_len, win_inc, fft_len, win_type=None, invers=False):
+    if win_type == 'None' or win_type is None:
+        window = np.ones(win_len)
+    else:
+        window = get_window(win_type, win_len, fftbins=True)**0.5
+
+    N = fft_len
+    fourier_basis = np.fft.rfft(np.eye(N))[:win_len]
+    real_kernel = np.real(fourier_basis)
+    imag_kernel = np.imag(fourier_basis)
+    kernel = np.concatenate([real_kernel, imag_kernel], 1).T
+
+    if invers:
+        kernel = np.linalg.pinv(kernel).T
+
+    kernel = kernel * window
+    kernel = kernel[:, None, :]
+    return torch.from_numpy(kernel.astype(np.float32)), torch.from_numpy(
+        window[None, :, None].astype(np.float32))
+
+
+class ConvSTFT(nn.Module):
+
+    def __init__(self,
+                 win_len,
+                 win_inc,
+                 fft_len=None,
+                 win_type='hamming',
+                 feature_type='real',
+                 fix=True):
+        super(ConvSTFT, self).__init__()
+
+        if fft_len is None:
+            self.fft_len = int(2**np.ceil(np.log2(win_len)))
+        else:
+            self.fft_len = fft_len
+
+        kernel, _ = init_kernels(win_len, win_inc, self.fft_len, win_type)
+        self.weight = nn.Parameter(kernel, requires_grad=(not fix))
+        self.feature_type = feature_type
+        self.stride = win_inc
+        self.win_len = win_len
+        self.dim = self.fft_len
+
+    def forward(self, inputs):
+        if inputs.dim() == 2:
+            inputs = torch.unsqueeze(inputs, 1)
+
+        outputs = F.conv1d(inputs, self.weight, stride=self.stride)
+
+        if self.feature_type == 'complex':
+            return outputs
+        else:
+            dim = self.dim // 2 + 1
+            real = outputs[:, :dim, :]
+            imag = outputs[:, dim:, :]
+            mags = torch.sqrt(real**2 + imag**2)
+            phase = torch.atan2(imag, real)
+            return mags, phase
+
+
+class ConviSTFT(nn.Module):
+
+    def __init__(self,
+                 win_len,
+                 win_inc,
+                 fft_len=None,
+                 win_type='hamming',
+                 feature_type='real',
+                 fix=True):
+        super(ConviSTFT, self).__init__()
+        if fft_len is None:
+            self.fft_len = int(2**np.ceil(np.log2(win_len)))
+        else:
+            self.fft_len = fft_len
+        kernel, window = init_kernels(
+            win_len, win_inc, self.fft_len, win_type, invers=True)
+        self.weight = nn.Parameter(kernel, requires_grad=(not fix))
+        self.feature_type = feature_type
+        self.win_type = win_type
+        self.win_len = win_len
+        self.win_inc = win_inc
+        self.stride = win_inc
+        self.dim = self.fft_len
+        self.register_buffer('window', window)
+        self.register_buffer('enframe', torch.eye(win_len)[:, None, :])
+
+    def forward(self, inputs, phase=None):
+        """
+        Args:
+            inputs : [B, N+2, T] (complex spec) or [B, N//2+1, T] (mags)
+            phase: [B, N//2+1, T] (if not none)
+        """
+
+        if phase is not None:
+            real = inputs * torch.cos(phase)
+            imag = inputs * torch.sin(phase)
+            inputs = torch.cat([real, imag], 1)
+        outputs = F.conv_transpose1d(inputs, self.weight, stride=self.stride)
+
+        # this is from torch-stft: https://github.com/pseeth/torch-stft
+        t = self.window.repeat(1, 1, inputs.size(-1))**2
+        coff = F.conv_transpose1d(t, self.enframe, stride=self.stride)
+        outputs = outputs / (coff + 1e-8)
+        return outputs
diff --git a/modelscope/models/audio/ans/frcrn.py b/modelscope/models/audio/ans/frcrn.py
new file mode 100644
index 00000000..c56b8773
--- /dev/null
+++ b/modelscope/models/audio/ans/frcrn.py
@@ -0,0 +1,309 @@
+import os
+from typing import Dict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from modelscope.metainfo import Models
+from modelscope.models.builder import MODELS
+from
modelscope.utils.constant import ModelFile, Tasks +from ...base import Model, Tensor +from .conv_stft import ConviSTFT, ConvSTFT +from .unet import UNet + + +class FTB(nn.Module): + + def __init__(self, input_dim=257, in_channel=9, r_channel=5): + + super(FTB, self).__init__() + self.in_channel = in_channel + self.conv1 = nn.Sequential( + nn.Conv2d(in_channel, r_channel, kernel_size=[1, 1]), + nn.BatchNorm2d(r_channel), nn.ReLU()) + + self.conv1d = nn.Sequential( + nn.Conv1d( + r_channel * input_dim, in_channel, kernel_size=9, padding=4), + nn.BatchNorm1d(in_channel), nn.ReLU()) + self.freq_fc = nn.Linear(input_dim, input_dim, bias=False) + + self.conv2 = nn.Sequential( + nn.Conv2d(in_channel * 2, in_channel, kernel_size=[1, 1]), + nn.BatchNorm2d(in_channel), nn.ReLU()) + + def forward(self, inputs): + ''' + inputs should be [Batch, Ca, Dim, Time] + ''' + # T-F attention + conv1_out = self.conv1(inputs) + B, C, D, T = conv1_out.size() + reshape1_out = torch.reshape(conv1_out, [B, C * D, T]) + conv1d_out = self.conv1d(reshape1_out) + conv1d_out = torch.reshape(conv1d_out, [B, self.in_channel, 1, T]) + + # now is also [B,C,D,T] + att_out = conv1d_out * inputs + + # tranpose to [B,C,T,D] + att_out = torch.transpose(att_out, 2, 3) + freqfc_out = self.freq_fc(att_out) + att_out = torch.transpose(freqfc_out, 2, 3) + + cat_out = torch.cat([att_out, inputs], 1) + outputs = self.conv2(cat_out) + return outputs + + +@MODELS.register_module( + Tasks.speech_signal_process, module_name=Models.speech_frcrn_ans_cirm_16k) +class FRCRNModel(Model): + r""" A decorator of FRCRN for integrating into modelscope framework """ + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the frcrn model from the `model_dir` path. + + Args: + model_dir (str): the model path. + """ + super().__init__(model_dir, *args, **kwargs) + self._model = FRCRN(*args, **kwargs) + model_bin_file = os.path.join(model_dir, + ModelFile.TORCH_MODEL_BIN_FILE) + if os.path.exists(model_bin_file): + checkpoint = torch.load(model_bin_file) + self._model.load_state_dict(checkpoint, strict=False) + + def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]: + output = self._model.forward(input) + return { + 'spec_l1': output[0], + 'wav_l1': output[1], + 'mask_l1': output[2], + 'spec_l2': output[3], + 'wav_l2': output[4], + 'mask_l2': output[5] + } + + def to(self, *args, **kwargs): + self._model = self._model.to(*args, **kwargs) + return self + + def eval(self): + self._model = self._model.train(False) + return self + + +class FRCRN(nn.Module): + r""" Frequency Recurrent CRN """ + + def __init__(self, + complex, + model_complexity, + model_depth, + log_amp, + padding_mode, + win_len=400, + win_inc=100, + fft_len=512, + win_type='hanning'): + r""" + Args: + complex: Whether to use complex networks. + model_complexity: define the model complexity with the number of layers + model_depth: Only two options are available : 10, 20 + log_amp: Whether to use log amplitude to estimate signals + padding_mode: Encoder's convolution filter. 'zeros', 'reflect' + win_len: length of window used for defining one frame of sample points + win_inc: length of window shifting (equivalent to hop_size) + fft_len: number of Short Time Fourier Transform (STFT) points + win_type: windowing type used in STFT, eg. 
'hanning', 'hamming' + """ + super().__init__() + self.feat_dim = fft_len // 2 + 1 + + self.win_len = win_len + self.win_inc = win_inc + self.fft_len = fft_len + self.win_type = win_type + + fix = True + self.stft = ConvSTFT( + self.win_len, + self.win_inc, + self.fft_len, + self.win_type, + feature_type='complex', + fix=fix) + self.istft = ConviSTFT( + self.win_len, + self.win_inc, + self.fft_len, + self.win_type, + feature_type='complex', + fix=fix) + self.unet = UNet( + 1, + complex=complex, + model_complexity=model_complexity, + model_depth=model_depth, + padding_mode=padding_mode) + self.unet2 = UNet( + 1, + complex=complex, + model_complexity=model_complexity, + model_depth=model_depth, + padding_mode=padding_mode) + + def forward(self, inputs): + out_list = [] + # [B, D*2, T] + cmp_spec = self.stft(inputs) + # [B, 1, D*2, T] + cmp_spec = torch.unsqueeze(cmp_spec, 1) + + # to [B, 2, D, T] real_part/imag_part + cmp_spec = torch.cat([ + cmp_spec[:, :, :self.feat_dim, :], + cmp_spec[:, :, self.feat_dim:, :], + ], 1) + + # [B, 2, D, T] + cmp_spec = torch.unsqueeze(cmp_spec, 4) + # [B, 1, D, T, 2] + cmp_spec = torch.transpose(cmp_spec, 1, 4) + unet1_out = self.unet(cmp_spec) + cmp_mask1 = torch.tanh(unet1_out) + unet2_out = self.unet2(unet1_out) + cmp_mask2 = torch.tanh(unet2_out) + est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask1) + out_list.append(est_spec) + out_list.append(est_wav) + out_list.append(est_mask) + cmp_mask2 = cmp_mask2 + cmp_mask1 + est_spec, est_wav, est_mask = self.apply_mask(cmp_spec, cmp_mask2) + out_list.append(est_spec) + out_list.append(est_wav) + out_list.append(est_mask) + return out_list + + def apply_mask(self, cmp_spec, cmp_mask): + est_spec = torch.cat([ + cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 0] + - cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 1], + cmp_spec[:, :, :, :, 0] * cmp_mask[:, :, :, :, 1] + + cmp_spec[:, :, :, :, 1] * cmp_mask[:, :, :, :, 0] + ], 1) + est_spec = torch.cat([est_spec[:, 0, :, :], est_spec[:, 1, :, :]], 1) + cmp_mask = torch.squeeze(cmp_mask, 1) + cmp_mask = torch.cat([cmp_mask[:, :, :, 0], cmp_mask[:, :, :, 1]], 1) + + est_wav = self.istft(est_spec) + est_wav = torch.squeeze(est_wav, 1) + return est_spec, est_wav, cmp_mask + + def get_params(self, weight_decay=0.0): + # add L2 penalty + weights, biases = [], [] + for name, param in self.named_parameters(): + if 'bias' in name: + biases += [param] + else: + weights += [param] + params = [{ + 'params': weights, + 'weight_decay': weight_decay, + }, { + 'params': biases, + 'weight_decay': 0.0, + }] + return params + + def loss(self, noisy, labels, out_list, mode='Mix'): + if mode == 'SiSNR': + count = 0 + while count < len(out_list): + est_spec = out_list[count] + count = count + 1 + est_wav = out_list[count] + count = count + 1 + est_mask = out_list[count] + count = count + 1 + if count != 3: + loss = self.loss_1layer(noisy, est_spec, est_wav, labels, + est_mask, mode) + return loss + + elif mode == 'Mix': + count = 0 + while count < len(out_list): + est_spec = out_list[count] + count = count + 1 + est_wav = out_list[count] + count = count + 1 + est_mask = out_list[count] + count = count + 1 + if count != 3: + amp_loss, phase_loss, SiSNR_loss = self.loss_1layer( + noisy, est_spec, est_wav, labels, est_mask, mode) + loss = amp_loss + phase_loss + SiSNR_loss + return loss, amp_loss, phase_loss + + def loss_1layer(self, noisy, est, est_wav, labels, cmp_mask, mode='Mix'): + r""" Compute the loss by mode + mode == 'Mix' + est: [B, F*2, T] + labels: [B, F*2,T] + 
mode == 'SiSNR' + est: [B, T] + labels: [B, T] + """ + if mode == 'SiSNR': + if labels.dim() == 3: + labels = torch.squeeze(labels, 1) + if est_wav.dim() == 3: + est_wav = torch.squeeze(est_wav, 1) + return -si_snr(est_wav, labels) + elif mode == 'Mix': + + if labels.dim() == 3: + labels = torch.squeeze(labels, 1) + if est_wav.dim() == 3: + est_wav = torch.squeeze(est_wav, 1) + SiSNR_loss = -si_snr(est_wav, labels) + + b, d, t = est.size() + S = self.stft(labels) + Sr = S[:, :self.feat_dim, :] + Si = S[:, self.feat_dim:, :] + Y = self.stft(noisy) + Yr = Y[:, :self.feat_dim, :] + Yi = Y[:, self.feat_dim:, :] + Y_pow = Yr**2 + Yi**2 + gth_mask = torch.cat([(Sr * Yr + Si * Yi) / (Y_pow + 1e-8), + (Si * Yr - Sr * Yi) / (Y_pow + 1e-8)], 1) + gth_mask[gth_mask > 2] = 1 + gth_mask[gth_mask < -2] = -1 + amp_loss = F.mse_loss(gth_mask[:, :self.feat_dim, :], + cmp_mask[:, :self.feat_dim, :]) * d + phase_loss = F.mse_loss(gth_mask[:, self.feat_dim:, :], + cmp_mask[:, self.feat_dim:, :]) * d + return amp_loss, phase_loss, SiSNR_loss + + +def l2_norm(s1, s2): + norm = torch.sum(s1 * s2, -1, keepdim=True) + return norm + + +def si_snr(s1, s2, eps=1e-8): + s1_s2_norm = l2_norm(s1, s2) + s2_s2_norm = l2_norm(s2, s2) + s_target = s1_s2_norm / (s2_s2_norm + eps) * s2 + e_nosie = s1 - s_target + target_norm = l2_norm(s_target, s_target) + noise_norm = l2_norm(e_nosie, e_nosie) + snr = 10 * torch.log10((target_norm) / (noise_norm + eps) + eps) + return torch.mean(snr) diff --git a/modelscope/models/audio/ans/se_module_complex.py b/modelscope/models/audio/ans/se_module_complex.py new file mode 100644 index 00000000..f62fe523 --- /dev/null +++ b/modelscope/models/audio/ans/se_module_complex.py @@ -0,0 +1,26 @@ +import torch +from torch import nn + + +class SELayer(nn.Module): + + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc_r = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + self.fc_i = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + b, c, _, _, _ = x.size() + x_r = self.avg_pool(x[:, :, :, :, 0]).view(b, c) + x_i = self.avg_pool(x[:, :, :, :, 1]).view(b, c) + y_r = self.fc_r(x_r).view(b, c, 1, 1, 1) - self.fc_i(x_i).view( + b, c, 1, 1, 1) + y_i = self.fc_r(x_i).view(b, c, 1, 1, 1) + self.fc_i(x_r).view( + b, c, 1, 1, 1) + y = torch.cat([y_r, y_i], 4) + return x * y diff --git a/modelscope/models/audio/ans/unet.py b/modelscope/models/audio/ans/unet.py new file mode 100644 index 00000000..aa5a4254 --- /dev/null +++ b/modelscope/models/audio/ans/unet.py @@ -0,0 +1,269 @@ +import torch +import torch.nn as nn + +from . 
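
As a sanity check on the SI-SNR helpers defined at the end of frcrn.py above, a short sketch (illustrative only; the module path follows this patch):

    import torch
    from modelscope.models.audio.ans.frcrn import si_snr

    clean = torch.randn(4, 16000)
    noisy = clean + 0.1 * torch.randn(4, 16000)
    print(si_snr(clean, clean))  # near the eps-limited maximum for identical signals
    print(si_snr(noisy, clean))  # roughly 20 dB for 10% additive noise
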
import complex_nn +from .se_module_complex import SELayer + + +class Encoder(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding=None, + complex=False, + padding_mode='zeros'): + super().__init__() + if padding is None: + padding = [(i - 1) // 2 for i in kernel_size] # 'SAME' padding + + if complex: + conv = complex_nn.ComplexConv2d + bn = complex_nn.ComplexBatchNorm2d + else: + conv = nn.Conv2d + bn = nn.BatchNorm2d + + self.conv = conv( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + padding_mode=padding_mode) + self.bn = bn(out_channels) + self.relu = nn.LeakyReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Decoder(nn.Module): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride, + padding=(0, 0), + complex=False): + super().__init__() + if complex: + tconv = complex_nn.ComplexConvTranspose2d + bn = complex_nn.ComplexBatchNorm2d + else: + tconv = nn.ConvTranspose2d + bn = nn.BatchNorm2d + + self.transconv = tconv( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + self.bn = bn(out_channels) + self.relu = nn.LeakyReLU(inplace=True) + + def forward(self, x): + x = self.transconv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class UNet(nn.Module): + + def __init__(self, + input_channels=1, + complex=False, + model_complexity=45, + model_depth=20, + padding_mode='zeros'): + super().__init__() + + if complex: + model_complexity = int(model_complexity // 1.414) + + self.set_size( + model_complexity=model_complexity, + input_channels=input_channels, + model_depth=model_depth) + self.encoders = [] + self.model_length = model_depth // 2 + self.fsmn = complex_nn.ComplexUniDeepFsmn(128, 128, 128) + self.se_layers_enc = [] + self.fsmn_enc = [] + for i in range(self.model_length): + fsmn_enc = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128) + self.add_module('fsmn_enc{}'.format(i), fsmn_enc) + self.fsmn_enc.append(fsmn_enc) + module = Encoder( + self.enc_channels[i], + self.enc_channels[i + 1], + kernel_size=self.enc_kernel_sizes[i], + stride=self.enc_strides[i], + padding=self.enc_paddings[i], + complex=complex, + padding_mode=padding_mode) + self.add_module('encoder{}'.format(i), module) + self.encoders.append(module) + se_layer_enc = SELayer(self.enc_channels[i + 1], 8) + self.add_module('se_layer_enc{}'.format(i), se_layer_enc) + self.se_layers_enc.append(se_layer_enc) + self.decoders = [] + self.fsmn_dec = [] + self.se_layers_dec = [] + for i in range(self.model_length): + fsmn_dec = complex_nn.ComplexUniDeepFsmn_L1(128, 128, 128) + self.add_module('fsmn_dec{}'.format(i), fsmn_dec) + self.fsmn_dec.append(fsmn_dec) + module = Decoder( + self.dec_channels[i] * 2, + self.dec_channels[i + 1], + kernel_size=self.dec_kernel_sizes[i], + stride=self.dec_strides[i], + padding=self.dec_paddings[i], + complex=complex) + self.add_module('decoder{}'.format(i), module) + self.decoders.append(module) + if i < self.model_length - 1: + se_layer_dec = SELayer(self.dec_channels[i + 1], 8) + self.add_module('se_layer_dec{}'.format(i), se_layer_dec) + self.se_layers_dec.append(se_layer_dec) + if complex: + conv = complex_nn.ComplexConv2d + else: + conv = nn.Conv2d + + linear = conv(self.dec_channels[-1], 1, 1) + + self.add_module('linear', linear) + self.complex = complex + self.padding_mode = padding_mode + + self.decoders = nn.ModuleList(self.decoders) + self.encoders 
= nn.ModuleList(self.encoders) + self.se_layers_enc = nn.ModuleList(self.se_layers_enc) + self.se_layers_dec = nn.ModuleList(self.se_layers_dec) + self.fsmn_enc = nn.ModuleList(self.fsmn_enc) + self.fsmn_dec = nn.ModuleList(self.fsmn_dec) + + def forward(self, inputs): + x = inputs + # go down + xs = [] + xs_se = [] + xs_se.append(x) + for i, encoder in enumerate(self.encoders): + xs.append(x) + if i > 0: + x = self.fsmn_enc[i](x) + x = encoder(x) + xs_se.append(self.se_layers_enc[i](x)) + # xs : x0=input x1 ... x9 + x = self.fsmn(x) + + p = x + for i, decoder in enumerate(self.decoders): + p = decoder(p) + if i < self.model_length - 1: + p = self.fsmn_dec[i](p) + if i == self.model_length - 1: + break + if i < self.model_length - 2: + p = self.se_layers_dec[i](p) + p = torch.cat([p, xs_se[self.model_length - 1 - i]], dim=1) + + # cmp_spec: [12, 1, 513, 64, 2] + cmp_spec = self.linear(p) + return cmp_spec + + def set_size(self, model_complexity, model_depth=20, input_channels=1): + + if model_depth == 14: + self.enc_channels = [ + input_channels, 128, 128, 128, 128, 128, 128, 128 + ] + self.enc_kernel_sizes = [(5, 2), (5, 2), (5, 2), (5, 2), (5, 2), + (5, 2), (2, 2)] + self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), + (2, 1)] + self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), + (0, 1), (0, 1)] + self.dec_channels = [64, 128, 128, 128, 128, 128, 128, 1] + self.dec_kernel_sizes = [(2, 2), (5, 2), (5, 2), (5, 2), (6, 2), + (5, 2), (5, 2)] + self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), + (2, 1)] + self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), + (0, 1), (0, 1)] + + elif model_depth == 10: + self.enc_channels = [ + input_channels, + 16, + 32, + 64, + 128, + 256, + ] + self.enc_kernel_sizes = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3)] + self.enc_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)] + self.enc_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)] + self.dec_channels = [128, 128, 64, 32, 16, 1] + self.dec_kernel_sizes = [(3, 3), (3, 3), (3, 3), (4, 3), (3, 3)] + self.dec_strides = [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1)] + self.dec_paddings = [(0, 1), (0, 1), (0, 1), (0, 1), (0, 1)] + + elif model_depth == 20: + self.enc_channels = [ + input_channels, model_complexity, model_complexity, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, 128 + ] + + self.enc_kernel_sizes = [(7, 1), (1, 7), (6, 4), (7, 5), (5, 3), + (5, 3), (5, 3), (5, 3), (5, 3), (5, 3)] + + self.enc_strides = [(1, 1), (1, 1), (2, 2), (2, 1), (2, 2), (2, 1), + (2, 2), (2, 1), (2, 2), (2, 1)] + + self.enc_paddings = [ + (3, 0), + (0, 3), + None, # (0, 2), + None, + None, # (3,1), + None, # (3,1), + None, # (1,2), + None, + None, + None + ] + + self.dec_channels = [ + 0, model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2, model_complexity * 2, + model_complexity * 2 + ] + + self.dec_kernel_sizes = [(4, 3), (4, 2), (4, 3), (4, 2), (4, 3), + (4, 2), (6, 3), (7, 4), (1, 7), (7, 1)] + + self.dec_strides = [(2, 1), (2, 2), (2, 1), (2, 2), (2, 1), (2, 2), + (2, 1), (2, 2), (1, 1), (1, 1)] + + self.dec_paddings = [(1, 1), (1, 0), (1, 1), (1, 0), (1, 1), + (1, 0), (2, 1), (2, 1), (0, 3), (3, 0)] + else: + raise ValueError('Unknown model depth : {}'.format(model_depth)) diff --git 
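
A small shape sketch for the real-valued branch of the Encoder above (the complex branch needs the complex_nn helpers, which this patch imports but does not show):

    import torch
    from modelscope.models.audio.ans.unet import Encoder

    enc = Encoder(1, 16, kernel_size=(3, 3), stride=(2, 1), complex=False)
    x = torch.randn(2, 1, 257, 50)   # [B, C, freq, time]
    print(enc(x).shape)              # torch.Size([2, 16, 129, 50]); stride (2, 1) halves freq
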
diff --git a/modelscope/pipelines/__init__.py b/modelscope/pipelines/__init__.py
index 14865872..74f5507f 100644
--- a/modelscope/pipelines/__init__.py
+++ b/modelscope/pipelines/__init__.py
@@ -1,4 +1,5 @@
 from .audio import LinearAECPipeline
+from .audio.ans_pipeline import ANSPipeline
 from .base import Pipeline
 from .builder import pipeline
 from .cv import *  # noqa F403
diff --git a/modelscope/pipelines/audio/ans_pipeline.py b/modelscope/pipelines/audio/ans_pipeline.py
new file mode 100644
index 00000000..d9a04a29
--- /dev/null
+++ b/modelscope/pipelines/audio/ans_pipeline.py
@@ -0,0 +1,117 @@
+import os.path
+from typing import Any, Dict
+
+import librosa
+import numpy as np
+import soundfile as sf
+import torch
+
+from modelscope.metainfo import Pipelines
+from modelscope.utils.constant import Tasks
+from ..base import Input, Pipeline
+from ..builder import PIPELINES
+
+
+def audio_norm(x):
+    rms = (x**2).mean()**0.5
+    scalar = 10**(-25 / 20) / rms
+    x = x * scalar
+    pow_x = x**2
+    avg_pow_x = pow_x.mean()
+    rmsx = pow_x[pow_x > avg_pow_x].mean()**0.5
+    scalarx = 10**(-25 / 20) / rmsx
+    x = x * scalarx
+    return x
+
+
+@PIPELINES.register_module(
+    Tasks.speech_signal_process,
+    module_name=Pipelines.speech_frcrn_ans_cirm_16k)
+class ANSPipeline(Pipeline):
+    r"""ANS (Acoustic Noise Suppression) inference pipeline.
+
+    When invoked via pipeline.__call__(), it accepts a single parameter:
+        inputs (str): the path of a wav file
+    """
+    SAMPLE_RATE = 16000
+
+    def __init__(self, model):
+        r"""
+        Args:
+            model: model id on modelscope hub.
+        """
+        super().__init__(model=model)
+        self.device = torch.device(
+            'cuda' if torch.cuda.is_available() else 'cpu')
+        self.model = self.model.to(self.device)
+        self.model.eval()
+
+    def preprocess(self, inputs: Input) -> Dict[str, Any]:
+        assert isinstance(inputs, str) and os.path.exists(inputs) and os.path.isfile(inputs), \
+            f'Input file does not exist: {inputs}'
+        data1, fs = sf.read(inputs)
+        data1 = audio_norm(data1)
+        if fs != self.SAMPLE_RATE:
+            data1 = librosa.resample(data1, fs, self.SAMPLE_RATE)
+        if len(data1.shape) > 1:
+            data1 = data1[:, 0]
+        data = data1.astype(np.float32)
+        inputs = np.reshape(data, [1, data.shape[0]])
+        return {'ndarray': inputs, 'nsamples': data.shape[0]}
+
+    def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        ndarray = inputs['ndarray']
+        nsamples = inputs['nsamples']
+        decode_do_segment = False
+        window = 16000
+        stride = int(window * 0.75)
+        print('inputs:{}'.format(ndarray.shape))
+        b, t = ndarray.shape
+        if t > window * 120:
+            decode_do_segment = True
+
+        if t < window:
+            ndarray = np.concatenate(
+                [ndarray, np.zeros((ndarray.shape[0], window - t))], 1)
+        elif t < window + stride:
+            padding = window + stride - t
+            print('padding: {}'.format(padding))
+            ndarray = np.concatenate(
+                [ndarray, np.zeros((ndarray.shape[0], padding))], 1)
+        else:
+            if (t - window) % stride != 0:
+                padding = t - (t - window) // stride * stride
+                print('padding: {}'.format(padding))
+                ndarray = np.concatenate(
+                    [ndarray, np.zeros((ndarray.shape[0], padding))], 1)
+        print('inputs after padding:{}'.format(ndarray.shape))
+        with torch.no_grad():
+            ndarray = torch.from_numpy(np.float32(ndarray)).to(self.device)
+            b, t = ndarray.shape
+            if decode_do_segment:
+                outputs = np.zeros(t)
+                give_up_length = (window - stride) // 2
+                current_idx = 0
+                while current_idx + window <= t:
+                    print('current_idx: {}'.format(current_idx))
+                    tmp_input = ndarray[:, current_idx:current_idx + window]
+                    tmp_output = self.model(
+                        tmp_input,
)['wav_l2'][0].cpu().numpy() + end_index = current_idx + window - give_up_length + if current_idx == 0: + outputs[current_idx: + end_index] = tmp_output[:-give_up_length] + else: + outputs[current_idx + + give_up_length:end_index] = tmp_output[ + give_up_length:-give_up_length] + current_idx += stride + else: + outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy() + return {'output_pcm': outputs[:nsamples]} + + def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: + if 'output_path' in kwargs.keys(): + sf.write(kwargs['output_path'], inputs['output_pcm'], + self.SAMPLE_RATE) + return inputs diff --git a/requirements/audio.txt b/requirements/audio.txt index c7b2b239..1f5984ca 100644 --- a/requirements/audio.txt +++ b/requirements/audio.txt @@ -16,6 +16,7 @@ protobuf>3,<=3.20 ptflops PyWavelets>=1.0.0 scikit-learn +SoundFile>0.10 sox tensorboard tensorflow==1.15.* diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py index bc3a542e..f317bc07 100644 --- a/tests/pipelines/test_speech_signal_process.py +++ b/tests/pipelines/test_speech_signal_process.py @@ -17,6 +17,9 @@ AEC_LIB_URL = 'http://isv-data.oss-cn-hangzhou.aliyuncs.com/ics%2FMaaS%2FAEC%2Fl '?Expires=1664085465&OSSAccessKeyId=LTAIxjQyZNde90zh&Signature=Y7gelmGEsQAJRK4yyHSYMrdWizk%3D' AEC_LIB_FILE = 'libmitaec_pyio.so' +NOISE_SPEECH_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ANS/sample_audio/speech_with_noise.wav' +NOISE_SPEECH_FILE = 'speech_with_noise.wav' + def download(remote_path, local_path): local_dir = os.path.dirname(local_path) @@ -30,23 +33,40 @@ def download(remote_path, local_path): class SpeechSignalProcessTest(unittest.TestCase): def setUp(self) -> None: - self.model_id = 'damo/speech_dfsmn_aec_psm_16k' + pass + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_aec(self): # A temporary hack to provide c++ lib. Download it first. 
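
A usage sketch of the ANSPipeline added above (the model id and wav file name mirror the test_ans case in this patch; output_path is the optional kwarg consumed in postprocess):

    from modelscope.metainfo import Pipelines
    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    ans = pipeline(
        Tasks.speech_signal_process,
        model='damo/speech_frcrn_ans_cirm_16k',
        pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k)
    # input: path to a 16 kHz wav file; denoised audio is written to output_path
    ans('speech_with_noise.wav', output_path='denoised.wav')
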
download(AEC_LIB_URL, AEC_LIB_FILE) - - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') - def test_run(self): + # Download audio files download(NEAREND_MIC_URL, NEAREND_MIC_FILE) download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) + model_id = 'damo/speech_dfsmn_aec_psm_16k' input = { 'nearend_mic': NEAREND_MIC_FILE, 'farend_speech': FAREND_SPEECH_FILE } aec = pipeline( Tasks.speech_signal_process, - model=self.model_id, + model=model_id, pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) - aec(input, output_path='output.wav') + output_path = os.path.abspath('output.wav') + aec(input, output_path=output_path) + print(f'Processed audio saved to {output_path}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_ans(self): + # Download audio files + download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE) + model_id = 'damo/speech_frcrn_ans_cirm_16k' + ans = pipeline( + Tasks.speech_signal_process, + model=model_id, + pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k) + output_path = os.path.abspath('output.wav') + ans(NOISE_SPEECH_FILE, output_path=output_path) + print(f'Processed audio saved to {output_path}') if __name__ == '__main__': From 5da470fd5d8a8a91936a41b21ad6ab1ebb9f3ba0 Mon Sep 17 00:00:00 2001 From: "feiwu.yfw" Date: Tue, 28 Jun 2022 20:40:57 +0800 Subject: [PATCH 5/9] [to #42791465, #42779255, #42777959, #42757844, #42756050, #42746916, #42743595, #42791863] fix: fix msdataset Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9174075 * fix msdataset --- modelscope/hub/errors.py | 15 ++++++ modelscope/msdatasets/config.py | 2 +- modelscope/msdatasets/ms_dataset.py | 56 +++++++++++++++------ modelscope/msdatasets/utils/ms_api.py | 48 ++++++++++++------ modelscope/utils/constant.py | 10 +++- tests/msdatasets/test_ms_dataset.py | 24 +++++---- tests/pipelines/test_image_matting.py | 3 +- tests/pipelines/test_text_classification.py | 8 ++- 8 files changed, 121 insertions(+), 45 deletions(-) diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py index 4b39d6e3..d39036a0 100644 --- a/modelscope/hub/errors.py +++ b/modelscope/hub/errors.py @@ -32,3 +32,18 @@ def raise_on_error(rsp): return True else: raise RequestError(rsp['Message']) + + +# TODO use raise_on_error instead if modelhub and datahub response have uniform structures, +def datahub_raise_on_error(url, rsp): + """If response error, raise exception + + Args: + rsp (_type_): The server response + """ + if rsp.get('Code') == 200: + return True + else: + raise RequestError( + f"Url = {url}, Status = {rsp.get('status')}, error = {rsp.get('error')}, message = {rsp.get('message')}" + ) diff --git a/modelscope/msdatasets/config.py b/modelscope/msdatasets/config.py index e916b3ec..00c24c3a 100644 --- a/modelscope/msdatasets/config.py +++ b/modelscope/msdatasets/config.py @@ -19,4 +19,4 @@ DOWNLOADED_DATASETS_PATH = Path( os.getenv('DOWNLOADED_DATASETS_PATH', DEFAULT_DOWNLOADED_DATASETS_PATH)) MS_HUB_ENDPOINT = os.environ.get('MS_HUB_ENDPOINT', - 'http://101.201.119.157:31752') + 'http://123.57.189.90:31752') diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0466894c..90964b36 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -3,7 +3,7 @@ from typing import (Any, Callable, Dict, Iterable, List, Mapping, Optional, Sequence, Union) import numpy as np -from datasets import Dataset +from datasets import Dataset, DatasetDict from datasets import load_dataset as hf_load_dataset from 
datasets.config import TF_AVAILABLE, TORCH_AVAILABLE from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES @@ -12,7 +12,7 @@ from datasets.utils.file_utils import (is_relative_path, from modelscope.msdatasets.config import MS_DATASETS_CACHE from modelscope.msdatasets.utils.ms_api import MsApi -from modelscope.utils.constant import Hubs +from modelscope.utils.constant import DownloadMode, Hubs from modelscope.utils.logger import get_logger logger = get_logger() @@ -34,6 +34,10 @@ class MsDataset: def __init__(self, hf_ds: Dataset, target: Optional[str] = None): self._hf_ds = hf_ds + if target is not None and target not in self._hf_ds.features: + raise TypeError( + f'"target" must be a column of the dataset({list(self._hf_ds.features.keys())}, but got {target}' + ) self.target = target def __iter__(self): @@ -48,17 +52,23 @@ class MsDataset: @classmethod def from_hf_dataset(cls, - hf_ds: Dataset, + hf_ds: Union[Dataset, DatasetDict], target: str = None) -> Union[dict, 'MsDataset']: if isinstance(hf_ds, Dataset): return cls(hf_ds, target) - if len(hf_ds.keys()) == 1: - return cls(next(iter(hf_ds.values())), target) - return {k: cls(v, target) for k, v in hf_ds.items()} + elif isinstance(hf_ds, DatasetDict): + if len(hf_ds.keys()) == 1: + return cls(next(iter(hf_ds.values())), target) + return {k: cls(v, target) for k, v in hf_ds.items()} + else: + raise TypeError( + f'"hf_ds" must be a Dataset or DatasetDict, but got {type(hf_ds)}' + ) @staticmethod def load( dataset_name: Union[str, list], + namespace: Optional[str] = None, target: Optional[str] = None, version: Optional[str] = None, hub: Optional[Hubs] = Hubs.modelscope, @@ -67,23 +77,32 @@ class MsDataset: data_dir: Optional[str] = None, data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, - Sequence[str]]]]] = None + Sequence[str]]]]] = None, + download_mode: Optional[DownloadMode] = DownloadMode. + REUSE_DATASET_IF_EXISTS ) -> Union[dict, 'MsDataset']: """Load a MsDataset from the ModelScope Hub, Hugging Face Hub, urls, or a local dataset. Args: dataset_name (str): Path or name of the dataset. + namespace(str, optional): Namespace of the dataset. It should not be None, if you load a remote dataset + from Hubs.modelscope, target (str, optional): Name of the column to output. version (str, optional): Version of the dataset script to load: subset_name (str, optional): Defining the subset_name of the dataset. data_dir (str, optional): Defining the data_dir of the dataset configuration. I data_files (str or Sequence or Mapping, optional): Path(s) to source data file(s). split (str, optional): Which split of the data to load. - hub (Hubs, optional): When loading from a remote hub, where it is from + hub (Hubs or str, optional): When loading from a remote hub, where it is from. default Hubs.modelscope + download_mode (DownloadMode or str, optional): How to treat existing datasets. default + DownloadMode.REUSE_DATASET_IF_EXISTS Returns: MsDataset (obj:`MsDataset`): MsDataset object for a certain dataset. 
""" + download_mode = DownloadMode(download_mode + or DownloadMode.REUSE_DATASET_IF_EXISTS) + hub = Hubs(hub or Hubs.modelscope) if hub == Hubs.huggingface: dataset = hf_load_dataset( dataset_name, @@ -91,21 +110,25 @@ class MsDataset: revision=version, split=split, data_dir=data_dir, - data_files=data_files) + data_files=data_files, + download_mode=download_mode.value) return MsDataset.from_hf_dataset(dataset, target=target) - else: + elif hub == Hubs.modelscope: return MsDataset._load_ms_dataset( dataset_name, + namespace=namespace, target=target, subset_name=subset_name, version=version, split=split, data_dir=data_dir, - data_files=data_files) + data_files=data_files, + download_mode=download_mode) @staticmethod def _load_ms_dataset( dataset_name: Union[str, list], + namespace: Optional[str] = None, target: Optional[str] = None, version: Optional[str] = None, subset_name: Optional[str] = None, @@ -113,17 +136,19 @@ class MsDataset: data_dir: Optional[str] = None, data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, - Sequence[str]]]]] = None + Sequence[str]]]]] = None, + download_mode: Optional[DownloadMode] = None ) -> Union[dict, 'MsDataset']: if isinstance(dataset_name, str): use_hf = False if dataset_name in _PACKAGED_DATASETS_MODULES or os.path.isdir(dataset_name) or \ (os.path.isfile(dataset_name) and dataset_name.endswith('.py')): use_hf = True - elif is_relative_path(dataset_name): + elif is_relative_path(dataset_name) and dataset_name.count( + '/') == 0: ms_api = MsApi() dataset_scripts = ms_api.fetch_dataset_scripts( - dataset_name, version) + dataset_name, namespace, download_mode, version) if 'py' in dataset_scripts: # dataset copied from hf datasets dataset_name = dataset_scripts['py'][0] use_hf = True @@ -140,7 +165,8 @@ class MsDataset: split=split, data_dir=data_dir, data_files=data_files, - cache_dir=MS_DATASETS_CACHE) + cache_dir=MS_DATASETS_CACHE, + download_mode=download_mode.value) else: # TODO load from ms datahub raise NotImplementedError( diff --git a/modelscope/msdatasets/utils/ms_api.py b/modelscope/msdatasets/utils/ms_api.py index fc3bcca2..c9b49ca1 100644 --- a/modelscope/msdatasets/utils/ms_api.py +++ b/modelscope/msdatasets/utils/ms_api.py @@ -1,11 +1,14 @@ import os +import shutil from collections import defaultdict from typing import Optional import requests +from modelscope.hub.errors import NotExistError, datahub_raise_on_error from modelscope.msdatasets.config import (DOWNLOADED_DATASETS_PATH, MS_HUB_ENDPOINT) +from modelscope.utils.constant import DownloadMode from modelscope.utils.logger import get_logger logger = get_logger() @@ -27,23 +30,38 @@ class MsApi: def fetch_dataset_scripts(self, dataset_name: str, - version: Optional[str] = 'master', - force_download=False): - datahub_url = f'{self.endpoint}/api/v1/datasets?Query={dataset_name}' - r = requests.get(datahub_url) - r.raise_for_status() - dataset_list = r.json()['Data'] - if len(dataset_list) == 0: - return None - dataset_id = dataset_list[0]['Id'] + namespace: str, + download_mode: Optional[DownloadMode], + version: Optional[str] = 'master'): + if namespace is None: + raise ValueError( + f'Dataset from Hubs.modelscope should have a valid "namespace", but get {namespace}' + ) version = version or 'master' - datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}' - r = requests.get(datahub_url) - r.raise_for_status() - file_list = r.json()['Data']['Files'] cache_dir = os.path.join(DOWNLOADED_DATASETS_PATH, dataset_name, - version) + 
namespace, version) + download_mode = DownloadMode(download_mode + or DownloadMode.REUSE_DATASET_IF_EXISTS) + if download_mode == DownloadMode.FORCE_REDOWNLOAD and os.path.exists( + cache_dir): + shutil.rmtree(cache_dir) os.makedirs(cache_dir, exist_ok=True) + datahub_url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}' + r = requests.get(datahub_url) + resp = r.json() + datahub_raise_on_error(datahub_url, resp) + dataset_id = resp['Data']['Id'] + datahub_url = f'{self.endpoint}/api/v1/datasets/{dataset_id}/repo/tree?Revision={version}' + r = requests.get(datahub_url) + resp = r.json() + datahub_raise_on_error(datahub_url, resp) + file_list = resp['Data'] + if file_list is None: + raise NotExistError( + f'The modelscope dataset [dataset_name = {dataset_name}, namespace = {namespace}, ' + f'version = {version}] dose not exist') + + file_list = file_list['Files'] local_paths = defaultdict(list) for file_info in file_list: file_path = file_info['Path'] @@ -54,7 +72,7 @@ class MsApi: r.raise_for_status() content = r.json()['Data']['Content'] local_path = os.path.join(cache_dir, file_path) - if os.path.exists(local_path) and not force_download: + if os.path.exists(local_path): logger.warning( f"Reusing dataset {dataset_name}'s python file ({local_path})" ) diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index f2215359..55f015e8 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -1,4 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import enum class Fields(object): @@ -69,13 +70,20 @@ class InputFields(object): audio = 'audio' -class Hubs(object): +class Hubs(enum.Enum): """ Source from which an entity (such as a Dataset or Model) is stored """ modelscope = 'modelscope' huggingface = 'huggingface' +class DownloadMode(enum.Enum): + """ How to treat existing datasets + """ + REUSE_DATASET_IF_EXISTS = 'reuse_dataset_if_exists' + FORCE_REDOWNLOAD = 'force_redownload' + + class ModelFile(object): CONFIGURATION = 'configuration.json' README = 'README.md' diff --git a/tests/msdatasets/test_ms_dataset.py b/tests/msdatasets/test_ms_dataset.py index de413d5f..50767fd8 100644 --- a/tests/msdatasets/test_ms_dataset.py +++ b/tests/msdatasets/test_ms_dataset.py @@ -32,11 +32,12 @@ class ImgPreprocessor(Preprocessor): class MsDatasetTest(unittest.TestCase): - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ds_basic(self): - ms_ds_full = MsDataset.load('squad') + ms_ds_full = MsDataset.load('squad', namespace='damotest') ms_ds_full_hf = hfdata.load_dataset('squad') - ms_ds_train = MsDataset.load('squad', split='train') + ms_ds_train = MsDataset.load( + 'squad', namespace='damotest', split='train') ms_ds_train_hf = hfdata.load_dataset('squad', split='train') ms_image_train = MsDataset.from_hf_dataset( hfdata.load_dataset('beans', split='train')) @@ -48,7 +49,7 @@ class MsDatasetTest(unittest.TestCase): print(next(iter(ms_ds_train))) print(next(iter(ms_image_train))) - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @require_torch def test_to_torch_dataset_text(self): model_id = 'damo/bert-base-sst2' @@ -57,13 +58,14 @@ class MsDatasetTest(unittest.TestCase): nlp_model.model_dir, first_sequence='context', second_sequence=None) - ms_ds_train = MsDataset.load('squad', split='train') + ms_ds_train = MsDataset.load( + 'squad', 
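
Putting the new namespace and download_mode arguments together, a sketch of loading a ModelScope-hub dataset (dataset name and namespace follow the tests in this patch; the import path is assumed from the package layout):

    from modelscope.msdatasets import MsDataset
    from modelscope.utils.constant import DownloadMode

    ds = MsDataset.load(
        'squad',
        namespace='damotest',
        split='train',
        download_mode=DownloadMode.FORCE_REDOWNLOAD)
    print(next(iter(ds)))
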
namespace='damotest', split='train')
         pt_dataset = ms_ds_train.to_torch_dataset(preprocessors=preprocessor)
         import torch
         dataloader = torch.utils.data.DataLoader(pt_dataset, batch_size=5)
         print(next(iter(dataloader)))
 
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     @require_tf
     def test_to_tf_dataset_text(self):
         import tensorflow as tf
@@ -74,7 +76,8 @@ class MsDatasetTest(unittest.TestCase):
             nlp_model.model_dir,
             first_sequence='context',
             second_sequence=None)
-        ms_ds_train = MsDataset.load('squad', split='train')
+        ms_ds_train = MsDataset.load(
+            'squad', namespace='damotest', split='train')
         tf_dataset = ms_ds_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
@@ -85,8 +88,8 @@ class MsDatasetTest(unittest.TestCase):
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     @require_torch
     def test_to_torch_dataset_img(self):
-        ms_image_train = MsDataset.from_hf_dataset(
-            hfdata.load_dataset('beans', split='train'))
+        ms_image_train = MsDataset.load(
+            'beans', namespace='damotest', split='train')
         pt_dataset = ms_image_train.to_torch_dataset(
             preprocessors=ImgPreprocessor(
                 image_path='image_file_path', label='labels'))
@@ -99,7 +102,8 @@ class MsDatasetTest(unittest.TestCase):
     def test_to_tf_dataset_img(self):
         import tensorflow as tf
         tf.compat.v1.enable_eager_execution()
-        ms_image_train = MsDataset.load('beans', split='train')
+        ms_image_train = MsDataset.load(
+            'beans', namespace='damotest', split='train')
         tf_dataset = ms_image_train.to_tf_dataset(
             batch_size=5,
             shuffle=True,
diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py
index de60ff0b..48a715f1 100644
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -62,7 +62,8 @@ class ImageMattingTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
-        dataset = MsDataset.load('beans', split='train', target='image')
+        dataset = MsDataset.load(
+            'beans', namespace='damotest', split='train', target='image')
         img_matting = pipeline(Tasks.image_matting, model=self.model_id)
         result = img_matting(dataset)
         for i in range(10):
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index f913490c..1bf9f7ca 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -87,12 +87,16 @@ class SequenceClassificationTest(unittest.TestCase):
         result = text_classification(dataset)
         self.printDataset(result)
 
-    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_run_with_modelscope_dataset(self):
         text_classification = pipeline(task=Tasks.text_classification)
         # loaded from modelscope dataset
         dataset = MsDataset.load(
-            'squad', split='train', target='context', hub=Hubs.modelscope)
+            'squad',
+            namespace='damotest',
+            split='train',
+            target='context',
+            hub=Hubs.modelscope)
         result = text_classification(dataset)
         self.printDataset(result)
 

From 0d17eb5b395b0d1a74e1a10ad754843bd6dfc71b Mon Sep 17 00:00:00 2001
From: "mulin.lyh"
Date: Tue, 28 Jun 2022 21:12:15 +0800
Subject: [PATCH 6/9] [to #42849800 #42822853 #42822836 #42822791 #42822717
 #42820011] fix: test bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix unit test bugs.

Link:
https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9186775 * [to #42849800 #42822853 #42822836 #42822791 #42822717 #42820011]fix: test bugs --- modelscope/hub/api.py | 84 ++++++++++++++++------- modelscope/hub/errors.py | 4 ++ modelscope/hub/file_download.py | 16 +++-- modelscope/hub/git.py | 8 +++ modelscope/hub/repository.py | 12 ++-- modelscope/hub/snapshot_download.py | 16 ++--- modelscope/hub/utils/caching.py | 8 ++- modelscope/utils/hub.py | 5 +- tests/hub/test_hub_operation.py | 42 ++++++++++-- tests/hub/test_hub_private_files.py | 85 ++++++++++++++++++++++++ tests/hub/test_hub_private_repository.py | 9 ++- tests/hub/test_hub_repository.py | 24 ++----- 12 files changed, 235 insertions(+), 78 deletions(-) create mode 100644 tests/hub/test_hub_private_files.py diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index d102219b..e79bfd41 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -9,7 +9,7 @@ import requests from modelscope.utils.logger import get_logger from .constants import MODELSCOPE_URL_SCHEME -from .errors import NotExistError, is_ok, raise_on_error +from .errors import InvalidParameter, NotExistError, is_ok, raise_on_error from .utils.utils import (get_endpoint, get_gitlab_domain, model_id_to_group_owner_name) @@ -61,17 +61,21 @@ class HubApi: return d['Data']['AccessToken'], cookies - def create_model(self, model_id: str, chinese_name: str, visibility: int, - license: str) -> str: + def create_model( + self, + model_id: str, + visibility: str, + license: str, + chinese_name: Optional[str] = None, + ) -> str: """ Create model repo at ModelScopeHub Args: model_id:(`str`): The model id - chinese_name(`str`): chinese name of the model - visibility(`int`): visibility of the model(1-private, 3-internal, 5-public) - license(`str`): license of the model, candidates can be found at: TBA - + visibility(`int`): visibility of the model(1-private, 5-public), default public. + license(`str`): license of the model, default none. + chinese_name(`str`, *optional*): chinese name of the model Returns: name of the model created @@ -79,6 +83,8 @@ class HubApi: model_id = {owner}/{name} """ + if model_id is None: + raise InvalidParameter('model_id is required!') cookies = ModelScopeConfig.get_cookies() if cookies is None: raise ValueError('Token does not exist, please login first.') @@ -151,11 +157,33 @@ class HubApi: else: r.raise_for_status() + def _check_cookie(self, + use_cookies: Union[bool, + CookieJar] = False) -> CookieJar: + cookies = None + if isinstance(use_cookies, CookieJar): + cookies = use_cookies + elif use_cookies: + cookies = ModelScopeConfig.get_cookies() + if cookies is None: + raise ValueError('Token does not exist, please login first.') + return cookies + def get_model_branches_and_tags( self, model_id: str, + use_cookies: Union[bool, CookieJar] = False ) -> Tuple[List[str], List[str]]: - cookies = ModelScopeConfig.get_cookies() + """Get model branch and tags. + + Args: + model_id (str): The model id + use_cookies (Union[bool, CookieJar], optional): If is cookieJar, we will use this cookie, if True, will + will load cookie from local. Defaults to False. 
+        Returns:
+            Tuple[List[str], List[str]]: the branch list and tag list of the model.
+        """
+        cookies = self._check_cookie(use_cookies)
         path = f'{self.endpoint}/api/v1/models/{model_id}/revisions'
 
         r = requests.get(path, cookies=cookies)
@@ -169,23 +197,33 @@ class HubApi:
         ] if info['RevisionMap']['Tags'] else []
         return branches, tags
 
-    def get_model_files(
-            self,
-            model_id: str,
-            revision: Optional[str] = 'master',
-            root: Optional[str] = None,
-            recursive: Optional[str] = False,
-            use_cookies: Union[bool, CookieJar] = False) -> List[dict]:
+    def get_model_files(self,
+                        model_id: str,
+                        revision: Optional[str] = 'master',
+                        root: Optional[str] = None,
+                        recursive: Optional[str] = False,
+                        use_cookies: Union[bool, CookieJar] = False,
+                        is_snapshot: Optional[bool] = True) -> List[dict]:
+        """List the model files.
 
-        cookies = None
-        if isinstance(use_cookies, CookieJar):
-            cookies = use_cookies
-        elif use_cookies:
-            cookies = ModelScopeConfig.get_cookies()
-            if cookies is None:
-                raise ValueError('Token does not exist, please login first.')
+        Args:
+            model_id (str): The model id
+            revision (Optional[str], optional): The branch or tag name. Defaults to 'master'.
+            root (Optional[str], optional): The root path. Defaults to None.
+            recursive (Optional[str], optional): Whether to list files recursively. Defaults to False.
+            use_cookies (Union[bool, CookieJar], optional): If a CookieJar is given, it is
+                used directly; if True, the cookie is loaded from local storage. Defaults to False.
+            is_snapshot (Optional[bool], optional): True when called from snapshot_download, otherwise False.
 
-        path = f'{self.endpoint}/api/v1/models/{model_id}/repo/files?Revision={revision}&Recursive={recursive}'
+        Raises:
+            ValueError: If use_cookies is True but no local cookie exists.
+
+        Returns:
+            List[dict]: Model file list.
+        """
+        path = '%s/api/v1/models/%s/repo/files?Revision=%s&Recursive=%s&Snapshot=%s' % (
+            self.endpoint, model_id, revision, recursive, is_snapshot)
+        cookies = self._check_cookie(use_cookies)
         if root is not None:
             path = path + f'&Root={root}'
diff --git a/modelscope/hub/errors.py b/modelscope/hub/errors.py
index d39036a0..9a19fdb5 100644
--- a/modelscope/hub/errors.py
+++ b/modelscope/hub/errors.py
@@ -10,6 +10,10 @@ class GitError(Exception):
     pass
 
 
+class InvalidParameter(Exception):
+    pass
+
+
 def is_ok(rsp):
     """
     Check the request is ok
diff --git a/modelscope/hub/file_download.py b/modelscope/hub/file_download.py
index b92bf89c..60aae3b6 100644
--- a/modelscope/hub/file_download.py
+++ b/modelscope/hub/file_download.py
@@ -7,6 +7,7 @@ import tempfile
 import time
 from functools import partial
 from hashlib import sha256
+from http.cookiejar import CookieJar
 from pathlib import Path
 from typing import BinaryIO, Dict, Optional, Union
 from uuid import uuid4
@@ -107,7 +108,9 @@ def model_file_download(
 
     _api = HubApi()
     headers = {'user-agent': http_user_agent(user_agent=user_agent, )}
-    branches, tags = _api.get_model_branches_and_tags(model_id)
+    cookies = ModelScopeConfig.get_cookies()
+    branches, tags = _api.get_model_branches_and_tags(
+        model_id, use_cookies=False if cookies is None else cookies)
     file_to_download_info = None
     is_commit_id = False
     if revision in branches or revision in tags:  # The revision is version or tag,
@@ -117,18 +120,19 @@ def model_file_download(
             model_id=model_id,
             revision=revision,
             recursive=True,
-        )
+            use_cookies=False if cookies is None else cookies,
+            is_snapshot=False)
         for model_file in model_files:
             if model_file['Type'] == 'tree':
                 continue
 
             if model_file['Path'] == file_path:
-                model_file['Branch'] = revision
                 if
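
A usage sketch of the reworked hub helpers above (a public model, so no login cookie is needed; the model id is taken from the tests in this section):

    from modelscope.hub.api import HubApi

    api = HubApi()
    branches, tags = api.get_model_branches_and_tags('damo/bert-base-sst2')
    files = api.get_model_files(
        'damo/bert-base-sst2', revision='master', recursive=True)
    print(branches, tags, [f['Path'] for f in files if f['Type'] != 'tree'])
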
cache.exists(model_file): return cache.get_file_by_info(model_file) else: file_to_download_info = model_file + break if file_to_download_info is None: raise NotExistError('The file path: %s not exist in: %s' % @@ -141,8 +145,6 @@ def model_file_download( return cached_file_path # the file is in cache. is_commit_id = True # we need to download again - # TODO: skip using JWT for authorization, use cookie instead - cookies = ModelScopeConfig.get_cookies() url_to_download = get_file_download_url(model_id, file_path, revision) file_to_download_info = { 'Path': file_path, @@ -202,7 +204,7 @@ def http_get_file( url: str, local_dir: str, file_name: str, - cookies: Dict[str, str], + cookies: CookieJar, headers: Optional[Dict[str, str]] = None, ): """ @@ -217,7 +219,7 @@ def http_get_file( local directory where the downloaded file stores file_name(`str`): name of the file stored in `local_dir` - cookies(`Dict[str, str]`): + cookies(`CookieJar`): cookies used to authentication the user, which is used for downloading private repos headers(`Optional[Dict[str, str]] = None`): http headers to carry necessary info when requesting the remote file diff --git a/modelscope/hub/git.py b/modelscope/hub/git.py index 37f61814..54161f1c 100644 --- a/modelscope/hub/git.py +++ b/modelscope/hub/git.py @@ -70,6 +70,14 @@ class GitCommandWrapper(metaclass=Singleton): except GitError: return False + def git_lfs_install(self, repo_dir): + cmd = ['git', '-C', repo_dir, 'lfs', 'install'] + try: + self._run_git_command(*cmd) + return True + except GitError: + return False + def clone(self, repo_base_dir: str, token: str, diff --git a/modelscope/hub/repository.py b/modelscope/hub/repository.py index d9322144..37dec571 100644 --- a/modelscope/hub/repository.py +++ b/modelscope/hub/repository.py @@ -1,7 +1,7 @@ import os from typing import List, Optional -from modelscope.hub.errors import GitError +from modelscope.hub.errors import GitError, InvalidParameter from modelscope.utils.logger import get_logger from .api import ModelScopeConfig from .constants import MODELSCOPE_URL_SCHEME @@ -49,6 +49,8 @@ class Repository: git_wrapper = GitCommandWrapper() if not git_wrapper.is_lfs_installed(): logger.error('git lfs is not installed, please install.') + else: + git_wrapper.git_lfs_install(self.model_dir) # init repo lfs self.git_wrapper = GitCommandWrapper(git_path) os.makedirs(self.model_dir, exist_ok=True) @@ -74,8 +76,6 @@ class Repository: def push(self, commit_message: str, - files: List[str] = list(), - all_files: bool = False, branch: Optional[str] = 'master', force: bool = False): """Push local to remote, this method will do. @@ -86,8 +86,12 @@ class Repository: commit_message (str): commit message revision (Optional[str], optional): which branch to push. Defaults to 'master'. """ + if commit_message is None: + msg = 'commit_message must be provided!' 
+ raise InvalidParameter(msg) url = self.git_wrapper.get_repo_remote_url(self.model_dir) - self.git_wrapper.add(self.model_dir, files, all_files) + self.git_wrapper.pull(self.model_dir) + self.git_wrapper.add(self.model_dir, all_files=True) self.git_wrapper.commit(self.model_dir, commit_message) self.git_wrapper.push( repo_dir=self.model_dir, diff --git a/modelscope/hub/snapshot_download.py b/modelscope/hub/snapshot_download.py index 90d850f4..91463f76 100644 --- a/modelscope/hub/snapshot_download.py +++ b/modelscope/hub/snapshot_download.py @@ -20,8 +20,7 @@ def snapshot_download(model_id: str, revision: Optional[str] = 'master', cache_dir: Union[str, Path, None] = None, user_agent: Optional[Union[Dict, str]] = None, - local_files_only: Optional[bool] = False, - private: Optional[bool] = False) -> str: + local_files_only: Optional[bool] = False) -> str: """Download all files of a repo. Downloads a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from a repo, because you don't know which @@ -79,8 +78,10 @@ def snapshot_download(model_id: str, # make headers headers = {'user-agent': http_user_agent(user_agent=user_agent, )} _api = HubApi() + cookies = ModelScopeConfig.get_cookies() # get file list from model repo - branches, tags = _api.get_model_branches_and_tags(model_id) + branches, tags = _api.get_model_branches_and_tags( + model_id, use_cookies=False if cookies is None else cookies) if revision not in branches and revision not in tags: raise NotExistError('The specified branch or tag : %s not exist!' % revision) @@ -89,11 +90,8 @@ def snapshot_download(model_id: str, model_id=model_id, revision=revision, recursive=True, - use_cookies=private) - - cookies = None - if private: - cookies = ModelScopeConfig.get_cookies() + use_cookies=False if cookies is None else cookies, + is_snapshot=True) for model_file in model_files: if model_file['Type'] == 'tree': @@ -116,7 +114,7 @@ def snapshot_download(model_id: str, local_dir=tempfile.gettempdir(), file_name=model_file['Name'], headers=headers, - cookies=None if cookies is None else cookies.get_dict()) + cookies=cookies) # put file to cache cache.put_file( model_file, diff --git a/modelscope/hub/utils/caching.py b/modelscope/hub/utils/caching.py index ac258385..7675e49b 100644 --- a/modelscope/hub/utils/caching.py +++ b/modelscope/hub/utils/caching.py @@ -101,8 +101,9 @@ class FileSystemCache(object): Args: key (dict): The cache key. """ - self.cached_files.remove(key) - self.save_cached_files() + if key in self.cached_files: + self.cached_files.remove(key) + self.save_cached_files() def exists(self, key): for cache_file in self.cached_files: @@ -204,6 +205,7 @@ class ModelFileSystemCache(FileSystemCache): return orig_path else: self.remove_key(cached_file) + break return None @@ -230,6 +232,7 @@ class ModelFileSystemCache(FileSystemCache): cached_key['Revision'].startswith(key['Revision']) or key['Revision'].startswith(cached_key['Revision'])): is_exists = True + break file_path = os.path.join(self.cache_root_location, model_file_info['Path']) if is_exists: @@ -253,6 +256,7 @@ class ModelFileSystemCache(FileSystemCache): cached_file['Path']) if os.path.exists(file_path): os.remove(file_path) + break def put_file(self, model_file_info, model_file_location): """Put model on model_file_location to cache, the model first download to /tmp, and move to cache. 
diff --git a/modelscope/utils/hub.py b/modelscope/utils/hub.py index c427b7a3..3b7e80ef 100644 --- a/modelscope/utils/hub.py +++ b/modelscope/utils/hub.py @@ -31,9 +31,10 @@ def create_model_if_not_exist( else: api.create_model( model_id=model_id, - chinese_name=chinese_name, visibility=visibility, - license=license) + license=license, + chinese_name=chinese_name, + ) print(f'model {model_id} successfully created.') return True diff --git a/tests/hub/test_hub_operation.py b/tests/hub/test_hub_operation.py index 035b183e..d193ce32 100644 --- a/tests/hub/test_hub_operation.py +++ b/tests/hub/test_hub_operation.py @@ -3,6 +3,7 @@ import os import tempfile import unittest import uuid +from shutil import rmtree from modelscope.hub.api import HubApi, ModelScopeConfig from modelscope.hub.constants import Licenses, ModelVisibility @@ -23,7 +24,6 @@ download_model_file_name = 'test.bin' class HubOperationTest(unittest.TestCase): def setUp(self): - self.old_cwd = os.getcwd() self.api = HubApi() # note this is temporary before official account management is ready self.api.login(USER_NAME, PASSWORD) @@ -31,19 +31,18 @@ class HubOperationTest(unittest.TestCase): self.model_id = '%s/%s' % (model_org, self.model_name) self.api.create_model( model_id=self.model_id, - chinese_name=model_chinese_name, visibility=ModelVisibility.PUBLIC, - license=Licenses.APACHE_V2) + license=Licenses.APACHE_V2, + chinese_name=model_chinese_name, + ) temporary_dir = tempfile.mkdtemp() self.model_dir = os.path.join(temporary_dir, self.model_name) repo = Repository(self.model_dir, clone_from=self.model_id) - os.chdir(self.model_dir) os.system("echo 'testtest'>%s" - % os.path.join(self.model_dir, 'test.bin')) - repo.push('add model', all_files=True) + % os.path.join(self.model_dir, download_model_file_name)) + repo.push('add model') def tearDown(self): - os.chdir(self.old_cwd) self.api.delete_model(model_id=self.model_id) def test_model_repo_creation(self): @@ -79,6 +78,35 @@ class HubOperationTest(unittest.TestCase): mdtime2 = os.path.getmtime(downloaded_file_path) assert mdtime1 == mdtime2 + def test_download_public_without_login(self): + rmtree(ModelScopeConfig.path_credential) + snapshot_path = snapshot_download(model_id=self.model_id) + downloaded_file_path = os.path.join(snapshot_path, + download_model_file_name) + assert os.path.exists(downloaded_file_path) + temporary_dir = tempfile.mkdtemp() + downloaded_file = model_file_download( + model_id=self.model_id, + file_path=download_model_file_name, + cache_dir=temporary_dir) + assert os.path.exists(downloaded_file) + self.api.login(USER_NAME, PASSWORD) + + def test_snapshot_delete_download_cache_file(self): + snapshot_path = snapshot_download(model_id=self.model_id) + downloaded_file_path = os.path.join(snapshot_path, + download_model_file_name) + assert os.path.exists(downloaded_file_path) + os.remove(downloaded_file_path) + # download again in cache + file_download_path = model_file_download( + model_id=self.model_id, file_path='README.md') + assert os.path.exists(file_download_path) + # deleted file need download again + file_download_path = model_file_download( + model_id=self.model_id, file_path=download_model_file_name) + assert os.path.exists(file_download_path) + if __name__ == '__main__': unittest.main() diff --git a/tests/hub/test_hub_private_files.py b/tests/hub/test_hub_private_files.py new file mode 100644 index 00000000..b9c71456 --- /dev/null +++ b/tests/hub/test_hub_private_files.py @@ -0,0 +1,85 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import tempfile +import unittest +import uuid + +from requests.exceptions import HTTPError + +from modelscope.hub.api import HubApi +from modelscope.hub.constants import Licenses, ModelVisibility +from modelscope.hub.errors import GitError +from modelscope.hub.file_download import model_file_download +from modelscope.hub.repository import Repository +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.utils.constant import ModelFile + +USER_NAME = 'maasadmin' +PASSWORD = '12345678' +USER_NAME2 = 'sdkdev' + +model_chinese_name = '达摩卡通化模型' +model_org = 'unittest' + + +class HubPrivateFileDownloadTest(unittest.TestCase): + + def setUp(self): + self.old_cwd = os.getcwd() + self.api = HubApi() + # note this is temporary before official account management is ready + self.token, _ = self.api.login(USER_NAME, PASSWORD) + self.model_name = uuid.uuid4().hex + self.model_id = '%s/%s' % (model_org, self.model_name) + self.api.create_model( + model_id=self.model_id, + visibility=ModelVisibility.PRIVATE, # 1-private, 5-public + license=Licenses.APACHE_V2, + chinese_name=model_chinese_name, + ) + + def tearDown(self): + os.chdir(self.old_cwd) + self.api.delete_model(model_id=self.model_id) + + def test_snapshot_download_private_model(self): + snapshot_path = snapshot_download(self.model_id) + assert os.path.exists(os.path.join(snapshot_path, ModelFile.README)) + + def test_snapshot_download_private_model_no_permission(self): + self.token, _ = self.api.login(USER_NAME2, PASSWORD) + with self.assertRaises(HTTPError): + snapshot_download(self.model_id) + self.api.login(USER_NAME, PASSWORD) + + def test_download_file_private_model(self): + file_path = model_file_download(self.model_id, ModelFile.README) + assert os.path.exists(file_path) + + def test_download_file_private_model_no_permission(self): + self.token, _ = self.api.login(USER_NAME2, PASSWORD) + with self.assertRaises(HTTPError): + model_file_download(self.model_id, ModelFile.README) + self.api.login(USER_NAME, PASSWORD) + + def test_snapshot_download_local_only(self): + with self.assertRaises(ValueError): + snapshot_download(self.model_id, local_files_only=True) + snapshot_path = snapshot_download(self.model_id) + assert os.path.exists(os.path.join(snapshot_path, ModelFile.README)) + snapshot_path = snapshot_download(self.model_id, local_files_only=True) + assert os.path.exists(snapshot_path) + + def test_file_download_local_only(self): + with self.assertRaises(ValueError): + model_file_download( + self.model_id, ModelFile.README, local_files_only=True) + file_path = model_file_download(self.model_id, ModelFile.README) + assert os.path.exists(file_path) + file_path = model_file_download( + self.model_id, ModelFile.README, local_files_only=True) + assert os.path.exists(file_path) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/hub/test_hub_private_repository.py b/tests/hub/test_hub_private_repository.py index b6e3536c..01a89586 100644 --- a/tests/hub/test_hub_private_repository.py +++ b/tests/hub/test_hub_private_repository.py @@ -5,6 +5,7 @@ import unittest import uuid from modelscope.hub.api import HubApi +from modelscope.hub.constants import Licenses, ModelVisibility from modelscope.hub.errors import GitError from modelscope.hub.repository import Repository @@ -16,9 +17,6 @@ model_chinese_name = '达摩卡通化模型' model_org = 'unittest' DEFAULT_GIT_PATH = 'git' -sample_model_url = 'https://mindscope.oss-cn-hangzhou.aliyuncs.com/test_models/mnist-12.onnx' -download_model_file_name = 
'mnist-12.onnx' - class HubPrivateRepositoryTest(unittest.TestCase): @@ -31,9 +29,10 @@ class HubPrivateRepositoryTest(unittest.TestCase): self.model_id = '%s/%s' % (model_org, self.model_name) self.api.create_model( model_id=self.model_id, + visibility=ModelVisibility.PRIVATE, # 1-private, 5-public + license=Licenses.APACHE_V2, chinese_name=model_chinese_name, - visibility=1, # 1-private, 5-public - license='apache-2.0') + ) def tearDown(self): self.api.login(USER_NAME, PASSWORD) diff --git a/tests/hub/test_hub_repository.py b/tests/hub/test_hub_repository.py index 7b1cc751..99f63eca 100644 --- a/tests/hub/test_hub_repository.py +++ b/tests/hub/test_hub_repository.py @@ -2,7 +2,6 @@ import os import shutil import tempfile -import time import unittest import uuid from os.path import expanduser @@ -10,6 +9,7 @@ from os.path import expanduser from requests import delete from modelscope.hub.api import HubApi +from modelscope.hub.constants import Licenses, ModelVisibility from modelscope.hub.errors import NotExistError from modelscope.hub.file_download import model_file_download from modelscope.hub.repository import Repository @@ -55,9 +55,10 @@ class HubRepositoryTest(unittest.TestCase): self.model_id = '%s/%s' % (model_org, self.model_name) self.api.create_model( model_id=self.model_id, + visibility=ModelVisibility.PUBLIC, # 1-private, 5-public + license=Licenses.APACHE_V2, chinese_name=model_chinese_name, - visibility=5, # 1-private, 5-public - license='apache-2.0') + ) temporary_dir = tempfile.mkdtemp() self.model_dir = os.path.join(temporary_dir, self.model_name) @@ -81,27 +82,12 @@ class HubRepositoryTest(unittest.TestCase): os.chdir(self.model_dir) os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) - repo.push('test', all_files=True) + repo.push('test') add1 = model_file_download(self.model_id, 'add1.py') assert os.path.exists(add1) add2 = model_file_download(self.model_id, 'add2.py') assert os.path.exists(add2) - def test_push_files(self): - repo = Repository(self.model_dir, clone_from=self.model_id) - assert os.path.exists(os.path.join(self.model_dir, 'README.md')) - os.system("echo '111'>%s" % os.path.join(self.model_dir, 'add1.py')) - os.system("echo '222'>%s" % os.path.join(self.model_dir, 'add2.py')) - os.system("echo '333'>%s" % os.path.join(self.model_dir, 'add3.py')) - repo.push('test', files=['add1.py', 'add2.py'], all_files=False) - add1 = model_file_download(self.model_id, 'add1.py') - assert os.path.exists(add1) - add2 = model_file_download(self.model_id, 'add2.py') - assert os.path.exists(add2) - with self.assertRaises(NotExistError) as cm: - model_file_download(self.model_id, 'add3.py') - print(cm.exception) - if __name__ == '__main__': unittest.main() From 1cb2fa850f2f9b468798b062bb4bd23065eeea88 Mon Sep 17 00:00:00 2001 From: "wenmeng.zwm" Date: Tue, 28 Jun 2022 22:19:37 +0800 Subject: [PATCH 7/9] [to #42362425] update version with 0.2.1 --- modelscope/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope/version.py b/modelscope/version.py index df9144c5..fc79d63d 100644 --- a/modelscope/version.py +++ b/modelscope/version.py @@ -1 +1 @@ -__version__ = '0.1.1' +__version__ = '0.2.1' From 576b7cffb11532c3431fbfc2998ae833408c327b Mon Sep 17 00:00:00 2001 From: "zhangzhicheng.zzc" Date: Wed, 29 Jun 2022 09:12:59 +0800 Subject: [PATCH 8/9] [to #42322933] add pipeline params for preprocess and forward & zeroshot classification Link: 
https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9180863 --- modelscope/metainfo.py | 2 + modelscope/models/__init__.py | 3 +- modelscope/models/nlp/__init__.py | 1 + .../nlp/sbert_for_zero_shot_classification.py | 50 ++++++++++ modelscope/pipelines/base.py | 55 ++++++++--- modelscope/pipelines/builder.py | 3 + modelscope/pipelines/nlp/__init__.py | 1 + .../nlp/zero_shot_classification_pipeline.py | 97 +++++++++++++++++++ modelscope/pipelines/outputs.py | 7 ++ modelscope/preprocessors/nlp.py | 46 ++++++++- modelscope/utils/constant.py | 1 + .../test_zero_shot_classification.py | 64 ++++++++++++ 12 files changed, 313 insertions(+), 17 deletions(-) create mode 100644 modelscope/models/nlp/sbert_for_zero_shot_classification.py create mode 100644 modelscope/pipelines/nlp/zero_shot_classification_pipeline.py create mode 100644 tests/pipelines/test_zero_shot_classification.py diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py index eda590ac..1d2ee4d2 100644 --- a/modelscope/metainfo.py +++ b/modelscope/metainfo.py @@ -52,6 +52,7 @@ class Pipelines(object): text_generation = 'text-generation' sentiment_analysis = 'sentiment-analysis' fill_mask = 'fill-mask' + zero_shot_classification = 'zero-shot-classification' # audio tasks sambert_hifigan_16k_tts = 'sambert-hifigan-16k-tts' @@ -95,6 +96,7 @@ class Preprocessors(object): bert_seq_cls_tokenizer = 'bert-seq-cls-tokenizer' palm_text_gen_tokenizer = 'palm-text-gen-tokenizer' sbert_token_cls_tokenizer = 'sbert-token-cls-tokenizer' + zero_shot_cls_tokenizer = 'zero-shot-cls-tokenizer' # audio preprocessor linear_aec_fbank = 'linear-aec-fbank' diff --git a/modelscope/models/__init__.py b/modelscope/models/__init__.py index 816c44e2..f1074f68 100644 --- a/modelscope/models/__init__.py +++ b/modelscope/models/__init__.py @@ -7,4 +7,5 @@ from .audio.tts.vocoder import Hifigan16k from .base import Model from .builder import MODELS, build_model from .multi_modal import OfaForImageCaptioning -from .nlp import BertForSequenceClassification, SbertForSentenceSimilarity +from .nlp import (BertForSequenceClassification, SbertForSentenceSimilarity, + SbertForZeroShotClassification) diff --git a/modelscope/models/nlp/__init__.py b/modelscope/models/nlp/__init__.py index 6be4493b..f904efdf 100644 --- a/modelscope/models/nlp/__init__.py +++ b/modelscope/models/nlp/__init__.py @@ -3,3 +3,4 @@ from .masked_language_model import * # noqa F403 from .palm_for_text_generation import * # noqa F403 from .sbert_for_sentence_similarity import * # noqa F403 from .sbert_for_token_classification import * # noqa F403 +from .sbert_for_zero_shot_classification import * # noqa F403 diff --git a/modelscope/models/nlp/sbert_for_zero_shot_classification.py b/modelscope/models/nlp/sbert_for_zero_shot_classification.py new file mode 100644 index 00000000..837bb41e --- /dev/null +++ b/modelscope/models/nlp/sbert_for_zero_shot_classification.py @@ -0,0 +1,50 @@ +from typing import Any, Dict + +import numpy as np + +from modelscope.utils.constant import Tasks +from ...metainfo import Models +from ..base import Model +from ..builder import MODELS + +__all__ = ['SbertForZeroShotClassification'] + + +@MODELS.register_module( + Tasks.zero_shot_classification, module_name=Models.structbert) +class SbertForZeroShotClassification(Model): + + def __init__(self, model_dir: str, *args, **kwargs): + """initialize the zero shot classification model from the `model_dir` path. + + Args: + model_dir (str): the model path. 
+ """ + + super().__init__(model_dir, *args, **kwargs) + from sofa import SbertForSequenceClassification + self.model = SbertForSequenceClassification.from_pretrained(model_dir) + + def train(self): + return self.model.train() + + def eval(self): + return self.model.eval() + + def forward(self, input: Dict[str, Any]) -> Dict[str, np.ndarray]: + """return the result by the model + + Args: + input (Dict[str, Any]): the preprocessed data + + Returns: + Dict[str, np.ndarray]: results + Example: + { + 'logits': array([[-0.53860897, 1.5029076 ]], dtype=float32) # true value + } + """ + outputs = self.model(**input) + logits = outputs['logits'].numpy() + res = {'logits': logits} + return res diff --git a/modelscope/pipelines/base.py b/modelscope/pipelines/base.py index 2f5d5dcc..4052d35a 100644 --- a/modelscope/pipelines/base.py +++ b/modelscope/pipelines/base.py @@ -74,33 +74,57 @@ class Pipeline(ABC): self.preprocessor = preprocessor def __call__(self, input: Union[Input, List[Input]], *args, - **post_kwargs) -> Union[Dict[str, Any], Generator]: + **kwargs) -> Union[Dict[str, Any], Generator]: # model provider should leave it as it is # modelscope library developer will handle this function # simple showcase, need to support iterator type for both tensorflow and pytorch # input_dict = self._handle_input(input) + + # sanitize the parameters + preprocess_params, forward_params, postprocess_params = self._sanitize_parameters( + **kwargs) + kwargs['preprocess_params'] = preprocess_params + kwargs['forward_params'] = forward_params + kwargs['postprocess_params'] = postprocess_params + if isinstance(input, list): output = [] for ele in input: - output.append(self._process_single(ele, *args, **post_kwargs)) + output.append(self._process_single(ele, *args, **kwargs)) elif isinstance(input, MsDataset): - return self._process_iterator(input, *args, **post_kwargs) + return self._process_iterator(input, *args, **kwargs) else: - output = self._process_single(input, *args, **post_kwargs) + output = self._process_single(input, *args, **kwargs) return output - def _process_iterator(self, input: Input, *args, **post_kwargs): + def _sanitize_parameters(self, **pipeline_parameters): + """ + this method should sanitize the keyword args to preprocessor params, + forward params and postprocess params on '__call__' or '_process_single' method + considered to be a normal classmethod with default implementation / output + + Default Returns: + Dict[str, str]: preprocess_params = {} + Dict[str, str]: forward_params = {} + Dict[str, str]: postprocess_params = pipeline_parameters + """ + return {}, {}, pipeline_parameters + + def _process_iterator(self, input: Input, *args, **kwargs): for ele in input: - yield self._process_single(ele, *args, **post_kwargs) + yield self._process_single(ele, *args, **kwargs) + + def _process_single(self, input: Input, *args, **kwargs) -> Dict[str, Any]: + preprocess_params = kwargs.get('preprocess_params') + forward_params = kwargs.get('forward_params') + postprocess_params = kwargs.get('postprocess_params') - def _process_single(self, input: Input, *args, - **post_kwargs) -> Dict[str, Any]: - out = self.preprocess(input) - out = self.forward(out) - out = self.postprocess(out, **post_kwargs) + out = self.preprocess(input, **preprocess_params) + out = self.forward(out, **forward_params) + out = self.postprocess(out, **postprocess_params) self._check_output(out) return out @@ -120,20 +144,21 @@ class Pipeline(ABC): raise ValueError(f'expected output keys are {output_keys}, ' f'those 
{missing_keys} are missing') - def preprocess(self, inputs: Input) -> Dict[str, Any]: + def preprocess(self, inputs: Input, **preprocess_params) -> Dict[str, Any]: """ Provide default implementation based on preprocess_cfg and user can reimplement it """ assert self.preprocessor is not None, 'preprocess method should be implemented' assert not isinstance(self.preprocessor, List),\ 'default implementation does not support using multiple preprocessors.' - return self.preprocessor(inputs) + return self.preprocessor(inputs, **preprocess_params) - def forward(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: """ Provide default implementation using self.model and user can reimplement it """ assert self.model is not None, 'forward method should be implemented' assert not self.has_multiple_models, 'default implementation does not support multiple models in a pipeline.' - return self.model(inputs) + return self.model(inputs, **forward_params) @abstractmethod def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py index 41cd73da..847955d4 100644 --- a/modelscope/pipelines/builder.py +++ b/modelscope/pipelines/builder.py @@ -27,6 +27,9 @@ DEFAULT_MODEL_FOR_PIPELINE = { 'damo/bert-base-sst2'), Tasks.text_generation: (Pipelines.text_generation, 'damo/nlp_palm2.0_text-generation_chinese-base'), + Tasks.zero_shot_classification: + (Pipelines.zero_shot_classification, + 'damo/nlp_structbert_zero-shot-classification_chinese-base'), Tasks.image_captioning: (Pipelines.image_caption, 'damo/ofa_image-caption_coco_large_en'), Tasks.image_generation: diff --git a/modelscope/pipelines/nlp/__init__.py b/modelscope/pipelines/nlp/__init__.py index c50875fd..5ef12e22 100644 --- a/modelscope/pipelines/nlp/__init__.py +++ b/modelscope/pipelines/nlp/__init__.py @@ -3,3 +3,4 @@ from .sentence_similarity_pipeline import * # noqa F403 from .sequence_classification_pipeline import * # noqa F403 from .text_generation_pipeline import * # noqa F403 from .word_segmentation_pipeline import * # noqa F403 +from .zero_shot_classification_pipeline import * # noqa F403 diff --git a/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py new file mode 100644 index 00000000..2ed4dac3 --- /dev/null +++ b/modelscope/pipelines/nlp/zero_shot_classification_pipeline.py @@ -0,0 +1,97 @@ +import os +import uuid +from typing import Any, Dict, Union + +import json +import numpy as np +import torch +from scipy.special import softmax + +from ...metainfo import Pipelines +from ...models import Model +from ...models.nlp import SbertForZeroShotClassification +from ...preprocessors import ZeroShotClassificationPreprocessor +from ...utils.constant import Tasks +from ..base import Input, Pipeline +from ..builder import PIPELINES + +__all__ = ['ZeroShotClassificationPipeline'] + + +@PIPELINES.register_module( + Tasks.zero_shot_classification, + module_name=Pipelines.zero_shot_classification) +class ZeroShotClassificationPipeline(Pipeline): + + def __init__(self, + model: Union[SbertForZeroShotClassification, str], + preprocessor: ZeroShotClassificationPreprocessor = None, + **kwargs): + """use `model` and `preprocessor` to create a nlp text classification pipeline for prediction + + Args: + model (SbertForSentimentClassification): a model instance + preprocessor (SentimentClassificationPreprocessor): a preprocessor 
instance + """ + assert isinstance(model, str) or isinstance(model, SbertForZeroShotClassification), \ + 'model must be a single str or SbertForZeroShotClassification' + model = model if isinstance( + model, + SbertForZeroShotClassification) else Model.from_pretrained(model) + + self.entailment_id = 0 + self.contradiction_id = 2 + + if preprocessor is None: + preprocessor = ZeroShotClassificationPreprocessor(model.model_dir) + model.eval() + super().__init__(model=model, preprocessor=preprocessor, **kwargs) + + def _sanitize_parameters(self, **kwargs): + preprocess_params = {} + postprocess_params = {} + + if 'candidate_labels' in kwargs: + candidate_labels = kwargs.pop('candidate_labels') + preprocess_params['candidate_labels'] = candidate_labels + postprocess_params['candidate_labels'] = candidate_labels + else: + raise ValueError('You must include at least one label.') + preprocess_params['hypothesis_template'] = kwargs.pop( + 'hypothesis_template', '{}') + + postprocess_params['multi_label'] = kwargs.pop('multi_label', False) + return preprocess_params, {}, postprocess_params + + def forward(self, inputs: Dict[str, Any], + **forward_params) -> Dict[str, Any]: + with torch.no_grad(): + return super().forward(inputs, **forward_params) + + def postprocess(self, + inputs: Dict[str, Any], + candidate_labels, + multi_label=False) -> Dict[str, Any]: + """process the prediction results + + Args: + inputs (Dict[str, Any]): _description_ + + Returns: + Dict[str, Any]: the prediction results + """ + + logits = inputs['logits'] + if multi_label or len(candidate_labels) == 1: + logits = logits[..., [self.contradiction_id, self.entailment_id]] + scores = softmax(logits, axis=-1)[..., 1] + else: + logits = logits[..., self.entailment_id] + scores = softmax(logits, axis=-1) + + reversed_index = list(reversed(scores.argsort())) + result = { + 'labels': [candidate_labels[i] for i in reversed_index], + 'scores': [scores[i].item() for i in reversed_index], + } + return result diff --git a/modelscope/pipelines/outputs.py b/modelscope/pipelines/outputs.py index 52b7eeae..290e6717 100644 --- a/modelscope/pipelines/outputs.py +++ b/modelscope/pipelines/outputs.py @@ -101,6 +101,13 @@ TASK_OUTPUTS = { # } Tasks.sentence_similarity: ['scores', 'labels'], + # zero-shot classification result for single sample + # { + # "labels": ["happy", "sad", "calm", "angry"], + # "scores": [0.9, 0.1, 0.05, 0.05] + # } + Tasks.zero_shot_classification: ['scores', 'labels'], + # ============ audio tasks =================== # audio processed for single file in PCM format diff --git a/modelscope/preprocessors/nlp.py b/modelscope/preprocessors/nlp.py index 4ed63f3c..e8e33e74 100644 --- a/modelscope/preprocessors/nlp.py +++ b/modelscope/preprocessors/nlp.py @@ -14,7 +14,7 @@ from .builder import PREPROCESSORS __all__ = [ 'Tokenize', 'SequenceClassificationPreprocessor', 'TextGenerationPreprocessor', 'TokenClassifcationPreprocessor', - 'FillMaskPreprocessor' + 'FillMaskPreprocessor', 'ZeroShotClassificationPreprocessor' ] @@ -286,3 +286,47 @@ class TokenClassifcationPreprocessor(Preprocessor): 'attention_mask': attention_mask, 'token_type_ids': token_type_ids } + + +@PREPROCESSORS.register_module( + Fields.nlp, module_name=Preprocessors.zero_shot_cls_tokenizer) +class ZeroShotClassificationPreprocessor(Preprocessor): + + def __init__(self, model_dir: str, *args, **kwargs): + """preprocess the data via the vocab.txt from the `model_dir` path + + Args: + model_dir (str): model path + """ + + super().__init__(*args, **kwargs) + + from 
sofa import SbertTokenizer + self.model_dir: str = model_dir + self.sequence_length = kwargs.pop('sequence_length', 512) + self.tokenizer = SbertTokenizer.from_pretrained(self.model_dir) + + @type_assert(object, str) + def __call__(self, data: str, hypothesis_template: str, + candidate_labels: list) -> Dict[str, Any]: + """process the raw input data + + Args: + data (str): a sentence + Example: + 'you are so handsome.' + + Returns: + Dict[str, Any]: the preprocessed data + """ + pairs = [[data, hypothesis_template.format(label)] + for label in candidate_labels] + + features = self.tokenizer( + pairs, + padding=True, + truncation=True, + max_length=self.sequence_length, + return_tensors='pt', + truncation_strategy='only_first') + return features diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 55f015e8..44bd1dff 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -48,6 +48,7 @@ class Tasks(object): fill_mask = 'fill-mask' summarization = 'summarization' question_answering = 'question-answering' + zero_shot_classification = 'zero-shot-classification' # audio tasks auto_speech_recognition = 'auto-speech-recognition' diff --git a/tests/pipelines/test_zero_shot_classification.py b/tests/pipelines/test_zero_shot_classification.py new file mode 100644 index 00000000..b76a6a86 --- /dev/null +++ b/tests/pipelines/test_zero_shot_classification.py @@ -0,0 +1,64 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models import Model +from modelscope.models.nlp import SbertForZeroShotClassification +from modelscope.pipelines import ZeroShotClassificationPipeline, pipeline +from modelscope.preprocessors import ZeroShotClassificationPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class ZeroShotClassificationTest(unittest.TestCase): + model_id = 'damo/nlp_structbert_zero-shot-classification_chinese-base' + sentence = '全新突破 解放军运20版空中加油机曝光' + labels = ['文化', '体育', '娱乐', '财经', '家居', '汽车', '教育', '科技', '军事'] + template = '这篇文章的标题是{}' + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_direct_file_download(self): + cache_path = snapshot_download(self.model_id) + tokenizer = ZeroShotClassificationPreprocessor(cache_path) + model = SbertForZeroShotClassification(cache_path, tokenizer=tokenizer) + pipeline1 = ZeroShotClassificationPipeline( + model, preprocessor=tokenizer) + pipeline2 = pipeline( + Tasks.zero_shot_classification, + model=model, + preprocessor=tokenizer) + + print( + f'sentence: {self.sentence}\n' + f'pipeline1:{pipeline1(input=self.sentence,candidate_labels=self.labels)}' + ) + print() + print( + f'sentence: {self.sentence}\n' + f'pipeline2: {pipeline2(self.sentence,candidate_labels=self.labels,hypothesis_template=self.template)}' + ) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + tokenizer = ZeroShotClassificationPreprocessor(model.model_dir) + pipeline_ins = pipeline( + task=Tasks.zero_shot_classification, + model=model, + preprocessor=tokenizer) + print(pipeline_ins(input=self.sentence, candidate_labels=self.labels)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_ins = pipeline( + task=Tasks.zero_shot_classification, 
model=self.model_id)
+        print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
+
+    @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+    def test_run_with_default_model(self):
+        pipeline_ins = pipeline(task=Tasks.zero_shot_classification)
+        print(pipeline_ins(input=self.sentence, candidate_labels=self.labels))
+
+
+if __name__ == '__main__':
+    unittest.main()

From fabea5604e5795ce5cd341090865cf409490b062 Mon Sep 17 00:00:00 2001
From: "hemu.zp"
Date: Wed, 29 Jun 2022 11:08:34 +0800
Subject: [PATCH 9/9] [to #42322933] Add MPLUG model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the visual question answering task pipeline for the MPLUG model

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9182119
---
 data/test/images/image_mplug_vqa.jpg | 3 +
 modelscope/metainfo.py | 3 +
 modelscope/models/multi_modal/__init__.py | 2 +
 .../mplug_for_visual_question_answering.py | 46 +++++++++++
 modelscope/pipelines/builder.py | 5 +-
 modelscope/pipelines/multi_modal/__init__.py | 1 +
 .../visual_question_answering_pipeline.py | 65 +++++++++++++++++++
 modelscope/preprocessors/__init__.py | 2 +-
 modelscope/preprocessors/multi_modal.py | 45 +++++++++++
 modelscope/utils/constant.py | 1 +
 requirements/nlp.txt | 2 +-
 .../test_visual_question_answering.py | 60 +++++++++++++++++
 12 files changed, 232 insertions(+), 3 deletions(-)
 create mode 100644 data/test/images/image_mplug_vqa.jpg
 create mode 100644 modelscope/models/multi_modal/mplug_for_visual_question_answering.py
 create mode 100644 modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
 create mode 100644 tests/pipelines/test_visual_question_answering.py
diff --git a/data/test/images/image_mplug_vqa.jpg b/data/test/images/image_mplug_vqa.jpg
new file mode 100644
index 00000000..57919471
--- /dev/null
+++ b/data/test/images/image_mplug_vqa.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b37b706885849037b5fa7fa44a3b78a6375f768d95ce46bfcb8e7329d038a692
+size 181725
diff --git a/modelscope/metainfo.py b/modelscope/metainfo.py
index 1d2ee4d2..485605bb 100644
--- a/modelscope/metainfo.py
+++ b/modelscope/metainfo.py
@@ -27,6 +27,7 @@ class Models(object):
     # multi-modal models
     ofa = 'ofa'
     clip = 'clip-multi-modal-embedding'
+    mplug = 'mplug'


 class Pipelines(object):
@@ -63,6 +64,7 @@ class Pipelines(object):
     # multi-modal tasks
     image_caption = 'image-caption'
     multi_modal_embedding = 'multi-modal-embedding'
+    visual_question_answering = 'visual-question-answering'


 class Trainers(object):
@@ -105,3 +107,4 @@ class Preprocessors(object):

     # multi-modal
     ofa_image_caption = 'ofa-image-caption'
+    mplug_visual_question_answering = 'mplug-visual-question-answering'
diff --git a/modelscope/models/multi_modal/__init__.py b/modelscope/models/multi_modal/__init__.py
index 2e6cc3bf..4ed9809b 100644
--- a/modelscope/models/multi_modal/__init__.py
+++ b/modelscope/models/multi_modal/__init__.py
@@ -1,2 +1,4 @@
 from .clip.clip_model import CLIPForMultiModalEmbedding
 from .image_captioning_model import OfaForImageCaptioning
+from .mplug_for_visual_question_answering import \
+    MPlugForVisualQuestionAnswering
diff --git a/modelscope/models/multi_modal/mplug_for_visual_question_answering.py b/modelscope/models/multi_modal/mplug_for_visual_question_answering.py
new file mode 100644
index 00000000..2682c048
--- /dev/null
+++ b/modelscope/models/multi_modal/mplug_for_visual_question_answering.py
@@ -0,0 +1,46 @@
+from typing import Dict
+
+from ...metainfo import Models
+from ...utils.constant import Tasks
+from ..base import Model, Tensor
+from ..builder import MODELS
+
+__all__ = ['MPlugForVisualQuestionAnswering']
+
+
+@MODELS.register_module(
+    Tasks.visual_question_answering, module_name=Models.mplug)
+class MPlugForVisualQuestionAnswering(Model):
+
+    def __init__(self, model_dir: str, *args, **kwargs):
+        """initialize the mplug model from the `model_dir` path.
+        Args:
+            model_dir (str): the model path.
+        """
+
+        super().__init__(model_dir, *args, **kwargs)
+        from sofa.models.mplug import MPlugForVisualQuestionAnswering
+        self.model = MPlugForVisualQuestionAnswering.from_pretrained(model_dir)
+        self.tokenizer = self.model.tokenizer
+
+    def train(self):
+        return self.model.train()
+
+    def eval(self):
+        return self.model.eval()
+
+    def forward(self, input: Dict[str, Tensor]) -> Dict[str, Tensor]:
+        """Return the model prediction for the preprocessed input.
+
+        Args:
+            input (Dict[str, Tensor]): the preprocessed data
+
+        Returns:
+            Dict[str, Tensor]: results
+                Example:
+                    {
+                        'predictions': Tensor([[1377, 4959, 2785, 6392...]]),
+                    }
+        """
+
+        return self.model(**input)[0]
diff --git a/modelscope/pipelines/builder.py b/modelscope/pipelines/builder.py
index 847955d4..2f66682d 100644
--- a/modelscope/pipelines/builder.py
+++ b/modelscope/pipelines/builder.py
@@ -42,7 +42,10 @@ DEFAULT_MODEL_FOR_PIPELINE = {
         'damo/cv_TAdaConv_action-recognition'),
     Tasks.multi_modal_embedding:
     (Pipelines.multi_modal_embedding,
-     'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding')
+     'damo/multi-modal_clip-vit-large-patch14-chinese_multi-modal-embedding'),
+    Tasks.visual_question_answering:
+    (Pipelines.visual_question_answering,
+     'damo/mplug_visual-question-answering_coco_large_en'),
 }
diff --git a/modelscope/pipelines/multi_modal/__init__.py b/modelscope/pipelines/multi_modal/__init__.py
index 6c96d843..fdcada89 100644
--- a/modelscope/pipelines/multi_modal/__init__.py
+++ b/modelscope/pipelines/multi_modal/__init__.py
@@ -1,2 +1,3 @@
 from .image_captioning_pipeline import ImageCaptionPipeline
 from .multi_modal_embedding_pipeline import MultiModalEmbeddingPipeline
+from .visual_question_answering_pipeline import VisualQuestionAnsweringPipeline
diff --git a/modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py b/modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
new file mode 100644
index 00000000..97c8cf7b
--- /dev/null
+++ b/modelscope/pipelines/multi_modal/visual_question_answering_pipeline.py
@@ -0,0 +1,65 @@
+from typing import Any, Dict, Optional, Union
+
+import torch
+
+from ...metainfo import Pipelines
+from ...models import Model
+from ...models.multi_modal import MPlugForVisualQuestionAnswering
+from ...preprocessors import MPlugVisualQuestionAnsweringPreprocessor
+from ...utils.constant import Tasks
+from ..base import Pipeline, Tensor
+from ..builder import PIPELINES
+
+__all__ = ['VisualQuestionAnsweringPipeline']
+
+
+@PIPELINES.register_module(
+    Tasks.visual_question_answering,
+    module_name=Pipelines.visual_question_answering)
+class VisualQuestionAnsweringPipeline(Pipeline):
+
+    def __init__(self,
+                 model: Union[MPlugForVisualQuestionAnswering, str],
+                 preprocessor: Optional[
+                     MPlugVisualQuestionAnsweringPreprocessor] = None,
+                 **kwargs):
+        """use `model` and `preprocessor` to create a visual question answering pipeline for prediction
+
+        Args:
+            model (MPlugForVisualQuestionAnswering): a model instance
+            preprocessor (MPlugVisualQuestionAnsweringPreprocessor): a preprocessor instance
+        """
+        model = model if isinstance(
+            model,
+            MPlugForVisualQuestionAnswering) else Model.from_pretrained(model)
+        if preprocessor is None:
+            preprocessor = MPlugVisualQuestionAnsweringPreprocessor(
+                model.model_dir)
+        model.eval()
+        super().__init__(model=model, preprocessor=preprocessor, **kwargs)
+        self.tokenizer = model.tokenizer
+
+    def forward(self, inputs: Dict[str, Any],
+                **forward_params) -> Dict[str, Any]:
+        with torch.no_grad():
+            return super().forward(inputs, **forward_params)
+
+    def postprocess(self, inputs: Dict[str, Tensor],
+                    **postprocess_params) -> Dict[str, str]:
+        """process the prediction results
+
+        Args:
+            inputs (Dict[str, Tensor]): the model prediction, token ids of the generated answer
+
+        Returns:
+            Dict[str, str]: the prediction results
+        """
+        replace_tokens_bert = (('[unused0]', ''), ('[PAD]', ''),
+                               ('[unused1]', ''), (r' +', ' '), ('[SEP]', ''),
+                               ('[unused2]', ''), ('[CLS]', ''), ('[UNK]', ''))
+
+        pred_string = self.tokenizer.decode(inputs[0][0])
+        for _old, _new in replace_tokens_bert:
+            pred_string = pred_string.replace(_old, _new)
+        # strip leading/trailing whitespace left over after token removal
+        pred_string = pred_string.strip()
+        return {'answer': pred_string}
diff --git a/modelscope/preprocessors/__init__.py b/modelscope/preprocessors/__init__.py
index 1bc06ce3..694688f6 100644
--- a/modelscope/preprocessors/__init__.py
+++ b/modelscope/preprocessors/__init__.py
@@ -6,6 +6,6 @@ from .builder import PREPROCESSORS, build_preprocessor
 from .common import Compose
 from .image import LoadImage, load_image
 from .kws import WavToLists
-from .multi_modal import OfaImageCaptionPreprocessor
+from .multi_modal import *  # noqa F403
 from .nlp import *  # noqa F403
 from .text_to_speech import *  # noqa F403
diff --git a/modelscope/preprocessors/multi_modal.py b/modelscope/preprocessors/multi_modal.py
index 7c8f0fab..1bc686eb 100644
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -16,6 +16,7 @@ from .image import load_image

 __all__ = [
     'OfaImageCaptionPreprocessor',
+    'MPlugVisualQuestionAnsweringPreprocessor',
 ]

@@ -110,3 +111,47 @@ class OfaImageCaptionPreprocessor(Preprocessor):
             }
         }
         return sample
+
+
+@PREPROCESSORS.register_module(
+    Fields.multi_modal,
+    module_name=Preprocessors.mplug_visual_question_answering)
+class MPlugVisualQuestionAnsweringPreprocessor(Preprocessor):
+
+    def __init__(self, model_dir: str, *args, **kwargs):
+        """preprocess the data via the 'bert-base-uncased' tokenizer and the model configuration
+
+        """
+        super().__init__(*args, **kwargs)
+
+        # tokenizer
+        from transformers import AutoTokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+
+        # load configuration
+        from sofa.models.mplug import CONFIG_NAME, MPlugConfig
+        config = MPlugConfig.from_yaml_file(osp.join(model_dir, CONFIG_NAME))
+
+        # Initialize transform
+        from torchvision import transforms
+        mean = (0.48145466, 0.4578275, 0.40821073)
+        std = (0.26862954, 0.26130258, 0.27577711)
+
+        self.patch_resize_transform = transforms.Compose([
+            transforms.Resize((config.image_res, config.image_res),
+                              interpolation=Image.BICUBIC),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=mean, std=std),
+        ])
+
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        image, question = data['image'], data['question']
+        image = Image.open(image).convert('RGB') if isinstance(image,
+                                                               str) else image
+        image = self.patch_resize_transform(image)
+        image = torch.stack([image], dim=0)
+        question = self.tokenizer([question.lower()],
+                                  padding='longest',
+                                  return_tensors='pt')
+
+        return {'image': image, 'question': question, 'train': False}
diff --git
a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 44bd1dff..3ce3ab98 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -61,6 +61,7 @@ class Tasks(object): visual_grounding = 'visual-grounding' text_to_image_synthesis = 'text-to-image-synthesis' multi_modal_embedding = 'multi-modal-embedding' + visual_question_answering = 'visual-question-answering' class InputFields(object): diff --git a/requirements/nlp.txt b/requirements/nlp.txt index 261b9ec5..574bf856 100644 --- a/requirements/nlp.txt +++ b/requirements/nlp.txt @@ -1 +1 @@ -https://alinlp.alibaba-inc.com/pypi/sofa-1.0.3-py3-none-any.whl +https://alinlp.alibaba-inc.com/pypi/sofa-1.0.4.1-py3-none-any.whl diff --git a/tests/pipelines/test_visual_question_answering.py b/tests/pipelines/test_visual_question_answering.py new file mode 100644 index 00000000..4577607e --- /dev/null +++ b/tests/pipelines/test_visual_question_answering.py @@ -0,0 +1,60 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import unittest + +from modelscope.hub.snapshot_download import snapshot_download +from modelscope.models import Model +from modelscope.models.multi_modal import MPlugForVisualQuestionAnswering +from modelscope.pipelines import VisualQuestionAnsweringPipeline, pipeline +from modelscope.preprocessors import MPlugVisualQuestionAnsweringPreprocessor +from modelscope.utils.constant import Tasks +from modelscope.utils.test_utils import test_level + + +class VisualQuestionAnsweringTest(unittest.TestCase): + model_id = 'damo/mplug_visual-question-answering_coco_large_en' + input_vqa = { + 'image': 'data/test/images/image_mplug_vqa.jpg', + 'question': 'What is the woman doing?', + } + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run(self): + cache_path = snapshot_download(self.model_id) + preprocessor = MPlugVisualQuestionAnsweringPreprocessor(cache_path) + model = MPlugForVisualQuestionAnswering(cache_path) + pipeline1 = VisualQuestionAnsweringPipeline( + model, preprocessor=preprocessor) + pipeline2 = pipeline( + Tasks.visual_question_answering, + model=model, + preprocessor=preprocessor) + print(f"question: {self.input_vqa['question']}") + print(f"pipeline1: {pipeline1(self.input_vqa)['answer']}") + print(f"pipeline2: {pipeline2(self.input_vqa)['answer']}") + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_model_from_modelhub(self): + model = Model.from_pretrained(self.model_id) + preprocessor = MPlugVisualQuestionAnsweringPreprocessor( + model.model_dir) + pipeline_vqa = pipeline( + task=Tasks.visual_question_answering, + model=model, + preprocessor=preprocessor) + print(pipeline_vqa(self.input_vqa)) + + @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') + def test_run_with_model_name(self): + pipeline_vqa = pipeline( + Tasks.visual_question_answering, model=self.model_id) + print(pipeline_vqa(self.input_vqa)) + + @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + def test_run_with_default_model(self): + pipeline_vqa = pipeline(task=Tasks.visual_question_answering) + print(pipeline_vqa(self.input_vqa)) + + +if __name__ == '__main__': + unittest.main()
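Usage note (a minimal sketch, not part of any patch above): the snippet below shows how the new `_sanitize_parameters` routing from PATCH 8/9 is exercised end to end through the zero-shot classification pipeline. The task name, model id, keyword arguments and output keys are all taken verbatim from the patch; the only assumptions are that modelscope is installed and the hub model can be downloaded.

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    classifier = pipeline(
        task=Tasks.zero_shot_classification,
        model='damo/nlp_structbert_zero-shot-classification_chinese-base')

    # `candidate_labels` is routed to both the preprocess and postprocess
    # params, `hypothesis_template` to preprocess only, and `multi_label`
    # to postprocess only, per ZeroShotClassificationPipeline._sanitize_parameters.
    result = classifier(
        '全新突破 解放军运20版空中加油机曝光',
        candidate_labels=['军事', '体育', '文化'],
        hypothesis_template='这篇文章的标题是{}',
        multi_label=False)
    print(result['labels'])  # candidate labels sorted by descending score
    print(result['scores'])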
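The same call pattern applies to the MPLUG visual question answering pipeline from PATCH 9/9; the sketch below mirrors the added test file. The model id, input dict keys and the 'answer' output key come straight from the patch; the test image is the LFS file added above, so a checkout with git-lfs is assumed.

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    vqa = pipeline(
        task=Tasks.visual_question_answering,
        model='damo/mplug_visual-question-answering_coco_large_en')

    # the preprocessor resizes and normalizes the image, and tokenizes the
    # lower-cased question with the 'bert-base-uncased' tokenizer
    result = vqa({
        'image': 'data/test/images/image_mplug_vqa.jpg',
        'question': 'What is the woman doing?',
    })
    print(result['answer'])  # plain-text answer decoded from predicted token ids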