yingda.chen, 3 years ago
commit f8eb699f7f

3 changed files with 53 additions and 47 deletions
  1. docs/source/quick_start.md  +26 -6
  2. tests/pipelines/test_image_matting.py  +15 -21
  3. tests/pipelines/test_text_classification.py  +12 -20

docs/source/quick_start.md  +26 -6

@@ -20,7 +20,7 @@ which pip


## Installing third-party dependencies


MaaS Library supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It has been tested on Python 3.6+, PyTorch 1.8+ and TensorFlow 2.6. Install the framework required by your chosen model; the following links give installation instructions:
MaaS Library currently supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It has been tested on Python 3.6+, PyTorch 1.8+ and TensorFlow 2.6. Install the framework required by your chosen model; the following links give installation instructions:


* [PyTorch installation guide](https://pytorch.org/get-started/locally/)
* [TensorFlow installation guide](https://www.tensorflow.org/install/pip)
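Once a framework is installed, a quick import check helps confirm the environment before running the pipeline examples below. This is only an illustrative sketch (it assumes PyTorch was the framework chosen) and is not part of the documented install steps:

```python
# illustrative check that the chosen framework (PyTorch assumed) is importable
import torch

print(torch.__version__)          # expect 1.8 or newer
print(torch.cuda.is_available())  # True only with a CUDA-enabled build and a visible GPU
```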
@@ -41,7 +41,7 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
```




### Using the source
### Installing from source


Suitable for local development and debugging: after modifying the source code, it can be run directly.
```shell
@@ -64,7 +64,6 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
```





## Training


to be done
@@ -84,12 +83,33 @@ from maas_lib.pipelines import pipeline
from maas_lib.utils.constant import Tasks


# create a pipeline from the task name
img_matting = pipeline(
Tasks.image_matting, model='damo/image-matting-person')
img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')


# pass an image file URL directly as the input for pipeline inference
result = img_matting(
    'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
)
cv2.imwrite('result.png', result['output_png'])
print(f'result file path is {osp.abspath("result.png")}')
print(f'Output written to {osp.abspath("result.png")}')

```

In addition, the pipeline interface also accepts a Dataset as input, so the code above can equivalently be written as
```python
import cv2
import os.path as osp
from maas_lib.pipelines import pipeline
from maas_lib.utils.constant import Tasks
from ali_maas_datasets import PyDataset

# build a PyDataset from image URLs; a local folder can also be used via input_location = '/dir/to/images'
input_location = [
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
]
dataset = PyDataset.load(input_location, target='image')
img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
# when the input is a PyDataset, the output is returned as an iterator
result = img_matting(dataset)
cv2.imwrite('result.png', next(result)['output_png'])
print(f'Output written to {osp.abspath("result.png")}')
```
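Because the generator yields one result per input, the same pattern extends to a folder or a list of several URLs. A small sketch, reusing the img_matting pipeline and dataset from the snippet above (the output file names are illustrative):

```python
import cv2
import os.path as osp

# iterate over every result yielded by the generator and save each matte separately
for i, item in enumerate(img_matting(dataset)):
    out_path = f'result_{i}.png'
    cv2.imwrite(out_path, item['output_png'])
    print(f'Output written to {osp.abspath(out_path)}')
```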

tests/pipelines/test_image_matting.py  +15 -21

@@ -3,11 +3,8 @@
import os.path as osp
import tempfile
import unittest
from typing import Any, Dict, List, Tuple, Union


import cv2
import numpy as np
import PIL
from ali_maas_datasets import PyDataset


from maas_lib.fileio import File
@@ -31,24 +28,20 @@ class ImageMattingTest(unittest.TestCase):
)
cv2.imwrite('result.png', result['output_png'])


def test_dataset(self):
model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \
'.com/data/test/maas/image_matting/matting_person.pb'
with tempfile.TemporaryDirectory() as tmp_dir:
model_file = osp.join(tmp_dir, 'matting_person.pb')
with open(model_file, 'wb') as ofile:
ofile.write(File.read(model_path))
img_matting = pipeline(Tasks.image_matting, model=tmp_dir)
# dataset = PyDataset.load('/dir/to/images', target='image')
# yapf: disable
dataset = PyDataset.load([
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
],
target='image')
result = img_matting(dataset)
for i, r in enumerate(result):
cv2.imwrite(f'/path/to/result/{i}.png', r['output_png'])
print('end')
def test_run_with_dataset(self):
input_location = [
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
]
# alternatively:
# input_location = '/dir/to/images'

dataset = PyDataset.load(input_location, target='image')
img_matting = pipeline(
Tasks.image_matting, model='damo/image-matting-person')
# note that when the input is a dataset, the output is a generator that can be iterated
result = img_matting(dataset)
cv2.imwrite('result.png', next(result)['output_png'])
print(f'Output written to {osp.abspath("result.png")}')


def test_run_modelhub(self):
img_matting = pipeline(
@@ -58,6 +51,7 @@ class ImageMattingTest(unittest.TestCase):
'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
)
cv2.imwrite('result.png', result['output_png'])
print(f'Output written to {osp.abspath("result.png")}')




if __name__ == '__main__':


tests/pipelines/test_text_classification.py  +12 -20

@@ -11,6 +11,7 @@ from maas_lib.models import Model
from maas_lib.models.nlp import SequenceClassificationModel
from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
from maas_lib.preprocessors import SequenceClassificationPreprocessor
from maas_lib.utils.constant import Tasks




class SequenceClassificationTest(unittest.TestCase):
@@ -49,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase):
pipeline1 = SequenceClassificationPipeline(model, preprocessor)
self.predict(pipeline1)
pipeline2 = pipeline(
'text-classification', model=model, preprocessor=preprocessor)
Tasks.text_classification, model=model, preprocessor=preprocessor)
print(pipeline2('Hello world!'))


def test_run_modelhub(self):
@@ -57,29 +58,20 @@ class SequenceClassificationTest(unittest.TestCase):
preprocessor = SequenceClassificationPreprocessor(
model.model_dir, first_sequence='sentence', second_sequence=None)
pipeline_ins = pipeline(
task='text-classification', model=model, preprocessor=preprocessor)
task=Tasks.text_classification,
model=model,
preprocessor=preprocessor)
self.predict(pipeline_ins)


def test_dataset(self):
model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
'/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
cache_path = Path(cache_path_str)

if not cache_path.exists():
cache_path.parent.mkdir(parents=True, exist_ok=True)
cache_path.touch(exist_ok=True)
with cache_path.open('wb') as ofile:
ofile.write(File.read(model_url))

with zipfile.ZipFile(cache_path_str, 'r') as zipf:
zipf.extractall(cache_path.parent)
path = r'.cache/easynlp/'
model = SequenceClassificationModel(path)
def test_run_with_dataset(self):
model = Model.from_pretrained('damo/bert-base-sst2')
preprocessor = SequenceClassificationPreprocessor(
path, first_sequence='sentence', second_sequence=None)
model.model_dir, first_sequence='sentence', second_sequence=None)
text_classification = pipeline(
'text-classification', model=model, preprocessor=preprocessor)
Tasks.text_classification, model=model, preprocessor=preprocessor)
# loaded from huggingface dataset
# TODO: add load_from parameter (an enum) LOAD_FROM.hugging_face
# TODO: rename parameter as dataset_name and subset_name
dataset = PyDataset.load('glue', name='sst2', target='sentence')
result = text_classification(dataset)
for i, r in enumerate(result):

