diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index de5f8da8..3c961097 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -20,7 +20,7 @@ which pip ## 第三方依赖安装 -MaaS Library支持tensorflow,pytorch两大深度学习框架进行模型训练、推理, 在Python 3.6+, Pytorch 1.8+, Tensorflow 2.6上测试可运行,用户可以根据所选模型对应的计算框架进行安装,可以参考如下链接进行安装所需框架: +MaaS Library目前支持tensorflow,pytorch两大深度学习框架进行模型训练、推理, 在Python 3.6+, Pytorch 1.8+, Tensorflow 2.6上测试可运行,用户可以根据所选模型对应的计算框架进行安装,可以参考如下链接进行安装所需框架: * [Pytorch安装指导](https://pytorch.org/get-started/locally/) * [Tensorflow安装指导](https://www.tensorflow.org/install/pip) @@ -41,7 +41,7 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting ``` -### 使用源码 +### 使用源码安装 适合本地开发调试使用,修改源码后可以直接执行 ```shell @@ -64,7 +64,6 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting ``` - ## 训练 to be done @@ -84,12 +83,33 @@ from maas_lib.pipelines import pipeline from maas_lib.utils.constant import Tasks # 根据任务名创建pipeline -img_matting = pipeline( - Tasks.image_matting, model='damo/image-matting-person') +img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person') +# 直接提供图像文件的url作为pipeline推理的输入 result = img_matting( 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' ) cv2.imwrite('result.png', result['output_png']) -print(f'result file path is {osp.abspath("result.png")}') +print(f'Output written to {osp.abspath("result.png")}') + +``` + +此外,pipeline接口也能接收Dataset作为输入,上面的代码同样可以实现为 +```python +import cv2 +import os.path as osp +from maas_lib.pipelines import pipeline +from maas_lib.utils.constant import Tasks +from ali_maas_datasets import PyDataset + +# 使用图像url构建PyDataset,此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹 +input_location = [ + 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' +] +dataset = PyDataset.load(input_location, target='image') +img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person') +# 输入为PyDataset时,输出的结果为迭代器 +result = img_matting(dataset) +cv2.imwrite('result.png', next(result)['output_png']) +print(f'Output written to {osp.abspath("result.png")}') ``` diff --git a/tests/pipelines/test_image_matting.py b/tests/pipelines/test_image_matting.py index 8b8672ae..26847389 100644 --- a/tests/pipelines/test_image_matting.py +++ b/tests/pipelines/test_image_matting.py @@ -3,11 +3,8 @@ import os.path as osp import tempfile import unittest -from typing import Any, Dict, List, Tuple, Union import cv2 -import numpy as np -import PIL from ali_maas_datasets import PyDataset from maas_lib.fileio import File @@ -31,24 +28,20 @@ class ImageMattingTest(unittest.TestCase): ) cv2.imwrite('result.png', result['output_png']) - def test_dataset(self): - model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \ - '.com/data/test/maas/image_matting/matting_person.pb' - with tempfile.TemporaryDirectory() as tmp_dir: - model_file = osp.join(tmp_dir, 'matting_person.pb') - with open(model_file, 'wb') as ofile: - ofile.write(File.read(model_path)) - img_matting = pipeline(Tasks.image_matting, model=tmp_dir) - # dataset = PyDataset.load('/dir/to/images', target='image') - # yapf: disable - dataset = PyDataset.load([ - 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' - ], - target='image') - result = img_matting(dataset) - for i, r in enumerate(result): - cv2.imwrite(f'/path/to/result/{i}.png', r['output_png']) - print('end') + def test_run_with_dataset(self): + input_location = [ + 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' + ] + # alternatively: + # input_location = '/dir/to/images' + + dataset = PyDataset.load(input_location, target='image') + img_matting = pipeline( + Tasks.image_matting, model='damo/image-matting-person') + # note that for dataset output, the inference-output is a Generator that can be iterated. + result = img_matting(dataset) + cv2.imwrite('result.png', next(result)['output_png']) + print(f'Output written to {osp.abspath("result.png")}') def test_run_modelhub(self): img_matting = pipeline( @@ -58,6 +51,7 @@ class ImageMattingTest(unittest.TestCase): 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png' ) cv2.imwrite('result.png', result['output_png']) + print(f'Output written to {osp.abspath("result.png")}') if __name__ == '__main__': diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py index 03a5b83f..45b584af 100644 --- a/tests/pipelines/test_text_classification.py +++ b/tests/pipelines/test_text_classification.py @@ -11,6 +11,7 @@ from maas_lib.models import Model from maas_lib.models.nlp import SequenceClassificationModel from maas_lib.pipelines import SequenceClassificationPipeline, pipeline from maas_lib.preprocessors import SequenceClassificationPreprocessor +from maas_lib.utils.constant import Tasks class SequenceClassificationTest(unittest.TestCase): @@ -49,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase): pipeline1 = SequenceClassificationPipeline(model, preprocessor) self.predict(pipeline1) pipeline2 = pipeline( - 'text-classification', model=model, preprocessor=preprocessor) + Tasks.text_classification, model=model, preprocessor=preprocessor) print(pipeline2('Hello world!')) def test_run_modelhub(self): @@ -57,29 +58,20 @@ class SequenceClassificationTest(unittest.TestCase): preprocessor = SequenceClassificationPreprocessor( model.model_dir, first_sequence='sentence', second_sequence=None) pipeline_ins = pipeline( - task='text-classification', model=model, preprocessor=preprocessor) + task=Tasks.text_classification, + model=model, + preprocessor=preprocessor) self.predict(pipeline_ins) - def test_dataset(self): - model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \ - '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip' - cache_path_str = r'.cache/easynlp/bert-base-sst2.zip' - cache_path = Path(cache_path_str) - - if not cache_path.exists(): - cache_path.parent.mkdir(parents=True, exist_ok=True) - cache_path.touch(exist_ok=True) - with cache_path.open('wb') as ofile: - ofile.write(File.read(model_url)) - - with zipfile.ZipFile(cache_path_str, 'r') as zipf: - zipf.extractall(cache_path.parent) - path = r'.cache/easynlp/' - model = SequenceClassificationModel(path) + def test_run_with_dataset(self): + model = Model.from_pretrained('damo/bert-base-sst2') preprocessor = SequenceClassificationPreprocessor( - path, first_sequence='sentence', second_sequence=None) + model.model_dir, first_sequence='sentence', second_sequence=None) text_classification = pipeline( - 'text-classification', model=model, preprocessor=preprocessor) + Tasks.text_classification, model=model, preprocessor=preprocessor) + # loaded from huggingface dataset + # TODO: add load_from parameter (an enum) LOAD_FROM.hugging_face + # TODO: rename parameter as dataset_name and subset_name dataset = PyDataset.load('glue', name='sst2', target='sentence') result = text_classification(dataset) for i, r in enumerate(result):