refine tests and examples

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8898823
3 years ago · f8eb699f7f
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -20,7 +20,7 @@ which pip

 ## 第三方依赖安装

 MaaS Library支持tensorflow，pytorch两大深度学习框架进行模型训练、推理， 在Python 3.6+,  Pytorch 1.8+, Tensorflow 2.6上测试可运行，用户可以根据所选模型对应的计算框架进行安装，可以参考如下链接进行安装所需框架:
 MaaS Library目前支持TensorFlow、PyTorch两大深度学习框架进行模型训练、推理，在Python 3.6+、PyTorch 1.8+、TensorFlow 2.6上测试可运行。用户可以根据所选模型对应的计算框架进行安装，可参考如下链接安装所需框架：

 * [PyTorch安装指导](https://pytorch.org/get-started/locally/)
 * [TensorFlow安装指导](https://www.tensorflow.org/install/pip)
@@ -41,7 +41,7 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
 ```


 ### 使用源码
 ### 使用源码安装

 适合本地开发调试使用，修改源码后可以直接执行
 ```shell
@@ -64,7 +64,6 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
 ```



 ## 训练

 to be done
@@ -84,12 +83,33 @@ from maas_lib.pipelines import pipeline
 from maas_lib.utils.constant import Tasks

 # 根据任务名创建pipeline
 img_matting = pipeline(
    Tasks.image_matting, model='damo/image-matting-person')
 img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')

 # 直接提供图像文件的url作为pipeline推理的输入
 result = img_matting(
    'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
 )
 cv2.imwrite('result.png', result['output_png'])
 print(f'result file path is {osp.abspath("result.png")}')
 print(f'Output written to {osp.abspath("result.png")}')

 ```

 此外，pipeline接口也能接收Dataset作为输入，上面的代码同样可以实现为：
 ```python
 import cv2
 import os.path as osp
 from maas_lib.pipelines import pipeline
 from maas_lib.utils.constant import Tasks
 from ali_maas_datasets import PyDataset

 # 使用图像url构建PyDataset，此处也可通过 input_location = '/dir/to/images' 来使用本地文件夹
 input_location = [
    'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
 ]
 dataset = PyDataset.load(input_location, target='image')
 img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
 # 输入为PyDataset时，输出的结果为迭代器
 result = img_matting(dataset)
 cv2.imwrite('result.png', next(result)['output_png'])
 print(f'Output written to {osp.abspath("result.png")}')
 ```
--- a/tests/pipelines/test_image_matting.py
+++ b/tests/pipelines/test_image_matting.py
@@ -3,11 +3,8 @@
 import os.path as osp
 import tempfile
 import unittest
 from typing import Any, Dict, List, Tuple, Union

 import cv2
 import numpy as np
 import PIL
 from ali_maas_datasets import PyDataset

 from maas_lib.fileio import File
@@ -31,24 +28,20 @@ class ImageMattingTest(unittest.TestCase):
            )
            cv2.imwrite('result.png', result['output_png'])

    def test_dataset(self):
        """Run image matting over a PyDataset with a locally staged model.

        The model file is fetched with ``File.read`` and stored in a
        temporary directory, which is then handed to ``pipeline`` as the
        model location. Each result produced for the dataset is written as
        a PNG into a second temporary directory, and the test fails if any
        of those writes does not succeed.
        """
        model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \
                     '.com/data/test/maas/image_matting/matting_person.pb'
        with tempfile.TemporaryDirectory() as tmp_dir, \
                tempfile.TemporaryDirectory() as output_dir:
            model_file = osp.join(tmp_dir, 'matting_person.pb')
            with open(model_file, 'wb') as ofile:
                ofile.write(File.read(model_path))
            img_matting = pipeline(Tasks.image_matting, model=tmp_dir)
            # A local folder can be used instead of a list of urls:
            # dataset = PyDataset.load('/dir/to/images', target='image')
            # yapf: disable
            dataset = PyDataset.load([
                'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
            ],
                target='image')
            result = img_matting(dataset)
            for i, r in enumerate(result):
                # Write into a directory that is guaranteed to exist and
                # check the return value: cv2.imwrite reports failure by
                # returning False, so an unchecked write to a missing
                # directory would pass without producing any file.
                output_file = osp.join(output_dir, f'{i}.png')
                self.assertTrue(
                    cv2.imwrite(output_file, r['output_png']),
                    f'failed to write {output_file}')
            print('end')
    def test_run_with_dataset(self):
        """Feed a PyDataset to the matting pipeline and save the first result.

        The dataset is built from a list of image urls, the pipeline is
        created for ``Tasks.image_matting`` with the
        'damo/image-matting-person' model, and the first yielded output is
        written to 'result.png' in the current working directory.
        """
        # A local folder works as well, e.g. input_location = '/dir/to/images'
        image_urls = [
            'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
        ]
        samples = PyDataset.load(image_urls, target='image')

        matting = pipeline(
            Tasks.image_matting, model='damo/image-matting-person')

        # With a dataset as input the pipeline output is consumed lazily,
        # so only the first item is pulled here.
        outputs = matting(samples)
        first_output = next(outputs)

        output_file = 'result.png'
        cv2.imwrite(output_file, first_output['output_png'])
        print(f'Output written to {osp.abspath(output_file)}')

    def test_run_modelhub(self):
        img_matting = pipeline(
@@ -58,6 +51,7 @@ class ImageMattingTest(unittest.TestCase):
            'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
        )
        cv2.imwrite('result.png', result['output_png'])
        print(f'Output written to {osp.abspath("result.png")}')


 if __name__ == '__main__':
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -11,6 +11,7 @@ from maas_lib.models import Model
 from maas_lib.models.nlp import SequenceClassificationModel
 from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
 from maas_lib.preprocessors import SequenceClassificationPreprocessor
 from maas_lib.utils.constant import Tasks


 class SequenceClassificationTest(unittest.TestCase):
@@ -49,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase):
        pipeline1 = SequenceClassificationPipeline(model, preprocessor)
        self.predict(pipeline1)
        pipeline2 = pipeline(
            'text-classification', model=model, preprocessor=preprocessor)
            Tasks.text_classification, model=model, preprocessor=preprocessor)
        print(pipeline2('Hello world!'))

    def test_run_modelhub(self):
@@ -57,29 +58,20 @@ class SequenceClassificationTest(unittest.TestCase):
        preprocessor = SequenceClassificationPreprocessor(
            model.model_dir, first_sequence='sentence', second_sequence=None)
        pipeline_ins = pipeline(
            task='text-classification', model=model, preprocessor=preprocessor)
            task=Tasks.text_classification,
            model=model,
            preprocessor=preprocessor)
        self.predict(pipeline_ins)

    def test_dataset(self):
        model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
                    '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
        cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
        cache_path = Path(cache_path_str)

        if not cache_path.exists():
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            cache_path.touch(exist_ok=True)
            with cache_path.open('wb') as ofile:
                ofile.write(File.read(model_url))

        with zipfile.ZipFile(cache_path_str, 'r') as zipf:
            zipf.extractall(cache_path.parent)
        path = r'.cache/easynlp/'
        model = SequenceClassificationModel(path)
    def test_run_with_dataset(self):
        model = Model.from_pretrained('damo/bert-base-sst2')
        preprocessor = SequenceClassificationPreprocessor(
            path, first_sequence='sentence', second_sequence=None)
            model.model_dir, first_sequence='sentence', second_sequence=None)
        text_classification = pipeline(
            'text-classification', model=model, preprocessor=preprocessor)
            Tasks.text_classification, model=model, preprocessor=preprocessor)
        # loaded from huggingface dataset
        # TODO: add load_from parameter (an enum) LOAD_FROM.hugging_face
        # TODO: rename parameter as dataset_name and subset_name
        dataset = PyDataset.load('glue', name='sst2', target='sentence')
        result = text_classification(dataset)
        for i, r in enumerate(result):