Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8898823 (target branch: master)
@@ -20,7 +20,7 @@ which pip
 ## Installing Third-Party Dependencies
-MaaS Library supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It is tested on Python 3.6+, PyTorch 1.8+, and TensorFlow 2.6. Install the computation framework required by your chosen model; the following guides cover installation:
+MaaS Library currently supports the two major deep learning frameworks, TensorFlow and PyTorch, for model training and inference. It is tested on Python 3.6+, PyTorch 1.8+, and TensorFlow 2.6. Install the computation framework required by your chosen model; the following guides cover installation:
 * [PyTorch installation guide](https://pytorch.org/get-started/locally/)
 * [TensorFlow installation guide](https://www.tensorflow.org/install/pip)
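After installing one of these frameworks, a quick import check confirms the environment meets the stated version floors. A minimal sketch, assuming PyTorch was the chosen backend:

```python
# Minimal environment sanity check; assumes PyTorch was installed.
# For TensorFlow, use `import tensorflow as tf; print(tf.__version__)` instead.
import torch

print(torch.__version__)          # MaaS Library is tested on PyTorch 1.8+
print(torch.cuda.is_available())  # True when a CUDA-enabled build is installed
```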
@@ -41,7 +41,7 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
 ```
-### Using the Source Code
+### Installing from Source Code
 Suited to local development and debugging: after modifying the source, changes take effect immediately.
 ```shell
@@ -64,7 +64,6 @@ python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting
 ```
 ## Training
 to be done
@@ -84,12 +83,33 @@ from maas_lib.pipelines import pipeline
 from maas_lib.utils.constant import Tasks
 # Create a pipeline from the task name
-img_matting = pipeline(
-    Tasks.image_matting, model='damo/image-matting-person')
+img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
+# An image file URL can be passed directly as the pipeline's inference input
 result = img_matting(
     'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
 )
 cv2.imwrite('result.png', result['output_png'])
-print(f'result file path is {osp.abspath("result.png")}')
+print(f'Output written to {osp.abspath("result.png")}')
+```
+In addition, the pipeline interface also accepts a Dataset as input; the code above can equivalently be written as:
+```python
+import cv2
+import os.path as osp
+from maas_lib.pipelines import pipeline
+from maas_lib.utils.constant import Tasks
+from ali_maas_datasets import PyDataset
+# Build a PyDataset from image URLs; a local folder can also be used here via input_location = '/dir/to/images'
+input_location = [
+    'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
+]
+dataset = PyDataset.load(input_location, target='image')
+img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')
+# When the input is a PyDataset, the result is an iterator
+result = img_matting(dataset)
+cv2.imwrite('result.png', next(result)['output_png'])
+print(f'Output written to {osp.abspath("result.png")}')
 ```
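Since the dataset variant yields results lazily, `next(result)` consumes only the first image. A sketch of draining the whole generator, assuming each yielded dict carries the same 'output_png' key and that '/dir/to/images' is a local image folder as the comment above suggests:

```python
import cv2
import os.path as osp
from maas_lib.pipelines import pipeline
from maas_lib.utils.constant import Tasks
from ali_maas_datasets import PyDataset

# Assumed local folder of input images, per the input_location comment above
dataset = PyDataset.load('/dir/to/images', target='image')
img_matting = pipeline(Tasks.image_matting, model='damo/image-matting-person')

# The pipeline yields one result dict per input image; drain the generator
# and write each matte out under an index-based name.
for i, r in enumerate(img_matting(dataset)):
    out_path = f'result_{i}.png'
    cv2.imwrite(out_path, r['output_png'])
    print(f'Output written to {osp.abspath(out_path)}')
```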
@@ -3,11 +3,8 @@
 import os.path as osp
 import tempfile
 import unittest
-from typing import Any, Dict, List, Tuple, Union

 import cv2
-import numpy as np
-import PIL
 from ali_maas_datasets import PyDataset

 from maas_lib.fileio import File
@@ -31,24 +28,20 @@ class ImageMattingTest(unittest.TestCase):
         )
         cv2.imwrite('result.png', result['output_png'])

-    def test_dataset(self):
-        model_path = 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs' \
-                     '.com/data/test/maas/image_matting/matting_person.pb'
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model_file = osp.join(tmp_dir, 'matting_person.pb')
-            with open(model_file, 'wb') as ofile:
-                ofile.write(File.read(model_path))
-            img_matting = pipeline(Tasks.image_matting, model=tmp_dir)
-            # dataset = PyDataset.load('/dir/to/images', target='image')
-            # yapf: disable
-            dataset = PyDataset.load([
-                'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
-            ],
-                target='image')
-            result = img_matting(dataset)
-            for i, r in enumerate(result):
-                cv2.imwrite(f'/path/to/result/{i}.png', r['output_png'])
-            print('end')
+    def test_run_with_dataset(self):
+        input_location = [
+            'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
+        ]
+        # alternatively:
+        # input_location = '/dir/to/images'
+        dataset = PyDataset.load(input_location, target='image')
+        img_matting = pipeline(
+            Tasks.image_matting, model='damo/image-matting-person')
+        # For dataset input, the inference output is a generator that can be iterated
+        result = img_matting(dataset)
+        cv2.imwrite('result.png', next(result)['output_png'])
+        print(f'Output written to {osp.abspath("result.png")}')

     def test_run_modelhub(self):
         img_matting = pipeline(
@@ -58,6 +51,7 @@ class ImageMattingTest(unittest.TestCase):
             'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'
         )
         cv2.imwrite('result.png', result['output_png'])
+        print(f'Output written to {osp.abspath("result.png")}')

 if __name__ == '__main__':
@@ -11,6 +11,7 @@ from maas_lib.models import Model
 from maas_lib.models.nlp import SequenceClassificationModel
 from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
 from maas_lib.preprocessors import SequenceClassificationPreprocessor
+from maas_lib.utils.constant import Tasks


 class SequenceClassificationTest(unittest.TestCase):
@@ -49,7 +50,7 @@ class SequenceClassificationTest(unittest.TestCase):
         pipeline1 = SequenceClassificationPipeline(model, preprocessor)
         self.predict(pipeline1)
         pipeline2 = pipeline(
-            'text-classification', model=model, preprocessor=preprocessor)
+            Tasks.text_classification, model=model, preprocessor=preprocessor)
         print(pipeline2('Hello world!'))

     def test_run_modelhub(self):
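The swap from the raw 'text-classification' string to Tasks.text_classification reads as a pure refactor, which suggests the constant carries the same task name. A small illustration of the benefit, under that assumption:

```python
from maas_lib.utils.constant import Tasks

# Assumption inferred from this diff: the constant equals the raw string it replaces.
assert Tasks.text_classification == 'text-classification'

# A typo in a constant fails at import time with AttributeError,
# while a typo in a raw task string only surfaces later at pipeline lookup.
print(Tasks.text_classification, Tasks.image_matting)
```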
@@ -57,29 +58,20 @@ class SequenceClassificationTest(unittest.TestCase):
         preprocessor = SequenceClassificationPreprocessor(
             model.model_dir, first_sequence='sentence', second_sequence=None)
         pipeline_ins = pipeline(
-            task='text-classification', model=model, preprocessor=preprocessor)
+            task=Tasks.text_classification,
+            model=model,
+            preprocessor=preprocessor)
         self.predict(pipeline_ins)

-    def test_dataset(self):
-        model_url = 'https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com' \
-                    '/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip'
-        cache_path_str = r'.cache/easynlp/bert-base-sst2.zip'
-        cache_path = Path(cache_path_str)
-        if not cache_path.exists():
-            cache_path.parent.mkdir(parents=True, exist_ok=True)
-            cache_path.touch(exist_ok=True)
-            with cache_path.open('wb') as ofile:
-                ofile.write(File.read(model_url))
-            with zipfile.ZipFile(cache_path_str, 'r') as zipf:
-                zipf.extractall(cache_path.parent)
-        path = r'.cache/easynlp/'
-        model = SequenceClassificationModel(path)
+    def test_run_with_dataset(self):
+        model = Model.from_pretrained('damo/bert-base-sst2')
         preprocessor = SequenceClassificationPreprocessor(
-            path, first_sequence='sentence', second_sequence=None)
+            model.model_dir, first_sequence='sentence', second_sequence=None)
         text_classification = pipeline(
-            'text-classification', model=model, preprocessor=preprocessor)
+            Tasks.text_classification, model=model, preprocessor=preprocessor)
+        # loaded from the huggingface 'glue' dataset
+        # TODO: add a load_from parameter (an enum), e.g. LOAD_FROM.hugging_face
+        # TODO: rename the parameters to dataset_name and subset_name
         dataset = PyDataset.load('glue', name='sst2', target='sentence')
         result = text_classification(dataset)
         for i, r in enumerate(result):
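For reference, the pieces changed above compose into this end-to-end flow. A sketch built only from calls shown in this diff; the 'damo/bert-base-sst2' model id and the single-sentence usage come from the new tests:

```python
from maas_lib.models import Model
from maas_lib.pipelines import pipeline
from maas_lib.preprocessors import SequenceClassificationPreprocessor
from maas_lib.utils.constant import Tasks

# Load the model from the model hub and build its matching preprocessor
model = Model.from_pretrained('damo/bert-base-sst2')
preprocessor = SequenceClassificationPreprocessor(
    model.model_dir, first_sequence='sentence', second_sequence=None)

# Create the pipeline via the Tasks constant rather than a raw task string
text_classification = pipeline(
    Tasks.text_classification, model=model, preprocessor=preprocessor)

# Single-text input returns one result; a PyDataset input returns a generator
print(text_classification('Hello world!'))
```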