1. add pipeline tutorial 2. fix bugs when using pipeline with certain model and preprocessor Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8810524master
| @@ -1,3 +1,5 @@ | |||||
| .. toctree:: | .. toctree:: | ||||
| :maxdepth: 2 | :maxdepth: 2 | ||||
| :caption: Tutorials | :caption: Tutorials | ||||
| pipeline.md | |||||
| @@ -0,0 +1,85 @@ | |||||
| # Pipeline使用教程 | |||||
| 本文将简单介绍如何使用`pipeline`函数加载模型进行推理。`pipeline`函数支持按照任务类型、模型名称从模型仓库 | |||||
| 拉取模型进行推理,当前支持的任务有 | |||||
| * 人像抠图 (image-matting) | |||||
| * 基于bert的语义情感分析 (bert-sentiment-analysis) | |||||
| 本文将从如下几个方面讲解如何使用Pipeline模块: | |||||
| * 使用pipeline()函数进行推理 | |||||
| * 指定特定预处理、特定模型进行推理 | |||||
| * 不同场景推理任务示例 | |||||
| ## Pipeline基本用法 | |||||
| 1. pipeline函数支持指定特定任务名称,加载任务默认模型,创建对应Pipeline对象 | |||||
| 注: 当前还未与modelhub进行打通,需要手动下载模型,创建pipeline时需要指定本地模型路径,未来会支持指定模型名称从远端仓库 | |||||
| 拉取模型并初始化。 | |||||
| 下载模型文件 | |||||
| ```shell | |||||
| wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/matting_person.pb | |||||
| ``` | |||||
| 执行python命令 | |||||
| ```python | |||||
| >>> from maas_lib.pipelines import pipeline | |||||
| >>> img_matting = pipeline(task='image-matting', model_path='matting_person.pb') | |||||
| ``` | |||||
| 2. 传入单张图像url进行处理 | |||||
| ``` python | |||||
| >>> import cv2 | |||||
| >>> result = img_matting('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png') | |||||
| >>> cv2.imwrite('result.png', result['output_png']) | |||||
| ``` | |||||
| pipeline对象也支持传入一个列表输入,返回对应输出列表,每个元素对应输入样本的返回结果 | |||||
| ```python | |||||
| results = img_matting( | |||||
| [ | |||||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png', | |||||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png', | |||||
| 'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png', | |||||
| ]) | |||||
| ``` | |||||
| 如果pipeline对应有一些后处理参数,也支持在调用时传入。 | |||||
| ```python | |||||
| pipe = pipeline(task_name) | |||||
| result = pipe(input, post_process_args) | |||||
| ``` | |||||
| ## 指定预处理、模型进行推理 | |||||
| pipeline函数支持传入实例化的预处理对象、模型对象,从而支持用户在推理过程中定制化预处理、模型。 | |||||
| 下面以文本情感分类为例进行介绍。 | |||||
| 注: 当前release版本还未实现AutoModel的语法糖,需要手动实例化模型,后续会加上对应语法糖简化调用 | |||||
| 下载模型文件 | |||||
| ```shell | |||||
| wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip && unzip bert-base-sst2.zip | |||||
| ``` | |||||
| 创建tokenizer和模型 | |||||
| ```python | |||||
| >>> from maas_lib.models.nlp import SequenceClassificationModel | |||||
| >>> path = 'bert-base-sst2' | |||||
| >>> model = SequenceClassificationModel(path) | |||||
| >>> from maas_lib.preprocessors import SequenceClassificationPreprocessor | |||||
| >>> tokenizer = SequenceClassificationPreprocessor( | |||||
| path, first_sequence='sentence', second_sequence=None) | |||||
| ``` | |||||
| 使用tokenizer和模型对象创建pipeline | |||||
| ```python | |||||
| >>> from maas_lib.pipelines import pipeline | |||||
| >>> semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer) | |||||
| >>> semantic_cls("Hello world!") | |||||
| ``` | |||||
| ## 不同场景任务推理示例 | |||||
| 人像抠图、语义分类见上述两个例子。其他例子未来添加。 | |||||
| @@ -28,6 +28,7 @@ def build_pipeline(cfg: ConfigDict, | |||||
| def pipeline(task: str = None, | def pipeline(task: str = None, | ||||
| model: Union[str, Model] = None, | model: Union[str, Model] = None, | ||||
| preprocessor=None, | |||||
| config_file: str = None, | config_file: str = None, | ||||
| pipeline_name: str = None, | pipeline_name: str = None, | ||||
| framework: str = None, | framework: str = None, | ||||
| @@ -39,6 +40,7 @@ def pipeline(task: str = None, | |||||
| Args: | Args: | ||||
| task (str): Task name defining which pipeline will be returned. | task (str): Task name defining which pipeline will be returned. | ||||
| model (str or obj:`Model`): model name or model object. | model (str or obj:`Model`): model name or model object. | ||||
| preprocessor: preprocessor object. | |||||
| config_file (str, optional): path to config file. | config_file (str, optional): path to config file. | ||||
| pipeline_name (str, optional): pipeline class name or alias name. | pipeline_name (str, optional): pipeline class name or alias name. | ||||
| framework (str, optional): framework type. | framework (str, optional): framework type. | ||||
| @@ -55,11 +57,23 @@ def pipeline(task: str = None, | |||||
| >>> resnet = Model.from_pretrained('Resnet') | >>> resnet = Model.from_pretrained('Resnet') | ||||
| >>> p = pipeline('image-classification', model=resnet) | >>> p = pipeline('image-classification', model=resnet) | ||||
| """ | """ | ||||
| if task is not None and model is None and pipeline_name is None: | |||||
| # get default pipeline for this task | |||||
| assert task in PIPELINES.modules, f'No pipeline is registerd for Task {task}' | |||||
| pipeline_name = list(PIPELINES.modules[task].keys())[0] | |||||
| if task is not None and pipeline_name is None: | |||||
| if model is None or isinstance(model, Model): | |||||
| # get default pipeline for this task | |||||
| assert task in PIPELINES.modules, f'No pipeline is registerd for Task {task}' | |||||
| pipeline_name = list(PIPELINES.modules[task].keys())[0] | |||||
| cfg = dict(type=pipeline_name, **kwargs) | |||||
| if model is not None: | |||||
| cfg['model'] = model | |||||
| if preprocessor is not None: | |||||
| cfg['preprocessor'] = preprocessor | |||||
| else: | |||||
| assert isinstance(model, str), \ | |||||
| f'model should be either str or Model, but got {type(model)}' | |||||
| # TODO @wenmeng.zwm determine pipeline_name according to task and model | |||||
| elif pipeline_name is not None: | |||||
| cfg = dict(type=pipeline_name) | |||||
| else: | |||||
| raise ValueError('task or pipeline_name is required') | |||||
| if pipeline_name is not None: | |||||
| cfg = dict(type=pipeline_name, **kwargs) | |||||
| return build_pipeline(cfg, task_name=task) | |||||
| return build_pipeline(cfg, task_name=task) | |||||
| @@ -7,7 +7,7 @@ import zipfile | |||||
| from maas_lib.fileio import File | from maas_lib.fileio import File | ||||
| from maas_lib.models.nlp import SequenceClassificationModel | from maas_lib.models.nlp import SequenceClassificationModel | ||||
| from maas_lib.pipelines import SequenceClassificationPipeline | |||||
| from maas_lib.pipelines import SequenceClassificationPipeline, pipeline | |||||
| from maas_lib.preprocessors import SequenceClassificationPreprocessor | from maas_lib.preprocessors import SequenceClassificationPreprocessor | ||||
| @@ -40,8 +40,11 @@ class SequenceClassificationTest(unittest.TestCase): | |||||
| model = SequenceClassificationModel(path) | model = SequenceClassificationModel(path) | ||||
| preprocessor = SequenceClassificationPreprocessor( | preprocessor = SequenceClassificationPreprocessor( | ||||
| path, first_sequence='sentence', second_sequence=None) | path, first_sequence='sentence', second_sequence=None) | ||||
| pipeline = SequenceClassificationPipeline(model, preprocessor) | |||||
| self.predict(pipeline) | |||||
| pipeline1 = SequenceClassificationPipeline(model, preprocessor) | |||||
| self.predict(pipeline1) | |||||
| pipeline2 = pipeline( | |||||
| 'text-classification', model=model, preprocessor=preprocessor) | |||||
| print(pipeline2('Hello world!')) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||