diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
index 49d2cac8..1de2244b 100644
--- a/docs/source/tutorials/index.rst
+++ b/docs/source/tutorials/index.rst
@@ -1,3 +1,5 @@
 .. toctree::
    :maxdepth: 2
    :caption: Tutorials
+
+   pipeline.md
diff --git a/docs/source/tutorials/pipeline.md b/docs/source/tutorials/pipeline.md
new file mode 100644
index 00000000..738f50ed
--- /dev/null
+++ b/docs/source/tutorials/pipeline.md
@@ -0,0 +1,85 @@
+# Pipeline Tutorial
+
+This tutorial briefly introduces how to use the `pipeline` function to load a model and run inference. The `pipeline` function
+can pull a model from the model repository by task type or model name and run inference with it. The currently supported tasks are
+
+* portrait matting (image-matting)
+* BERT-based sentiment analysis (bert-sentiment-analysis)
+
+This tutorial explains how to use the Pipeline module from the following angles:
+* running inference with the pipeline() function
+* running inference with a specific preprocessor and a specific model
+* inference examples for different tasks
+
+## Basic usage of Pipeline
+
+1. The pipeline function accepts a task name, loads the default model for that task, and creates the corresponding Pipeline object.
+   Note: the integration with the model hub is not finished yet, so the model must be downloaded manually and the local model path
+   must be passed when creating the pipeline; pulling and initializing a model from the remote repository by model name will be
+   supported in the future.
+
+   Download the model file
+   ```shell
+   wget http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/matting_person.pb
+   ```
+   Run the following python commands
+   ```python
+   >>> from maas_lib.pipelines import pipeline
+   >>> img_matting = pipeline(task='image-matting', model_path='matting_person.pb')
+   ```
+
+2. Pass a single image url to the pipeline
+   ```python
+   >>> import cv2
+   >>> result = img_matting('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png')
+   >>> cv2.imwrite('result.png', result['output_png'])
+   ```
+
+   A pipeline object also accepts a list of inputs and returns a list of outputs, one element per input sample.
+   ```python
+   results = img_matting(
+       [
+           'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png',
+           'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png',
+           'http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png',
+       ])
+   ```
+
+   If a pipeline has post-processing parameters, they can also be passed in at call time.
+   ```python
+   pipe = pipeline(task_name)
+   result = pipe(input, post_process_args)
+   ```
+
+## Inference with a specific preprocessor and model
+The pipeline function also accepts instantiated preprocessor and model objects, so users can customize the preprocessing
+and the model used for inference. Text sentiment classification is used as the example below.
+
+Note: the AutoModel syntactic sugar is not implemented in the current release, so the model has to be instantiated manually;
+the corresponding sugar will be added later to simplify the call.
+
+Download the model file
+```shell
+wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip && unzip bert-base-sst2.zip
+```
+
+Create the tokenizer and the model
+```python
+>>> from maas_lib.models.nlp import SequenceClassificationModel
+>>> path = 'bert-base-sst2'
+>>> model = SequenceClassificationModel(path)
+>>> from maas_lib.preprocessors import SequenceClassificationPreprocessor
+>>> tokenizer = SequenceClassificationPreprocessor(
+        path, first_sequence='sentence', second_sequence=None)
+```
+
+Create a pipeline from the tokenizer and model objects
+```python
+>>> from maas_lib.pipelines import pipeline
+>>> semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer)
+>>> semantic_cls("Hello world!")
+```
+
+## Inference examples for different tasks
+
+For portrait matting and sentiment classification, see the two examples above; more examples will be added in the future.
diff --git a/maas_lib/pipelines/builder.py b/maas_lib/pipelines/builder.py
index 12631d01..da47ba92 100644
--- a/maas_lib/pipelines/builder.py
+++ b/maas_lib/pipelines/builder.py
@@ -28,6 +28,7 @@ def build_pipeline(cfg: ConfigDict,
 
 def pipeline(task: str = None,
              model: Union[str, Model] = None,
+             preprocessor=None,
              config_file: str = None,
              pipeline_name: str = None,
              framework: str = None,
@@ -39,6 +40,7 @@ def pipeline(task: str = None,
     Args:
         task (str): Task name defining which pipeline will be returned.
         model (str or obj:`Model`): model name or model object.
+        preprocessor: preprocessor object.
        config_file (str, optional): path to config file.
        pipeline_name (str, optional): pipeline class name or alias name.
        framework (str, optional): framework type.
@@ -55,11 +57,23 @@
     >>> resnet = Model.from_pretrained('Resnet')
     >>> p = pipeline('image-classification', model=resnet)
     """
-    if task is not None and model is None and pipeline_name is None:
-        # get default pipeline for this task
-        assert task in PIPELINES.modules, f'No pipeline is registerd for Task {task}'
-        pipeline_name = list(PIPELINES.modules[task].keys())[0]
+    if task is not None and pipeline_name is None:
+        if model is None or isinstance(model, Model):
+            # get default pipeline for this task
+            assert task in PIPELINES.modules, f'No pipeline is registered for Task {task}'
+            pipeline_name = list(PIPELINES.modules[task].keys())[0]
+            cfg = dict(type=pipeline_name, **kwargs)
+            if model is not None:
+                cfg['model'] = model
+            if preprocessor is not None:
+                cfg['preprocessor'] = preprocessor
+        else:
+            assert isinstance(model, str), \
+                f'model should be either str or Model, but got {type(model)}'
+            # TODO @wenmeng.zwm determine pipeline_name according to task and model
+    elif pipeline_name is not None:
+        cfg = dict(type=pipeline_name)
+    else:
+        raise ValueError('task or pipeline_name is required')
 
-    if pipeline_name is not None:
-        cfg = dict(type=pipeline_name, **kwargs)
-    return build_pipeline(cfg, task_name=task)
+    return build_pipeline(cfg, task_name=task)
diff --git a/tests/pipelines/test_text_classification.py b/tests/pipelines/test_text_classification.py
index afac9228..0f7ba771 100644
--- a/tests/pipelines/test_text_classification.py
+++ b/tests/pipelines/test_text_classification.py
@@ -7,7 +7,7 @@ import zipfile
 
 from maas_lib.fileio import File
 from maas_lib.models.nlp import SequenceClassificationModel
-from maas_lib.pipelines import SequenceClassificationPipeline
+from maas_lib.pipelines import SequenceClassificationPipeline, pipeline
 from maas_lib.preprocessors import SequenceClassificationPreprocessor
 
 
@@ -40,8 +40,11 @@ class SequenceClassificationTest(unittest.TestCase):
         model = SequenceClassificationModel(path)
         preprocessor = SequenceClassificationPreprocessor(
             path, first_sequence='sentence', second_sequence=None)
-        pipeline = SequenceClassificationPipeline(model, preprocessor)
-        self.predict(pipeline)
+        pipeline1 = SequenceClassificationPipeline(model, preprocessor)
+        self.predict(pipeline1)
+        pipeline2 = pipeline(
+            'text-classification', model=model, preprocessor=preprocessor)
+        print(pipeline2('Hello world!'))
 
 
 if __name__ == '__main__':
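
For reference, a minimal sketch of the call pattern this change enables, mirroring the updated test and the tutorial above. It assumes the `bert-base-sst2` archive from the tutorial has already been downloaded and unzipped into the working directory.

```python
# Sketch of the pipeline() call pattern added in this change, mirroring
# tests/pipelines/test_text_classification.py.
# Assumes the bert-base-sst2 archive from the tutorial has been downloaded
# and unzipped into the current working directory.
from maas_lib.models.nlp import SequenceClassificationModel
from maas_lib.pipelines import pipeline
from maas_lib.preprocessors import SequenceClassificationPreprocessor

path = 'bert-base-sst2'
model = SequenceClassificationModel(path)
preprocessor = SequenceClassificationPreprocessor(
    path, first_sequence='sentence', second_sequence=None)

# pipeline() now forwards both objects into the pipeline config
# (cfg['model'] / cfg['preprocessor']) when they are provided.
semantic_cls = pipeline(
    'text-classification', model=model, preprocessor=preprocessor)
print(semantic_cls('Hello world!'))
```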