[to #42322933] Replace mplug input 'question' with 'text'

mplug 相关任务 pipeline 输入字段统一为 'image' + 'text' (Unify the input fields of the mplug-related task pipelines to 'image' + 'text').
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10236282
3 years ago · 69f8928dd2
--- a/modelscope/preprocessors/multi_modal.py
+++ b/modelscope/preprocessors/multi_modal.py
@@ -159,7 +159,8 @@ class MPlugPreprocessor(Preprocessor):
        image = image.convert('RGB')
        image = self.patch_resize_transform(image)
        question = '' if self.cfg.task == Tasks.image_captioning \
            else data[1 if isinstance(data, tuple) else 'question']
            else data[1 if isinstance(data, tuple)
                      else ('text' if 'text' in data else 'question')]
        question = self.tokenizer(
            question.lower(),
            padding='max_length',
--- a/tests/pipelines/test_mplug_tasks.py
+++ b/tests/pipelines/test_mplug_tasks.py
@@ -44,8 +44,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
            'damo/mplug_visual-question-answering_coco_large_en')
        pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
        image = Image.open('data/test/images/image_mplug_vqa.jpg')
        question = 'What is the woman doing?'
        input = {'image': image, 'question': question}
        text = 'What is the woman doing?'
        input = {'image': image, 'text': text}
        result = pipeline_vqa(input)
        print(result)

@@ -54,8 +54,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
        model = 'damo/mplug_visual-question-answering_coco_large_en'
        pipeline_vqa = pipeline(Tasks.visual_question_answering, model=model)
        image = Image.open('data/test/images/image_mplug_vqa.jpg')
        question = 'What is the woman doing?'
        input = {'image': image, 'question': question}
        text = 'What is the woman doing?'
        input = {'image': image, 'text': text}
        result = pipeline_vqa(input)
        print(result)

@@ -65,8 +65,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
            'damo/mplug_image-text-retrieval_flickr30k_large_en')
        pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
        image = Image.open('data/test/images/image-text-retrieval.jpg')
        question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
        input = {'image': image, 'question': question}
        text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
        input = {'image': image, 'text': text}
        result = pipeline_retrieval(input)
        print(result)

@@ -75,8 +75,8 @@ class MplugTasksTest(unittest.TestCase, DemoCompatibilityCheck):
        model = 'damo/mplug_image-text-retrieval_flickr30k_large_en'
        pipeline_retrieval = pipeline(Tasks.image_text_retrieval, model=model)
        image = Image.open('data/test/images/image-text-retrieval.jpg')
        question = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
        input = {'image': image, 'question': question}
        text = 'Two young guys with shaggy hair look at their hands while hanging out in the yard.'
        input = {'image': image, 'text': text}
        result = pipeline_retrieval(input)
        print(result)