
[to #42463204] support PIL.Image for image_captioning_pipeline

master · lingcai.wl, yingda.chen · 3 years ago
Commit 2019315c54
2 changed files, 4 additions and 4 deletions:
  1. modelscope/preprocessors/__init__.py (+1, -1)
  2. modelscope/preprocessors/multi_modal.py (+3, -3)

modelscope/preprocessors/__init__.py (+1, -1)

@@ -5,6 +5,6 @@ from .base import Preprocessor
 from .builder import PREPROCESSORS, build_preprocessor
 from .common import Compose
 from .image import LoadImage, load_image
-from .multi_model import OfaImageCaptionPreprocessor
+from .multi_modal import OfaImageCaptionPreprocessor
 from .nlp import *  # noqa F403
 from .text_to_speech import *  # noqa F403
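
Note: the one-line import fix above goes together with the file rename below — multi_model.py was a misspelling of multi_modal.py, so the package import is updated to the corrected module name.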

modelscope/preprocessors/multi_model.py → modelscope/preprocessors/multi_modal.py (+3, -3)

@@ -73,7 +73,7 @@ class OfaImageCaptionPreprocessor(Preprocessor):
         self.eos_item = torch.LongTensor([task.src_dict.eos()])
         self.pad_idx = task.src_dict.pad()
 
-    @type_assert(object, (str, tuple))
+    @type_assert(object, (str, tuple, Image.Image))
     def __call__(self, data: Union[str, tuple]) -> Dict[str, Any]:
 
         def encode_text(text, length=None, append_bos=False, append_eos=False):
@@ -89,8 +89,8 @@ class OfaImageCaptionPreprocessor(Preprocessor):
             s = torch.cat([s, self.eos_item])
             return s
 
-        if isinstance(input, Image.Image):
-            patch_image = self.patch_resize_transform(input).unsqueeze(0)
+        if isinstance(data, Image.Image):
+            patch_image = self.patch_resize_transform(data).unsqueeze(0)
         else:
             patch_image = self.patch_resize_transform(
                 load_image(data)).unsqueeze(0)
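
What this enables, sketched below: with the type_assert decorator widened and the `input`/`data` mix-up fixed (the old branch checked the Python builtin `input` instead of the `data` parameter, so it could never match), OfaImageCaptionPreprocessor.__call__ now accepts an in-memory PIL.Image.Image directly, in addition to a path or URL string routed through load_image(). A minimal usage sketch, assuming the ModelScope pipeline entry point — the task string and model id are illustrative assumptions, not part of this diff:

    # Hedged usage sketch: task name and model id are assumptions.
    from PIL import Image

    from modelscope.pipelines import pipeline

    # Build an image-captioning pipeline (assumed task string and model id).
    captioner = pipeline(
        'image-captioning', model='damo/ofa_image-caption_coco_large_en')

    # str input: the preprocessor loads the image via load_image(), as before.
    print(captioner('path/to/photo.jpg'))

    # PIL input: accepted directly after this commit; the image goes straight
    # into patch_resize_transform without a load_image() round-trip.
    img = Image.open('path/to/photo.jpg').convert('RGB')
    print(captioner(img))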
