Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9268865master
| @@ -69,7 +69,7 @@ class Model(ABC): | |||||
| model_cfg.model_dir = local_model_dir | model_cfg.model_dir = local_model_dir | ||||
| for k, v in kwargs.items(): | for k, v in kwargs.items(): | ||||
| model_cfg.k = v | |||||
| model_cfg[k] = v | |||||
| model = build_model(model_cfg, task_name) | model = build_model(model_cfg, task_name) | ||||
| # dynamically add pipeline info to model for pipeline inference | # dynamically add pipeline info to model for pipeline inference | ||||
| @@ -215,8 +215,9 @@ class ImagenForTextToImageSynthesis(Model): | |||||
| eta=input.get('generator_ddim_eta', 0.0)) | eta=input.get('generator_ddim_eta', 0.0)) | ||||
| # upsampling (64->256) | # upsampling (64->256) | ||||
| img = F.interpolate( | |||||
| img, scale_factor=4.0, mode='bilinear', align_corners=False) | |||||
| if not input.get('debug', False): | |||||
| img = F.interpolate( | |||||
| img, scale_factor=4.0, mode='bilinear', align_corners=False) | |||||
| img = self.diffusion_imagen_upsampler_256.ddim_sample_loop( | img = self.diffusion_imagen_upsampler_256.ddim_sample_loop( | ||||
| noise=torch.randn_like(img), | noise=torch.randn_like(img), | ||||
| model=self.unet_imagen_upsampler_256, | model=self.unet_imagen_upsampler_256, | ||||
| @@ -233,14 +234,15 @@ class ImagenForTextToImageSynthesis(Model): | |||||
| 'context': torch.zeros_like(context), | 'context': torch.zeros_like(context), | ||||
| 'mask': torch.zeros_like(attention_mask) | 'mask': torch.zeros_like(attention_mask) | ||||
| }], | }], | ||||
| percentile=input.get('generator_percentile', 0.995), | |||||
| guide_scale=input.get('generator_guide_scale', 5.0), | |||||
| ddim_timesteps=input.get('generator_ddim_timesteps', 50), | |||||
| eta=input.get('generator_ddim_eta', 0.0)) | |||||
| percentile=input.get('upsampler_256_percentile', 0.995), | |||||
| guide_scale=input.get('upsampler_256_guide_scale', 5.0), | |||||
| ddim_timesteps=input.get('upsampler_256_ddim_timesteps', 50), | |||||
| eta=input.get('upsampler_256_ddim_eta', 0.0)) | |||||
| # upsampling (256->1024) | # upsampling (256->1024) | ||||
| img = F.interpolate( | |||||
| img, scale_factor=4.0, mode='bilinear', align_corners=False) | |||||
| if not input.get('debug', False): | |||||
| img = F.interpolate( | |||||
| img, scale_factor=4.0, mode='bilinear', align_corners=False) | |||||
| img = self.diffusion_upsampler_1024.ddim_sample_loop( | img = self.diffusion_upsampler_1024.ddim_sample_loop( | ||||
| noise=torch.randn_like(img), | noise=torch.randn_like(img), | ||||
| model=self.unet_upsampler_1024, | model=self.unet_upsampler_1024, | ||||
| @@ -1,5 +1,7 @@ | |||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| import torch | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.pipelines.base import Input | from modelscope.pipelines.base import Input | ||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| @@ -16,16 +18,17 @@ logger = get_logger() | |||||
| module_name=Pipelines.text_to_image_synthesis) | module_name=Pipelines.text_to_image_synthesis) | ||||
| class TextToImageSynthesisPipeline(Pipeline): | class TextToImageSynthesisPipeline(Pipeline): | ||||
| def __init__(self, model: str, device_id: int = -1): | |||||
| def __init__(self, model: str, **kwargs): | |||||
| device_id = 0 if torch.cuda.is_available() else -1 | |||||
| if isinstance(model, str): | if isinstance(model, str): | ||||
| pipe_model = Model.from_pretrained(model) | |||||
| pipe_model = Model.from_pretrained(model, device_id=device_id) | |||||
| elif isinstance(model, Model): | elif isinstance(model, Model): | ||||
| pipe_model = model | pipe_model = model | ||||
| else: | else: | ||||
| raise NotImplementedError( | raise NotImplementedError( | ||||
| f'expecting a Model instance or str, but get {type(model)}.') | f'expecting a Model instance or str, but get {type(model)}.') | ||||
| super().__init__(model=pipe_model) | |||||
| super().__init__(model=pipe_model, **kwargs) | |||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| return input | return input | ||||
| @@ -13,9 +13,15 @@ from modelscope.utils.test_utils import test_level | |||||
| class TextToImageSynthesisTest(unittest.TestCase): | class TextToImageSynthesisTest(unittest.TestCase): | ||||
| model_id = 'damo/cv_imagen_text-to-image-synthesis_tiny' | model_id = 'damo/cv_imagen_text-to-image-synthesis_tiny' | ||||
| test_text = {'text': '宇航员'} | |||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| test_text = { | |||||
| 'text': '宇航员', | |||||
| 'generator_ddim_timesteps': 2, | |||||
| 'upsampler_256_ddim_timesteps': 2, | |||||
| 'upsampler_1024_ddim_timesteps': 2, | |||||
| 'debug': True | |||||
| } | |||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | |||||
| def test_run_with_model_from_modelhub(self): | def test_run_with_model_from_modelhub(self): | ||||
| model = Model.from_pretrained(self.model_id) | model = Model.from_pretrained(self.model_id) | ||||
| pipe_line_text_to_image_synthesis = pipeline( | pipe_line_text_to_image_synthesis = pipeline( | ||||
| @@ -24,7 +30,7 @@ class TextToImageSynthesisTest(unittest.TestCase): | |||||
| self.test_text)[OutputKeys.OUTPUT_IMG] | self.test_text)[OutputKeys.OUTPUT_IMG] | ||||
| print(np.sum(np.abs(img))) | print(np.sum(np.abs(img))) | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||||
| def test_run_with_model_name(self): | def test_run_with_model_name(self): | ||||
| pipe_line_text_to_image_synthesis = pipeline( | pipe_line_text_to_image_synthesis = pipeline( | ||||
| task=Tasks.text_to_image_synthesis, model=self.model_id) | task=Tasks.text_to_image_synthesis, model=self.model_id) | ||||