Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9240440master
| @@ -29,7 +29,7 @@ pip install model_scope[all] -f https://pai-vision-data-hz.oss-cn-zhangjiakou.al | |||||
| ``` | ``` | ||||
| ### 使用源码安装 | ### 使用源码安装 | ||||
| 适合本地开发调试使用,修改源码后可以直接执行 | 适合本地开发调试使用,修改源码后可以直接执行 | ||||
| 下载源码前首先联系(临在,谦言,颖达,一耘)申请代码库权限,clone代码到本地 | |||||
| 下载源码可以直接clone代码到本地 | |||||
| ```shell | ```shell | ||||
| git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git modelscope | git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git modelscope | ||||
| git fetch origin master | git fetch origin master | ||||
| @@ -108,7 +108,11 @@ class CLIPForMultiModalEmbedding(Model): | |||||
| return text_ids_tensor, text_mask_tensor | return text_ids_tensor, text_mask_tensor | ||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| output = {'img_embedding': None, 'text_embedding': None} | |||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| output = { | |||||
| OutputKeys.IMG_EMBEDDING: None, | |||||
| OutputKeys.TEXT_EMBEDDING: None | |||||
| } | |||||
| if 'img' in input and input['img'] is not None: | if 'img' in input and input['img'] is not None: | ||||
| input_img = input['img'] | input_img = input['img'] | ||||
| if isinstance(input_img, Image.Image): | if isinstance(input_img, Image.Image): | ||||
| @@ -130,7 +134,8 @@ class CLIPForMultiModalEmbedding(Model): | |||||
| img_embedding = self.clip_model( | img_embedding = self.clip_model( | ||||
| input_data=img_tensor, input_type='img') | input_data=img_tensor, input_type='img') | ||||
| output['img_embedding'] = img_embedding.data.cpu().numpy() | |||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| output[OutputKeys.IMG_EMBEDDING] = img_embedding.data.cpu().numpy() | |||||
| if 'text' in input and input['text'] is not None: | if 'text' in input and input['text'] is not None: | ||||
| text_str = input['text'] | text_str = input['text'] | ||||
| @@ -76,9 +76,10 @@ class OfaForImageCaptioning(Model): | |||||
| input = fairseq.utils.move_to_cuda(input, device=self._device) | input = fairseq.utils.move_to_cuda(input, device=self._device) | ||||
| results, _ = self.eval_caption(self.task, self.generator, self.models, | results, _ = self.eval_caption(self.task, self.generator, self.models, | ||||
| input) | input) | ||||
| from ...pipelines.outputs import OutputKeys | |||||
| return { | return { | ||||
| 'image_id': results[0]['image_id'], | 'image_id': results[0]['image_id'], | ||||
| 'caption': results[0]['caption'] | |||||
| OutputKeys.CAPTION: results[0][OutputKeys.CAPTION] | |||||
| } | } | ||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| @@ -10,6 +10,7 @@ from modelscope.metainfo import Pipelines | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from ..base import Input, Pipeline | from ..base import Input, Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| def audio_norm(x): | def audio_norm(x): | ||||
| @@ -108,10 +109,10 @@ class ANSPipeline(Pipeline): | |||||
| current_idx += stride | current_idx += stride | ||||
| else: | else: | ||||
| outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy() | outputs = self.model(ndarray)['wav_l2'][0].cpu().numpy() | ||||
| return {'output_pcm': outputs[:nsamples]} | |||||
| return {OutputKeys.OUTPUT_PCM: outputs[:nsamples]} | |||||
| def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | ||||
| if 'output_path' in kwargs.keys(): | if 'output_path' in kwargs.keys(): | ||||
| sf.write(kwargs['output_path'], inputs['output_pcm'], | |||||
| sf.write(kwargs['output_path'], inputs[OutputKeys.OUTPUT_PCM], | |||||
| self.SAMPLE_RATE) | self.SAMPLE_RATE) | ||||
| return inputs | return inputs | ||||
| @@ -12,6 +12,7 @@ from modelscope.preprocessors.audio import LinearAECAndFbank | |||||
| from modelscope.utils.constant import ModelFile, Tasks | from modelscope.utils.constant import ModelFile, Tasks | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| FEATURE_MVN = 'feature.DEY.mvn.txt' | FEATURE_MVN = 'feature.DEY.mvn.txt' | ||||
| @@ -120,7 +121,7 @@ class LinearAECPipeline(Pipeline): | |||||
| } | } | ||||
| """ | """ | ||||
| output_data = self._process(inputs['feature'], inputs['base']) | output_data = self._process(inputs['feature'], inputs['base']) | ||||
| return {'output_pcm': output_data} | |||||
| return {OutputKeys.OUTPUT_PCM: output_data} | |||||
| def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any], **kwargs) -> Dict[str, Any]: | ||||
| r"""The post process. Will save audio to file, if the output_path is given. | r"""The post process. Will save audio to file, if the output_path is given. | ||||
| @@ -140,8 +141,8 @@ class LinearAECPipeline(Pipeline): | |||||
| """ | """ | ||||
| if 'output_path' in kwargs.keys(): | if 'output_path' in kwargs.keys(): | ||||
| wav.write(kwargs['output_path'], self.preprocessor.SAMPLE_RATE, | wav.write(kwargs['output_path'], self.preprocessor.SAMPLE_RATE, | ||||
| inputs['output_pcm'].astype(np.int16)) | |||||
| inputs['output_pcm'] = inputs['output_pcm'] / 32768.0 | |||||
| inputs[OutputKeys.OUTPUT_PCM].astype(np.int16)) | |||||
| inputs[OutputKeys.OUTPUT_PCM] = inputs[OutputKeys.OUTPUT_PCM] / 32768.0 | |||||
| return inputs | return inputs | ||||
| def _process(self, fbanks, mixture): | def _process(self, fbanks, mixture): | ||||
| @@ -16,6 +16,7 @@ from modelscope.utils.constant import ModelFile, Tasks | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| logger = get_logger() | logger = get_logger() | ||||
| @@ -49,7 +50,7 @@ class ActionRecognitionPipeline(Pipeline): | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| pred = self.perform_inference(input['video_data']) | pred = self.perform_inference(input['video_data']) | ||||
| output_label = self.label_mapping[str(pred)] | output_label = self.label_mapping[str(pred)] | ||||
| return {'output_label': output_label} | |||||
| return {OutputKeys.LABELS: output_label} | |||||
| @torch.no_grad() | @torch.no_grad() | ||||
| def perform_inference(self, data, max_bsz=4): | def perform_inference(self, data, max_bsz=4): | ||||
| @@ -18,6 +18,7 @@ from modelscope.utils.constant import ModelFile, Tasks | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| logger = get_logger() | logger = get_logger() | ||||
| @@ -121,7 +122,9 @@ class AnimalRecogPipeline(Pipeline): | |||||
| label_mapping = f.readlines() | label_mapping = f.readlines() | ||||
| score = torch.max(inputs['outputs']) | score = torch.max(inputs['outputs']) | ||||
| inputs = { | inputs = { | ||||
| 'scores': score.item(), | |||||
| 'labels': label_mapping[inputs['outputs'].argmax()].split('\t')[1] | |||||
| OutputKeys.SCORES: | |||||
| score.item(), | |||||
| OutputKeys.LABELS: | |||||
| label_mapping[inputs['outputs'].argmax()].split('\t')[1] | |||||
| } | } | ||||
| return inputs | return inputs | ||||
| @@ -17,6 +17,7 @@ from modelscope.utils.constant import Tasks | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| tf = tf.compat.v1 | tf = tf.compat.v1 | ||||
| @@ -94,7 +95,7 @@ class ImageCartoonPipeline(Pipeline): | |||||
| landmarks = self.detect_face(img) | landmarks = self.detect_face(img) | ||||
| if landmarks is None: | if landmarks is None: | ||||
| print('No face detected!') | print('No face detected!') | ||||
| return {'output_png': None} | |||||
| return {OutputKeys.OUTPUT_IMG: None} | |||||
| # background process | # background process | ||||
| pad_bg, pad_h, pad_w = padTo16x(img_brg) | pad_bg, pad_h, pad_w = padTo16x(img_brg) | ||||
| @@ -143,7 +144,7 @@ class ImageCartoonPipeline(Pipeline): | |||||
| res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA) | res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA) | ||||
| return {'output_png': res} | |||||
| return {OutputKeys.OUTPUT_IMG: res} | |||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| return inputs | return inputs | ||||
| @@ -12,6 +12,7 @@ from modelscope.utils.constant import ModelFile, Tasks | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| logger = get_logger() | logger = get_logger() | ||||
| @@ -60,9 +61,9 @@ class ImageMattingPipeline(Pipeline): | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| with self._session.as_default(): | with self._session.as_default(): | ||||
| feed_dict = {self.input_name: input['img']} | feed_dict = {self.input_name: input['img']} | ||||
| output_png = self._session.run(self.output, feed_dict=feed_dict) | |||||
| output_png = cv2.cvtColor(output_png, cv2.COLOR_RGBA2BGRA) | |||||
| return {'output_png': output_png} | |||||
| output_img = self._session.run(self.output, feed_dict=feed_dict) | |||||
| output_img = cv2.cvtColor(output_img, cv2.COLOR_RGBA2BGRA) | |||||
| return {OutputKeys.OUTPUT_IMG: output_img} | |||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| return inputs | return inputs | ||||
| @@ -16,6 +16,7 @@ from modelscope.utils.constant import ModelFile, Tasks | |||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from ..base import Pipeline | from ..base import Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils | from .ocr_utils import model_resnet_mutex_v4_linewithchar, ops, utils | ||||
| if tf.__version__ >= '2.0': | if tf.__version__ >= '2.0': | ||||
| @@ -174,5 +175,5 @@ class OCRDetectionPipeline(Pipeline): | |||||
| dt_nms = utils.nms_python(dt_n9) | dt_nms = utils.nms_python(dt_n9) | ||||
| dt_polygons = np.array([o[:8] for o in dt_nms]) | dt_polygons = np.array([o[:8] for o in dt_nms]) | ||||
| result = {'det_polygons': dt_polygons} | |||||
| result = {OutputKeys.POLYGONS: dt_polygons} | |||||
| return result | return result | ||||
| @@ -9,6 +9,7 @@ from ...utils.config import Config | |||||
| from ...utils.constant import ModelFile, Tasks | from ...utils.constant import ModelFile, Tasks | ||||
| from ..base import Pipeline, Tensor | from ..base import Pipeline, Tensor | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['FillMaskPipeline'] | __all__ = ['FillMaskPipeline'] | ||||
| _type_map = {'veco': 'roberta', 'sbert': 'bert'} | _type_map = {'veco': 'roberta', 'sbert': 'bert'} | ||||
| @@ -96,4 +97,4 @@ class FillMaskPipeline(Pipeline): | |||||
| pred_string = rep_tokens(pred_string, self.rep_map[process_type]) | pred_string = rep_tokens(pred_string, self.rep_map[process_type]) | ||||
| pred_strings.append(pred_string) | pred_strings.append(pred_string) | ||||
| return {'text': pred_strings} | |||||
| return {OutputKeys.TEXT: pred_strings} | |||||
| @@ -9,6 +9,7 @@ from modelscope.utils.constant import Tasks | |||||
| from ...models import Model | from ...models import Model | ||||
| from ..base import Input, Pipeline | from ..base import Input, Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['SentenceSimilarityPipeline'] | __all__ = ['SentenceSimilarityPipeline'] | ||||
| @@ -59,4 +60,4 @@ class SentenceSimilarityPipeline(Pipeline): | |||||
| probs = probs[cls_ids].tolist() | probs = probs[cls_ids].tolist() | ||||
| cls_names = [self.model.id2label[cid] for cid in cls_ids] | cls_names = [self.model.id2label[cid] for cid in cls_ids] | ||||
| b = 0 | b = 0 | ||||
| return {'scores': probs[b], 'labels': cls_names[b]} | |||||
| return {OutputKeys.SCORES: probs[b], OutputKeys.LABELS: cls_names[b]} | |||||
| @@ -9,6 +9,7 @@ from modelscope.utils.constant import Tasks | |||||
| from ...models import Model | from ...models import Model | ||||
| from ..base import Input, Pipeline | from ..base import Input, Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['SequenceClassificationPipeline'] | __all__ = ['SequenceClassificationPipeline'] | ||||
| @@ -64,4 +65,4 @@ class SequenceClassificationPipeline(Pipeline): | |||||
| cls_names = [self.model.id2label[cid] for cid in cls_ids] | cls_names = [self.model.id2label[cid] for cid in cls_ids] | ||||
| return {'scores': probs, 'labels': cls_names} | |||||
| return {OutputKeys.SCORES: probs, OutputKeys.LABELS: cls_names} | |||||
| @@ -7,6 +7,7 @@ from modelscope.preprocessors import TextGenerationPreprocessor | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from ..base import Pipeline, Tensor | from ..base import Pipeline, Tensor | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['TextGenerationPipeline'] | __all__ = ['TextGenerationPipeline'] | ||||
| @@ -61,4 +62,4 @@ class TextGenerationPipeline(Pipeline): | |||||
| for _old, _new in replace_tokens_roberta: | for _old, _new in replace_tokens_roberta: | ||||
| pred_string = pred_string.replace(_old, _new) | pred_string = pred_string.replace(_old, _new) | ||||
| pred_string.strip() | pred_string.strip() | ||||
| return {'text': pred_string} | |||||
| return {OutputKeys.TEXT: pred_string} | |||||
| @@ -7,6 +7,7 @@ from modelscope.preprocessors import TokenClassifcationPreprocessor | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from ..base import Pipeline, Tensor | from ..base import Pipeline, Tensor | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['WordSegmentationPipeline'] | __all__ = ['WordSegmentationPipeline'] | ||||
| @@ -63,7 +64,4 @@ class WordSegmentationPipeline(Pipeline): | |||||
| if chunk: | if chunk: | ||||
| chunks.append(chunk) | chunks.append(chunk) | ||||
| seg_result = ' '.join(chunks) | seg_result = ' '.join(chunks) | ||||
| rst = { | |||||
| 'output': seg_result, | |||||
| } | |||||
| return rst | |||||
| return {OutputKeys.OUTPUT: seg_result} | |||||
| @@ -14,6 +14,7 @@ from ...preprocessors import ZeroShotClassificationPreprocessor | |||||
| from ...utils.constant import Tasks | from ...utils.constant import Tasks | ||||
| from ..base import Input, Pipeline | from ..base import Input, Pipeline | ||||
| from ..builder import PIPELINES | from ..builder import PIPELINES | ||||
| from ..outputs import OutputKeys | |||||
| __all__ = ['ZeroShotClassificationPipeline'] | __all__ = ['ZeroShotClassificationPipeline'] | ||||
| @@ -91,7 +92,7 @@ class ZeroShotClassificationPipeline(Pipeline): | |||||
| reversed_index = list(reversed(scores.argsort())) | reversed_index = list(reversed(scores.argsort())) | ||||
| result = { | result = { | ||||
| 'labels': [candidate_labels[i] for i in reversed_index], | |||||
| 'scores': [scores[i].item() for i in reversed_index], | |||||
| OutputKeys.LABELS: [candidate_labels[i] for i in reversed_index], | |||||
| OutputKeys.SCORES: [scores[i].item() for i in reversed_index], | |||||
| } | } | ||||
| return result | return result | ||||
| @@ -2,54 +2,72 @@ | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
class OutputKeys:
    """Names of the keys used in pipeline and model output dictionaries.

    Every attribute is a plain ``str`` whose value is the literal
    dictionary key, so ``result[OutputKeys.SCORES]`` and
    ``result['scores']`` address the same entry. ``TASK_OUTPUTS`` lists,
    per task, which of these keys an output dictionary carries.
    """

    SCORES = 'scores'
    LABELS = 'labels'
    POSES = 'poses'
    CAPTION = 'caption'
    BOXES = 'boxes'
    TEXT = 'text'
    POLYGONS = 'polygons'
    OUTPUT = 'output'
    OUTPUT_IMG = 'output_img'
    OUTPUT_PCM = 'output_pcm'
    IMG_EMBEDDING = 'img_embedding'
    TEXT_EMBEDDING = 'text_embedding'
| TASK_OUTPUTS = { | TASK_OUTPUTS = { | ||||
| # ============ vision tasks =================== | # ============ vision tasks =================== | ||||
| # image classification result for single sample | # image classification result for single sample | ||||
| # { | # { | ||||
| # "labels": ["dog", "horse", "cow", "cat"], | |||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | # "scores": [0.9, 0.1, 0.05, 0.05] | ||||
| # "labels": ["dog", "horse", "cow", "cat"], | |||||
| # } | # } | ||||
| Tasks.image_classification: ['scores', 'labels'], | |||||
| Tasks.image_tagging: ['scores', 'labels'], | |||||
| Tasks.image_classification: [OutputKeys.SCORES, OutputKeys.LABELS], | |||||
| Tasks.image_tagging: [OutputKeys.SCORES, OutputKeys.LABELS], | |||||
| # object detection result for single sample | # object detection result for single sample | ||||
| # { | # { | ||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||||
| # "labels": ["dog", "horse", "cow", "cat"], | |||||
| # "boxes": [ | # "boxes": [ | ||||
| # [x1, y1, x2, y2], | # [x1, y1, x2, y2], | ||||
| # [x1, y1, x2, y2], | # [x1, y1, x2, y2], | ||||
| # [x1, y1, x2, y2], | # [x1, y1, x2, y2], | ||||
| # ], | # ], | ||||
| # "labels": ["dog", "horse", "cow", "cat"], | |||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||||
| # } | # } | ||||
| Tasks.object_detection: ['scores', 'labels', 'boxes'], | |||||
| Tasks.object_detection: | |||||
| [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], | |||||
| # instance segmentation result for single sample | # instance segmentation result for single sample | ||||
| # { | # { | ||||
| # "masks": [ | |||||
| # np.array in bgr channel order | |||||
| # ], | |||||
| # "scores": [0.9, 0.1, 0.05, 0.05], | |||||
| # "labels": ["dog", "horse", "cow", "cat"], | # "labels": ["dog", "horse", "cow", "cat"], | ||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | |||||
| # "boxes": [ | |||||
| # np.array in bgr channel order | |||||
| # ] | |||||
| # } | # } | ||||
| Tasks.image_segmentation: ['scores', 'labels', 'boxes'], | |||||
| Tasks.image_segmentation: | |||||
| [OutputKeys.SCORES, OutputKeys.LABELS, OutputKeys.BOXES], | |||||
| # image generation/editing/matting result for single sample | # image generation/editing/matting result for single sample | ||||
| # { | # { | ||||
| # "output_png": np.array with shape(h, w, 4) | |||||
| # "output_img": np.array with shape(h, w, 4) | |||||
| # for matting or (h, w, 3) for general purpose | # for matting or (h, w, 3) for general purpose | ||||
| # } | # } | ||||
| Tasks.image_editing: ['output_png'], | |||||
| Tasks.image_matting: ['output_png'], | |||||
| Tasks.image_generation: ['output_png'], | |||||
| Tasks.image_editing: [OutputKeys.OUTPUT_IMG], | |||||
| Tasks.image_matting: [OutputKeys.OUTPUT_IMG], | |||||
| Tasks.image_generation: [OutputKeys.OUTPUT_IMG], | |||||
| # action recognition result for single video | # action recognition result for single video | ||||
| # { | # { | ||||
| # "output_label": "abseiling" | # "labels": "abseiling" | ||||
| # } | # } | ||||
| Tasks.action_recognition: ['output_label'], | |||||
| Tasks.action_recognition: [OutputKeys.LABELS], | |||||
| # pose estimation result for single sample | # pose estimation result for single sample | ||||
| # { | # { | ||||
| @@ -58,55 +76,55 @@ TASK_OUTPUTS = { | |||||
| # "boxes": np.array with shape [num_pose, 4], each box is | # "boxes": np.array with shape [num_pose, 4], each box is | ||||
| # [x1, y1, x2, y2] | # [x1, y1, x2, y2] | ||||
| # } | # } | ||||
| Tasks.pose_estimation: ['poses', 'boxes'], | |||||
| Tasks.pose_estimation: [OutputKeys.POSES, OutputKeys.BOXES], | |||||
| # ocr detection result for single sample | # ocr detection result for single sample | ||||
| # { | # { | ||||
| # "det_polygons": np.array with shape [num_text, 8], each box is | |||||
| # "polygons": np.array with shape [num_text, 8], each polygon is | |||||
| # [x1, y1, x2, y2, x3, y3, x4, y4] | # [x1, y1, x2, y2, x3, y3, x4, y4] | ||||
| # } | # } | ||||
| Tasks.ocr_detection: ['det_polygons'], | |||||
| Tasks.ocr_detection: [OutputKeys.POLYGONS], | |||||
| # ============ nlp tasks =================== | # ============ nlp tasks =================== | ||||
| # text classification result for single sample | # text classification result for single sample | ||||
| # { | # { | ||||
| # "labels": ["happy", "sad", "calm", "angry"], | |||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | # "scores": [0.9, 0.1, 0.05, 0.05] | ||||
| # "labels": ["happy", "sad", "calm", "angry"], | |||||
| # } | # } | ||||
| Tasks.text_classification: ['scores', 'labels'], | |||||
| Tasks.text_classification: [OutputKeys.SCORES, OutputKeys.LABELS], | |||||
| # text generation result for single sample | # text generation result for single sample | ||||
| # { | # { | ||||
| # "text": "this is text generated by a model." | |||||
| # "text": "this is the text generated by a model." | |||||
| # } | # } | ||||
| Tasks.text_generation: ['text'], | |||||
| Tasks.text_generation: [OutputKeys.TEXT], | |||||
| # fill mask result for single sample | # fill mask result for single sample | ||||
| # { | # { | ||||
| # "text": "this is the text which masks filled by model." | # "text": "this is the text which masks filled by model." | ||||
| # } | # } | ||||
| Tasks.fill_mask: ['text'], | |||||
| Tasks.fill_mask: [OutputKeys.TEXT], | |||||
| # word segmentation result for single sample | # word segmentation result for single sample | ||||
| # { | # { | ||||
| # "output": "今天 天气 不错 , 适合 出去 游玩" | # "output": "今天 天气 不错 , 适合 出去 游玩" | ||||
| # } | # } | ||||
| Tasks.word_segmentation: ['output'], | |||||
| Tasks.word_segmentation: [OutputKeys.OUTPUT], | |||||
| # sentence similarity result for single sample | # sentence similarity result for single sample | ||||
| # { | # { | ||||
| # "labels": "1", | |||||
| # "scores": 0.9 | # "scores": 0.9 | ||||
| # "labels": "1", | |||||
| # } | # } | ||||
| Tasks.sentence_similarity: ['scores', 'labels'], | |||||
| Tasks.sentence_similarity: [OutputKeys.SCORES, OutputKeys.LABELS], | |||||
| # zero-shot classification result for single sample | # zero-shot classification result for single sample | ||||
| # { | # { | ||||
| # "labels": ["happy", "sad", "calm", "angry"], | |||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | # "scores": [0.9, 0.1, 0.05, 0.05] | ||||
| # "labels": ["happy", "sad", "calm", "angry"], | |||||
| # } | # } | ||||
| Tasks.zero_shot_classification: ['scores', 'labels'], | |||||
| Tasks.zero_shot_classification: [OutputKeys.SCORES, OutputKeys.LABELS], | |||||
| # ============ audio tasks =================== | # ============ audio tasks =================== | ||||
| @@ -114,7 +132,7 @@ TASK_OUTPUTS = { | |||||
| # { | # { | ||||
| # "output_pcm": np.array with shape(samples,) and dtype float32 | # "output_pcm": np.array with shape(samples,) and dtype float32 | ||||
| # } | # } | ||||
| Tasks.speech_signal_process: ['output_pcm'], | |||||
| Tasks.speech_signal_process: [OutputKeys.OUTPUT_PCM], | |||||
| # ============ multi-modal tasks =================== | # ============ multi-modal tasks =================== | ||||
| @@ -122,14 +140,15 @@ TASK_OUTPUTS = { | |||||
| # { | # { | ||||
| # "caption": "this is an image caption text." | # "caption": "this is an image caption text." | ||||
| # } | # } | ||||
| Tasks.image_captioning: ['caption'], | |||||
| Tasks.image_captioning: [OutputKeys.CAPTION], | |||||
| # multi-modal embedding result for single sample | # multi-modal embedding result for single sample | ||||
| # { | # { | ||||
| # "img_embedding": np.array with shape [1, D], | # "img_embedding": np.array with shape [1, D], | ||||
| # "text_embedding": np.array with shape [1, D] | # "text_embedding": np.array with shape [1, D] | ||||
| # } | # } | ||||
| Tasks.multi_modal_embedding: ['img_embedding', 'text_embedding'], | |||||
| Tasks.multi_modal_embedding: | |||||
| [OutputKeys.IMG_EMBEDDING, OutputKeys.TEXT_EMBEDDING], | |||||
| # visual grounding result for single sample | # visual grounding result for single sample | ||||
| # { | # { | ||||
| @@ -140,11 +159,11 @@ TASK_OUTPUTS = { | |||||
| # ], | # ], | ||||
| # "scores": [0.9, 0.1, 0.05, 0.05] | # "scores": [0.9, 0.1, 0.05, 0.05] | ||||
| # } | # } | ||||
| Tasks.visual_grounding: ['boxes', 'scores'], | |||||
| Tasks.visual_grounding: [OutputKeys.BOXES, OutputKeys.SCORES], | |||||
| # text_to_image result for a single sample | # text_to_image result for a single sample | ||||
| # { | # { | ||||
| # "image": np.ndarray with shape [height, width, 3] | |||||
| # "output_img": np.ndarray with shape [height, width, 3] | |||||
| # } | # } | ||||
| Tasks.text_to_image_synthesis: ['image'] | |||||
| Tasks.text_to_image_synthesis: [OutputKeys.OUTPUT_IMG] | |||||
| } | } | ||||
| @@ -8,6 +8,7 @@ import PIL | |||||
| from modelscope.pipelines import Pipeline, pipeline | from modelscope.pipelines import Pipeline, pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES, add_default_pipeline_info | from modelscope.pipelines.builder import PIPELINES, add_default_pipeline_info | ||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| from modelscope.utils.registry import default_group | from modelscope.utils.registry import default_group | ||||
| @@ -68,7 +69,7 @@ class CustomPipelineTest(unittest.TestCase): | |||||
| outputs['filename'] = inputs['url'] | outputs['filename'] = inputs['url'] | ||||
| img = inputs['img'] | img = inputs['img'] | ||||
| new_image = img.resize((img.width // 2, img.height // 2)) | new_image = img.resize((img.width // 2, img.height // 2)) | ||||
| outputs['output_png'] = np.array(new_image) | |||||
| outputs[OutputKeys.OUTPUT_IMG] = np.array(new_image) | |||||
| return outputs | return outputs | ||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| @@ -83,13 +84,13 @@ class CustomPipelineTest(unittest.TestCase): | |||||
| img_url = 'data/test/images/image1.jpg' | img_url = 'data/test/images/image1.jpg' | ||||
| output = pipe(img_url) | output = pipe(img_url) | ||||
| self.assertEqual(output['filename'], img_url) | self.assertEqual(output['filename'], img_url) | ||||
| self.assertEqual(output['output_png'].shape, (318, 512, 3)) | |||||
| self.assertEqual(output[OutputKeys.OUTPUT_IMG].shape, (318, 512, 3)) | |||||
| outputs = pipe([img_url for i in range(4)]) | outputs = pipe([img_url for i in range(4)]) | ||||
| self.assertEqual(len(outputs), 4) | self.assertEqual(len(outputs), 4) | ||||
| for out in outputs: | for out in outputs: | ||||
| self.assertEqual(out['filename'], img_url) | self.assertEqual(out['filename'], img_url) | ||||
| self.assertEqual(out['output_png'].shape, (318, 512, 3)) | |||||
| self.assertEqual(out[OutputKeys.OUTPUT_IMG].shape, (318, 512, 3)) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -3,6 +3,7 @@ | |||||
| import unittest | import unittest | ||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| @@ -15,7 +16,7 @@ class ImageCaptionTest(unittest.TestCase): | |||||
| Tasks.image_captioning, | Tasks.image_captioning, | ||||
| model='damo/ofa_image-caption_coco_large_en') | model='damo/ofa_image-caption_coco_large_en') | ||||
| result = img_captioning('data/test/images/image_captioning.png') | result = img_captioning('data/test/images/image_captioning.png') | ||||
| print(result['caption']) | |||||
| print(result[OutputKeys.CAPTION]) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -9,6 +9,7 @@ import cv2 | |||||
| from modelscope.fileio import File | from modelscope.fileio import File | ||||
| from modelscope.msdatasets import MsDataset | from modelscope.msdatasets import MsDataset | ||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| from modelscope.utils.constant import ModelFile, Tasks | from modelscope.utils.constant import ModelFile, Tasks | ||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| @@ -29,7 +30,7 @@ class ImageMattingTest(unittest.TestCase): | |||||
| img_matting = pipeline(Tasks.image_matting, model=tmp_dir) | img_matting = pipeline(Tasks.image_matting, model=tmp_dir) | ||||
| result = img_matting('data/test/images/image_matting.png') | result = img_matting('data/test/images/image_matting.png') | ||||
| cv2.imwrite('result.png', result['output_png']) | |||||
| cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) | |||||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | ||||
| def test_run_with_dataset(self): | def test_run_with_dataset(self): | ||||
| @@ -41,7 +42,7 @@ class ImageMattingTest(unittest.TestCase): | |||||
| img_matting = pipeline(Tasks.image_matting, model=self.model_id) | img_matting = pipeline(Tasks.image_matting, model=self.model_id) | ||||
| # note that for dataset output, the inference-output is a Generator that can be iterated. | # note that for dataset output, the inference-output is a Generator that can be iterated. | ||||
| result = img_matting(dataset) | result = img_matting(dataset) | ||||
| cv2.imwrite('result.png', next(result)['output_png']) | |||||
| cv2.imwrite('result.png', next(result)[OutputKeys.OUTPUT_IMG]) | |||||
| print(f'Output written to {osp.abspath("result.png")}') | print(f'Output written to {osp.abspath("result.png")}') | ||||
| @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 0, 'skip test in current test level') | ||||
| @@ -49,7 +50,7 @@ class ImageMattingTest(unittest.TestCase): | |||||
| img_matting = pipeline(Tasks.image_matting, model=self.model_id) | img_matting = pipeline(Tasks.image_matting, model=self.model_id) | ||||
| result = img_matting('data/test/images/image_matting.png') | result = img_matting('data/test/images/image_matting.png') | ||||
| cv2.imwrite('result.png', result['output_png']) | |||||
| cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) | |||||
| print(f'Output written to {osp.abspath("result.png")}') | print(f'Output written to {osp.abspath("result.png")}') | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| @@ -57,7 +58,7 @@ class ImageMattingTest(unittest.TestCase): | |||||
| img_matting = pipeline(Tasks.image_matting) | img_matting = pipeline(Tasks.image_matting) | ||||
| result = img_matting('data/test/images/image_matting.png') | result = img_matting('data/test/images/image_matting.png') | ||||
| cv2.imwrite('result.png', result['output_png']) | |||||
| cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) | |||||
| print(f'Output written to {osp.abspath("result.png")}') | print(f'Output written to {osp.abspath("result.png")}') | ||||
| @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') | ||||
| @@ -67,7 +68,7 @@ class ImageMattingTest(unittest.TestCase): | |||||
| img_matting = pipeline(Tasks.image_matting, model=self.model_id) | img_matting = pipeline(Tasks.image_matting, model=self.model_id) | ||||
| result = img_matting(dataset) | result = img_matting(dataset) | ||||
| for i in range(10): | for i in range(10): | ||||
| cv2.imwrite(f'result_{i}.png', next(result)['output_png']) | |||||
| cv2.imwrite(f'result_{i}.png', next(result)[OutputKeys.OUTPUT_IMG]) | |||||
| print( | print( | ||||
| f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}' | f'Output written to dir: {osp.dirname(osp.abspath("result_0.png"))}' | ||||
| ) | ) | ||||
| @@ -7,6 +7,7 @@ import cv2 | |||||
| from modelscope.pipelines import pipeline | from modelscope.pipelines import pipeline | ||||
| from modelscope.pipelines.base import Pipeline | from modelscope.pipelines.base import Pipeline | ||||
| from modelscope.pipelines.outputs import OutputKeys | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.test_utils import test_level | from modelscope.utils.test_utils import test_level | ||||
| @@ -22,7 +23,7 @@ class ImageCartoonTest(unittest.TestCase): | |||||
| def pipeline_inference(self, pipeline: Pipeline, input_location: str): | def pipeline_inference(self, pipeline: Pipeline, input_location: str): | ||||
| result = pipeline(input_location) | result = pipeline(input_location) | ||||
| if result is not None: | if result is not None: | ||||
| cv2.imwrite('result.png', result['output_png']) | |||||
| cv2.imwrite('result.png', result[OutputKeys.OUTPUT_IMG]) | |||||
| print(f'Output written to {osp.abspath("result.png")}') | print(f'Output written to {osp.abspath("result.png")}') | ||||
| @unittest.skip('deprecated, download model from model hub instead') | @unittest.skip('deprecated, download model from model hub instead') | ||||