修改了I/O的代码,以支持modelscope的demo services
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10518318
master
| @@ -25,9 +25,9 @@ emotion_list = [ | |||||
| ] | ] | ||||
| def inference(image_path, model, face_model, score_thre=0.5, GPU=0): | |||||
| image = Image.open(image_path).convert('RGB') | |||||
| def inference(image, model, face_model, score_thre=0.5, GPU=0): | |||||
| image = image.cpu().numpy() | |||||
| image = Image.fromarray(image) | |||||
| face, bbox = face_detection_PIL_v2(image, face_model) | face, bbox = face_detection_PIL_v2(image, face_model) | ||||
| if bbox is None: | if bbox is None: | ||||
| logger.warn('no face detected!') | logger.warn('no face detected!') | ||||
| @@ -115,9 +115,9 @@ std = [57.375, 57.12, 58.395] | |||||
| class_names = ['person', 'face', 'hand'] | class_names = ['person', 'face', 'hand'] | ||||
| def inference(model, device, img_path): | |||||
| def inference(model, device, img): | |||||
| img = img.cpu().numpy() | |||||
| img_info = {'id': 0} | img_info = {'id': 0} | ||||
| img = cv2.imread(img_path) | |||||
| height, width = img.shape[:2] | height, width = img.shape[:2] | ||||
| img_info['height'] = height | img_info['height'] = height | ||||
| img_info['width'] = width | img_info['width'] = width | ||||
| @@ -130,4 +130,9 @@ def inference(model, device, img_path): | |||||
| with torch.no_grad(): | with torch.no_grad(): | ||||
| res = model(meta) | res = model(meta) | ||||
| result = overlay_bbox_cv(res[0], class_names, score_thresh=0.35) | result = overlay_bbox_cv(res[0], class_names, score_thresh=0.35) | ||||
| return result | |||||
| cls_list, bbox_list, score_list = [], [], [] | |||||
| for pred in result: | |||||
| cls_list.append(pred[0]) | |||||
| bbox_list.append([pred[1], pred[2], pred[3], pred[4]]) | |||||
| score_list.append(pred[5]) | |||||
| return cls_list, bbox_list, score_list | |||||
| @@ -8,7 +8,7 @@ import torch | |||||
| import torch.nn.functional as F | import torch.nn.functional as F | ||||
| from PIL import Image | from PIL import Image | ||||
| from torch import nn | from torch import nn | ||||
| from torchvision.transforms import transforms | |||||
| from torchvision import transforms | |||||
| from modelscope.metainfo import Models | from modelscope.metainfo import Models | ||||
| from modelscope.models.base import TorchModel | from modelscope.models.base import TorchModel | ||||
| @@ -80,9 +80,9 @@ class HandStatic(TorchModel): | |||||
| return pred_result | return pred_result | ||||
| def infer(img_path, model, device): | |||||
| img = Image.open(img_path) | |||||
| def infer(img, model, device): | |||||
| img = img.cpu().numpy() | |||||
| img = Image.fromarray(img) | |||||
| clip = spatial_transform(img) | clip = spatial_transform(img) | ||||
| clip = clip.unsqueeze(0).to(device).float() | clip = clip.unsqueeze(0).to(device).float() | ||||
| outputs = model(clip) | outputs = model(clip) | ||||
| @@ -59,9 +59,8 @@ mean, std = np.array([[[124.55, 118.90, | |||||
| 102.94]]]), np.array([[[56.77, 55.97, 57.50]]]) | 102.94]]]), np.array([[[56.77, 55.97, 57.50]]]) | ||||
| def inference(model, device, input_path): | |||||
| img = Image.open(input_path) | |||||
| img = np.array(img.convert('RGB')).astype(np.float32) | |||||
| def inference(model, device, img): | |||||
| img = img.cpu().numpy() | |||||
| img = (img - mean) / std | img = (img - mean) / std | ||||
| img = cv2.resize(img, dsize=(448, 448), interpolation=cv2.INTER_LINEAR) | img = cv2.resize(img, dsize=(448, 448), interpolation=cv2.INTER_LINEAR) | ||||
| img = torch.from_numpy(img) | img = torch.from_numpy(img) | ||||
| @@ -762,12 +762,13 @@ TASK_OUTPUTS = { | |||||
| # } | # } | ||||
| Tasks.hand_static: [OutputKeys.OUTPUT], | Tasks.hand_static: [OutputKeys.OUTPUT], | ||||
| # 'output': [ | |||||
| # [2, 75, 287, 240, 510, 0.8335018754005432], | |||||
| # [1, 127, 83, 332, 366, 0.9175254702568054], | |||||
| # [0, 0, 0, 367, 639, 0.9693422317504883]] | |||||
| # { 'labels': [2, 1, 0], | |||||
| # 'boxes': [[78, 282, 240, 504], [127, 87, 332, 370], [0, 0, 367, 639]], | |||||
| # 'scores': [0.8202137351036072, 0.8987470269203186, 0.9679114818572998] | |||||
| # } | # } | ||||
| Tasks.face_human_hand_detection: [OutputKeys.OUTPUT], | |||||
| Tasks.face_human_hand_detection: [ | |||||
| OutputKeys.LABELS, OutputKeys.BOXES, OutputKeys.SCORES | |||||
| ], | |||||
| # { | # { | ||||
| # {'output': 'Happiness', 'boxes': (203, 104, 663, 564)} | # {'output': 'Happiness', 'boxes': (203, 104, 663, 564)} | ||||
| @@ -1,11 +1,14 @@ | |||||
| # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. | # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. | ||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| import numpy as np | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.models.cv.face_emotion import emotion_infer | from modelscope.models.cv.face_emotion import emotion_infer | ||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines.base import Input, Pipeline | from modelscope.pipelines.base import Input, Pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import LoadImage | |||||
| from modelscope.utils.constant import ModelFile, Tasks | from modelscope.utils.constant import ModelFile, Tasks | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| @@ -28,10 +31,11 @@ class FaceEmotionPipeline(Pipeline): | |||||
| logger.info('load model done') | logger.info('load model done') | ||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| return input | |||||
| img = LoadImage.convert_to_ndarray(input['img_path']) | |||||
| return img | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| result, bbox = emotion_infer.inference(input['img_path'], self.model, | |||||
| result, bbox = emotion_infer.inference(input, self.model, | |||||
| self.face_model) | self.face_model) | ||||
| return {OutputKeys.OUTPUT: result, OutputKeys.BOXES: bbox} | return {OutputKeys.OUTPUT: result, OutputKeys.BOXES: bbox} | ||||
| @@ -2,11 +2,14 @@ | |||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| import numpy as np | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.models.cv.face_human_hand_detection import det_infer | from modelscope.models.cv.face_human_hand_detection import det_infer | ||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines.base import Input, Pipeline | from modelscope.pipelines.base import Input, Pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import LoadImage | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| @@ -29,14 +32,19 @@ class NanoDettForFaceHumanHandDetectionPipeline(Pipeline): | |||||
| logger.info('load model done') | logger.info('load model done') | ||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| return input | |||||
| img = LoadImage.convert_to_ndarray(input['input_path']) | |||||
| return img | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| result = det_infer.inference(self.model, self.device, | |||||
| input['input_path']) | |||||
| logger.info(result) | |||||
| return {OutputKeys.OUTPUT: result} | |||||
| cls_list, bbox_list, score_list = det_infer.inference( | |||||
| self.model, self.device, input) | |||||
| logger.info(cls_list, bbox_list, score_list) | |||||
| return { | |||||
| OutputKeys.LABELS: cls_list, | |||||
| OutputKeys.BOXES: bbox_list, | |||||
| OutputKeys.SCORES: score_list | |||||
| } | |||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| return inputs | return inputs | ||||
| @@ -1,11 +1,14 @@ | |||||
| # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. | # Copyright 2021-2022 The Alibaba Fundamental Vision Team Authors. All rights reserved. | ||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| import numpy as np | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.models.cv.hand_static import hand_model | from modelscope.models.cv.hand_static import hand_model | ||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines.base import Input, Pipeline | from modelscope.pipelines.base import Input, Pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import LoadImage | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| @@ -27,10 +30,11 @@ class HandStaticPipeline(Pipeline): | |||||
| logger.info('load model done') | logger.info('load model done') | ||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| return input | |||||
| img = LoadImage.convert_to_ndarray(input['img_path']) | |||||
| return img | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| result = hand_model.infer(input['img_path'], self.model, self.device) | |||||
| result = hand_model.infer(input, self.model, self.device) | |||||
| return {OutputKeys.OUTPUT: result} | return {OutputKeys.OUTPUT: result} | ||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||
| @@ -2,11 +2,14 @@ | |||||
| from typing import Any, Dict | from typing import Any, Dict | ||||
| import numpy as np | |||||
| from modelscope.metainfo import Pipelines | from modelscope.metainfo import Pipelines | ||||
| from modelscope.models.cv.product_segmentation import seg_infer | from modelscope.models.cv.product_segmentation import seg_infer | ||||
| from modelscope.outputs import OutputKeys | from modelscope.outputs import OutputKeys | ||||
| from modelscope.pipelines.base import Input, Pipeline | from modelscope.pipelines.base import Input, Pipeline | ||||
| from modelscope.pipelines.builder import PIPELINES | from modelscope.pipelines.builder import PIPELINES | ||||
| from modelscope.preprocessors import LoadImage | |||||
| from modelscope.utils.constant import Tasks | from modelscope.utils.constant import Tasks | ||||
| from modelscope.utils.logger import get_logger | from modelscope.utils.logger import get_logger | ||||
| @@ -28,12 +31,13 @@ class F3NetForProductSegmentationPipeline(Pipeline): | |||||
| logger.info('load model done') | logger.info('load model done') | ||||
| def preprocess(self, input: Input) -> Dict[str, Any]: | def preprocess(self, input: Input) -> Dict[str, Any]: | ||||
| return input | |||||
| img = LoadImage.convert_to_ndarray(input['input_path']) | |||||
| img = img.astype(np.float32) | |||||
| return img | |||||
| def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | def forward(self, input: Dict[str, Any]) -> Dict[str, Any]: | ||||
| mask = seg_infer.inference(self.model, self.device, | |||||
| input['input_path']) | |||||
| mask = seg_infer.inference(self.model, self.device, input) | |||||
| return {OutputKeys.MASKS: mask} | return {OutputKeys.MASKS: mask} | ||||
| def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]: | ||||