@@ -402,6 +402,7 @@ class Metrics(object):
     # accuracy
     accuracy = 'accuracy'
+    multi_average_precision = 'mAP'
     audio_noise_metric = 'audio-noise-metric'
     # text gen
@@ -24,6 +24,7 @@ class MetricKeys(object):
     ROUGE_1 = 'rouge-1'
     ROUGE_L = 'rouge-l'
     NED = 'ned'  # ocr metric
+    mAP = 'mAP'
     BatchAcc = 'inbatch_t2i_recall_at_1'
@@ -0,0 +1,74 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Dict
+
+import numpy as np
+
+from modelscope.metainfo import Metrics
+from modelscope.outputs import OutputKeys
+from modelscope.utils.registry import default_group
+from .base import Metric
+from .builder import METRICS, MetricKeys
+
+
+@METRICS.register_module(
+    group_key=default_group, module_name=Metrics.multi_average_precision)
+class AveragePrecisionMetric(Metric):
+    """The metric computation class for multi average precision.
+
+    This metric class calculates multi average precision over all
+    input batches.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.preds = []
+        self.labels = []
+        self.thresh = kwargs.get('threshold', 0.5)
+
+    def add(self, outputs: Dict, inputs: Dict):
+        label_name = OutputKeys.LABEL if OutputKeys.LABEL in inputs else OutputKeys.LABELS
+        ground_truths = inputs[label_name]
+        # prefer a task-specific output key over the raw label key
+        eval_results = outputs[label_name]
+        for key in [
+                OutputKeys.CAPTION, OutputKeys.TEXT, OutputKeys.BOXES,
+                OutputKeys.LABELS, OutputKeys.SCORES
+        ]:
+            if key in outputs and outputs[key] is not None:
+                eval_results = outputs[key]
+                break
+        assert type(ground_truths) == type(eval_results)
+        for truth, result in zip(ground_truths, eval_results):
+            self.labels.append(truth)
+            if isinstance(result, str):
+                # normalize text results before comparison
+                self.preds.append(result.strip().replace(' ', ''))
+            else:
+                self.preds.append(result)
+
+    def evaluate(self):
+        assert len(self.preds) == len(self.labels)
+        scores = self._calculate_ap_score(self.preds, self.labels,
+                                          self.thresh)
+        return {MetricKeys.mAP: scores.mean().item()}
+
+    def _calculate_ap_score(self, preds, labels, thresh=0.5):
+        # preds and labels are arrays of boxes in [x0, y0, x1, y1] format
+        hyps = np.array(preds)
+        refs = np.array(labels)
+        # intersection box: elementwise max of the top-left corners and
+        # elementwise min of the bottom-right corners
+        a = np.where(hyps[:, :2] < refs[:, :2], refs[:, :2], hyps[:, :2])
+        b = np.where(hyps[:, 2:] < refs[:, 2:], hyps[:, 2:], refs[:, 2:])
+        interacts = np.concatenate([a, b], axis=1)
+        area_predictions = (hyps[:, 2] - hyps[:, 0]) * (
+            hyps[:, 3] - hyps[:, 1])
+        area_targets = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])
+        interacts_w = interacts[:, 2] - interacts[:, 0]
+        interacts_h = interacts[:, 3] - interacts[:, 1]
+        area_interacts = interacts_w * interacts_h
+        # IoU with a small epsilon guarding against division by zero
+        ious = area_interacts / (
+            area_predictions + area_targets - area_interacts + 1e-6)
+        # a hit requires genuine overlap and IoU >= thresh
+        return (ious >= thresh) & (interacts_w > 0) & (interacts_h > 0)
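A quick sanity check of the metric on a toy batch may help. This sketch instantiates the class above directly (the registry lookup is omitted), and the box values are made up:

    import numpy as np

    metric = AveragePrecisionMetric(threshold=0.5)  # the class added above

    # 'boxes' wins the output-key scan in add() and supplies the predictions;
    # outputs still needs the 'labels' key for the initial lookup.
    outputs = {
        'labels': None,
        'boxes': np.array([[0., 0., 10., 10.], [0., 0., 1., 1.]])
    }
    inputs = {'labels': np.array([[0., 0., 10., 9.], [5., 5., 9., 9.]])}

    metric.add(outputs, inputs)
    print(metric.evaluate())  # {'mAP': 0.5}: only the first box reaches IoU >= 0.5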
@@ -9,6 +9,7 @@ from torchvision import transforms
 from modelscope.preprocessors.image import load_image
 from modelscope.utils.constant import ModeKeys
 from .base import OfaBasePreprocessor
+from .utils import transforms as T


 class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
@@ -29,13 +30,14 @@ class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
         super(OfaVisualGroundingPreprocessor,
               self).__init__(cfg, model_dir, mode, *args, **kwargs)
         self.num_bins = self.cfg.model.get('num_bins', 1000)
         if self.mode == ModeKeys.TRAIN:
             # for positioning
-            self.positioning_transform = transforms.Compose([
-                transforms.RandomResize([self.patch_image_size],
-                                        max_size=self.patch_image_size),
-                transforms.ToTensor(),
-                transforms.Normalize(
+            self.positioning_transform = T.Compose([
+                T.RandomResize([self.patch_image_size],
+                               max_size=self.patch_image_size),
+                T.ToTensor(),
+                T.Normalize(
                     mean=self.mean,
                     std=self.std,
                     max_image_size=self.max_image_size)
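Note: this hunk is a fix as much as a refactor. torchvision's `Normalize` does not accept a `max_image_size` argument (and the classic torchvision transforms have no `RandomResize`), so the training-time positioning pipeline has to come from the OFA-specific `.utils.transforms` module imported above as `T`.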
@@ -130,4 +132,10 @@ class OfaVisualGroundingPreprocessor(OfaBasePreprocessor):
             'w_resize_ratio': w_resize_ratio,
             'h_resize_ratio': h_resize_ratio,
         }
+        if 'region_coord' in self.column_map and self.column_map[
+                'region_coord'] in data:
+            x0, y0, x1, y1 = data[
+                self.column_map['region_coord']].strip().split(',')
+            sample['label'] = [float(x0), float(y0), float(x1), float(y1)]
         return sample
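The new branch expects the mapped region column to hold a comma-separated `x0,y0,x1,y1` string. A sketch of a matching raw sample (column names and values are made up for illustration):

    # assuming self.column_map maps 'region_coord' to a column of the same name
    data = {
        'image': 'grounding_sample.jpg',
        'text': 'the dog on the left',
        'region_coord': '10.5,20.0,120.4,200.9'
    }
    # the branch above then sets
    # sample['label'] == [10.5, 20.0, 120.4, 200.9]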
@@ -34,6 +34,7 @@ class OFATrainer(EpochBasedTrainer):
             self,
             model: Optional[Union[TorchModel, nn.Module, str]] = None,
             cfg_file: Optional[str] = None,
+            cfg_modify_fn: Optional[Callable] = None,
             arg_parse_fn: Optional[Callable] = None,
             data_collator: Optional[Union[Callable, Dict[str,
                                                          Callable]]] = None,
@@ -49,7 +50,8 @@ class OFATrainer(EpochBasedTrainer):
             **kwargs):
         model = Model.from_pretrained(model, revision=model_revision)
         model_dir = model.model_dir
-        cfg = Config.from_file(cfg_file)
+        self.cfg_modify_fn = cfg_modify_fn
+        cfg = self.rebuild_config(Config.from_file(cfg_file))
         if 'work_dir' not in kwargs or len(kwargs['work_dir']) == 0:
             work_dir = cfg.train.work_dir
         else:
@@ -57,10 +59,12 @@ class OFATrainer(EpochBasedTrainer):
         tokenizer_files = {
             'zh': [
                 'tokenizer.json', 'tokenizer_config.json', 'vocab.txt',
-                'config.json'
+                'config.json', 'ans2label.json'
             ],
-            'en':
-            ['tokenizer.json', 'vocab.json', 'merges.txt', 'config.json'],
+            'en': [
+                'tokenizer.json', 'vocab.json', 'merges.txt', 'config.json',
+                'ans2label.json'
+            ],
         }
         for filename in tokenizer_files[cfg.model.get('language', 'en')]:
             finetune_file = os.path.join(work_dir, filename)
@@ -127,6 +131,11 @@ class OFATrainer(EpochBasedTrainer):
             **kwargs,
         )

+    def rebuild_config(self, cfg: Config):
+        if self.cfg_modify_fn is not None:
+            cfg = self.cfg_modify_fn(cfg)
+        return cfg
+
     def train_step(self, model, inputs):
         model.train()
         loss, sample_size, logging_output = self.criterion(model, inputs)
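The new `rebuild_config` hook lets callers patch the configuration before the trainer consumes it, via the `cfg_modify_fn` argument added above. A minimal sketch of the intended use (the tweaked fields and the local config path are illustrative only):

    from modelscope.trainers import build_trainer

    def cfg_modify_fn(cfg):
        # e.g. shorten training and evaluate with the new mAP metric
        cfg.train.max_epochs = 1
        cfg.evaluation.metrics = ['mAP']
        return cfg

    args = dict(
        model='damo/ofa_ocr-recognition_scene_base_zh',
        cfg_file='./workspace/configuration.json',  # illustrative path
        cfg_modify_fn=cfg_modify_fn,
        work_dir='./workspace')
    trainer = build_trainer(name='ofa', default_args=args)
    trainer.train()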
@@ -5,10 +5,10 @@ import unittest

+import json
+
 from modelscope.metainfo import Trainers
 from modelscope.msdatasets import MsDataset
 from modelscope.trainers import build_trainer
 from modelscope.utils.constant import DownloadMode, ModelFile
-from modelscope.utils.hub import read_config
 from modelscope.utils.test_utils import test_level
@@ -73,11 +73,12 @@ class TestOfaTrainer(unittest.TestCase):
     def test_trainer_std(self):
         WORKSPACE = './workspace/ckpts/recognition'
         os.makedirs(WORKSPACE, exist_ok=True)
+        config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION)
+        with open(config_file, 'w') as writer:
+            json.dump(self.finetune_cfg, writer)
+
         pretrained_model = 'damo/ofa_ocr-recognition_scene_base_zh'
-        cfg = read_config(pretrained_model)
-        config_file = os.path.join(WORKSPACE, ModelFile.CONFIGURATION)
-        cfg.dump(config_file)

         args = dict(
             model=pretrained_model,
             work_dir=WORKSPACE,
@@ -94,7 +95,7 @@ class TestOfaTrainer(unittest.TestCase):
                 split='test[:20]',
                 download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS),
             cfg_file=config_file)
-        trainer = build_trainer(name=Trainers.ofa, default_args=args)
+        trainer = build_trainer(name='ofa', default_args=args)
         trainer.train()
         self.assertIn(