Title: api tagging for pipeline/train/evaluate
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10588387
master
| @@ -5,6 +5,8 @@ import os | |||||
| from datetime import datetime | from datetime import datetime | ||||
| from typing import Optional | from typing import Optional | ||||
| import requests | |||||
| from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, | from modelscope.hub.constants import (DEFAULT_MODELSCOPE_DOMAIN, | ||||
| DEFAULT_MODELSCOPE_GROUP, | DEFAULT_MODELSCOPE_GROUP, | ||||
| MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, | MODEL_ID_SEPARATOR, MODELSCOPE_SDK_DEBUG, | ||||
| @@ -85,3 +87,16 @@ def file_integrity_validation(file_path, expected_sha256): | |||||
| msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path | msg = 'File %s integrity check failed, the download may be incomplete, please try again.' % file_path | ||||
| logger.error(msg) | logger.error(msg) | ||||
| raise FileIntegrityError(msg) | raise FileIntegrityError(msg) | ||||
def create_library_statistics(method: str, name: str, cn_name: Optional[str]):
    """Report one library-usage event to the statistics endpoint (best effort).

    Sends a POST request to ``{get_endpoint()}/api/v1/statistics/library``
    carrying the arguments as the ``Method``, ``Name`` and ``CnName`` query
    parameters. Reporting must never disturb the caller: every failure is
    logged at debug level and otherwise ignored, and the request is bounded
    by a timeout so a slow or unreachable endpoint cannot stall the caller.

    Args:
        method: Value sent as ``Method`` (the kind of API being used).
        name: Value sent as ``Name``.
        cn_name: Value sent as ``CnName``; may be None.
    """
    try:
        # Imported lazily inside the function, as in the original --
        # presumably to avoid a circular import with modelscope.hub.api.
        from modelscope.hub.api import ModelScopeConfig
        path = f'{get_endpoint()}/api/v1/statistics/library'
        headers = {'user-agent': ModelScopeConfig.get_user_agent()}
        params = {'Method': method, 'Name': name, 'CnName': cn_name}
        # requests has no default timeout; without one this call could block
        # forever when the endpoint does not answer.
        r = requests.post(path, params=params, headers=headers, timeout=5)
        r.raise_for_status()
    except Exception as e:
        # Deliberately broad: statistics are optional and must not raise.
        # Keep a debug-level trace instead of discarding the error silently.
        logger.debug('Failed to report library statistics: %s', e)
| @@ -131,6 +131,8 @@ class Model(ABC): | |||||
| if not hasattr(model, 'cfg'): | if not hasattr(model, 'cfg'): | ||||
| model.cfg = cfg | model.cfg = cfg | ||||
| model.name = model_name_or_path | |||||
| return model | return model | ||||
| def save_pretrained(self, | def save_pretrained(self, | ||||
| @@ -10,6 +10,7 @@ from typing import Any, Dict, Generator, List, Mapping, Union | |||||
| import numpy as np | import numpy as np | ||||
| from modelscope.hub.utils.utils import create_library_statistics | |||||
| from modelscope.models.base import Model | from modelscope.models.base import Model | ||||
| from modelscope.msdatasets import MsDataset | from modelscope.msdatasets import MsDataset | ||||
| from modelscope.outputs import TASK_OUTPUTS | from modelscope.outputs import TASK_OUTPUTS | ||||
| @@ -151,7 +152,9 @@ class Pipeline(ABC): | |||||
| **kwargs) -> Union[Dict[str, Any], Generator]: | **kwargs) -> Union[Dict[str, Any], Generator]: | ||||
| # model provider should leave it as it is | # model provider should leave it as it is | ||||
| # modelscope library developer will handle this function | # modelscope library developer will handle this function | ||||
| for single_model in self.models: | |||||
| if hasattr(single_model, 'name'): | |||||
| create_library_statistics('pipeline', single_model.name, None) | |||||
| # place model to cpu or gpu | # place model to cpu or gpu | ||||
| if (self.model or (self.has_multiple_models and self.models[0])): | if (self.model or (self.has_multiple_models and self.models[0])): | ||||
| if not self._model_prepare: | if not self._model_prepare: | ||||
| @@ -15,6 +15,7 @@ from torch.utils.data.dataloader import default_collate | |||||
| from torch.utils.data.distributed import DistributedSampler | from torch.utils.data.distributed import DistributedSampler | ||||
| from modelscope.hub.snapshot_download import snapshot_download | from modelscope.hub.snapshot_download import snapshot_download | ||||
| from modelscope.hub.utils.utils import create_library_statistics | |||||
| from modelscope.metainfo import Trainers | from modelscope.metainfo import Trainers | ||||
| from modelscope.metrics import build_metric, task_default_metrics | from modelscope.metrics import build_metric, task_default_metrics | ||||
| from modelscope.models.base import Model, TorchModel | from modelscope.models.base import Model, TorchModel | ||||
| @@ -436,6 +437,8 @@ class EpochBasedTrainer(BaseTrainer): | |||||
| def train(self, checkpoint_path=None, *args, **kwargs): | def train(self, checkpoint_path=None, *args, **kwargs): | ||||
| self._mode = ModeKeys.TRAIN | self._mode = ModeKeys.TRAIN | ||||
| if hasattr(self.model, 'name'): | |||||
| create_library_statistics('train', self.model.name, None) | |||||
| if self.train_dataset is None: | if self.train_dataset is None: | ||||
| self.train_dataloader = self.get_train_dataloader() | self.train_dataloader = self.get_train_dataloader() | ||||
| @@ -456,6 +459,8 @@ class EpochBasedTrainer(BaseTrainer): | |||||
| self.train_loop(self.train_dataloader) | self.train_loop(self.train_dataloader) | ||||
| def evaluate(self, checkpoint_path=None): | def evaluate(self, checkpoint_path=None): | ||||
| if hasattr(self.model, 'name'): | |||||
| create_library_statistics('evaluate', self.model.name, None) | |||||
| if checkpoint_path is not None and os.path.isfile(checkpoint_path): | if checkpoint_path is not None and os.path.isfile(checkpoint_path): | ||||
| from modelscope.trainers.hooks import CheckpointHook | from modelscope.trainers.hooks import CheckpointHook | ||||
| CheckpointHook.load_checkpoint(checkpoint_path, self) | CheckpointHook.load_checkpoint(checkpoint_path, self) | ||||