Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10708723master
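Summary: this review adds an explicit encoding='utf-8' to text-mode open() calls (including open() calls wrapped inside json/yaml loads) across the repository, so reads of config, vocab and annotation files no longer depend on the platform's locale default encoding. A minimal illustration of the pattern, not part of the diff (the file name is hypothetical):

    import json

    # Without an explicit encoding, open() falls back to locale.getpreferredencoding(False),
    # which is often cp1252/gbk on Windows and can break on UTF-8 content.
    with open('configuration.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

Binary-mode opens (for example zipfile.ZipFile) are unaffected and take no encoding argument.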
@@ -25,7 +25,7 @@ version_file = '../../modelscope/version.py'
 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
@@ -739,7 +739,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.USER_INFO_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 info = f.read()
                 return info.split(':')[0], info.split(':')[1]
         except FileNotFoundError:
@@ -760,7 +760,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.GIT_TOKEN_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 token = f.read()
         except FileNotFoundError:
             pass
@@ -21,7 +21,7 @@ class KanTtsText2MelDataset(Dataset):
         self.cache = cache
-        with open(config_filename) as f:
+        with open(config_filename, encoding='utf-8') as f:
             self._config = json.loads(f.read())
         # Load metadata:
@@ -60,7 +60,7 @@ class SambertHifigan(Model):
             with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                 zip_ref.extractall(model_dir)
         voice_cfg_path = os.path.join(self.__voice_path, 'voices.json')
-        with open(voice_cfg_path, 'r') as f:
+        with open(voice_cfg_path, 'r', encoding='utf-8') as f:
             voice_cfg = json.load(f)
         if 'voices' not in voice_cfg:
             raise TtsModelConfigurationException(
@@ -39,7 +39,7 @@ class PlainNet(nn.Module):
         plainnet_struct_txt = self.module_opt.plainnet_struct_txt
         if plainnet_struct_txt is not None:
-            with open(plainnet_struct_txt, 'r') as fid:
+            with open(plainnet_struct_txt, 'r', encoding='utf-8') as fid:
                 the_line = fid.readlines()[0].strip()
                 self.plainnet_struct = the_line
         pass
@@ -120,7 +120,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
@@ -523,8 +523,10 @@ class CLIPForMultiModalEmbedding(TorchModel):
         logger.info(f'Loading text model config from {text_model_config_file}')
         assert os.path.exists(text_model_config_file)
-        with open(vision_model_config_file,
-                  'r') as fv, open(text_model_config_file, 'r') as ft:
+        with open(
+                vision_model_config_file, 'r',
+                encoding='utf-8') as fv,\
+                open(text_model_config_file, 'r', encoding='utf-8') as ft:
             self.model_info = json.load(fv)
             for k, v in json.load(ft).items():
                 self.model_info[k] = v
@@ -76,7 +76,7 @@ class DiffusionModel(nn.Module):
         super(DiffusionModel, self).__init__()
         # including text and generator config
         model_config = json.load(
-            open('{}/model_config.json'.format(model_dir)))
+            open('{}/model_config.json'.format(model_dir), encoding='utf-8'))
         # text encoder
         text_config = model_config['text_config']
@@ -142,7 +142,9 @@ class DiffusionForTextToImageSynthesis(Model):
         # diffusion process
         diffusion_params = json.load(
-            open('{}/diffusion_config.json'.format(model_dir)))
+            open(
+                '{}/diffusion_config.json'.format(model_dir),
+                encoding='utf-8'))
         self.diffusion_generator = make_diffusion(
             **diffusion_params['generator_config'])
         self.diffusion_upsampler_256 = make_diffusion(
@@ -130,7 +130,7 @@ class BertConfig(object):
     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `BertConfig` from a json file of parameters."""
-        with open(json_file, 'r') as reader:
+        with open(json_file, 'r', encoding='utf-8') as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
@@ -67,7 +67,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
@@ -522,7 +522,9 @@ class GEMMModel(nn.Module):
     def __init__(self, model_dir):
         super().__init__()
-        with open('{}/encoder_config.json'.format(model_dir), 'r') as f:
+        with open(
+                '{}/encoder_config.json'.format(model_dir), 'r',
+                encoding='utf-8') as f:
             model_config = json.loads(f.read())
         model_name = list(model_config.keys())[0]
         config_args = model_config[model_name]
@@ -35,7 +35,9 @@ class VideoCLIPForMultiModalEmbedding(TorchModel):
     def __init__(self, model_dir, **kwargs):
         super().__init__(model_dir=model_dir, **kwargs)
         # model config parameters
-        with open(f'{model_dir}/{ModelFile.CONFIGURATION}', 'r') as json_file:
+        with open(
+                f'{model_dir}/{ModelFile.CONFIGURATION}', 'r',
+                encoding='utf-8') as json_file:
             model_config = json.load(json_file)
         model_config = model_config['paras']
         model_config['model_dir'] = model_dir
@@ -111,6 +111,6 @@ class MPlugConfig(PretrainedConfig):
     @classmethod
     def from_yaml_file(cls, yaml_file: Union[str,
                                              os.PathLike]) -> Dict[str, Any]:
-        with open(yaml_file, 'r') as reader:
+        with open(yaml_file, 'r', encoding='utf-8') as reader:
             config_dict = yaml.load(reader, Loader=yaml.Loader)
         return cls(**config_dict)
@@ -50,7 +50,8 @@ class UnCLIP(nn.Module):
     def __init__(self, model_dir):
         super(UnCLIP, self).__init__()
         self.model_dir = model_dir
-        self.config = json.load(open(f'{model_dir}/{ModelFile.CONFIGURATION}'))
+        self.config = json.load(
+            open(f'{model_dir}/{ModelFile.CONFIGURATION}', encoding='utf-8'))
         # modules
         self.clip = CLIP(**self.config['clip']).fp16()
@@ -312,7 +312,7 @@ class OfaForAllTasks(TorchModel):
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(self.model_dir,
                                       self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 self.ans2label_dict = json.load(reader)

     def save_pretrained(self,
@@ -743,7 +743,7 @@ def get_args():
     if hasattr(args, 'deepspeed'
                ) and args.deepspeed and args.deepspeed_config is not None:
-        with open(args.deepspeed_config) as file:
+        with open(args.deepspeed_config, encoding='utf-8') as file:
             deepspeed_config = json.load(file)
         if 'train_micro_batch_size_per_gpu' in deepspeed_config:
             args.batch_size = deepspeed_config[
@@ -156,7 +156,7 @@ class DataReader:
         def read_input_to_queue():
             for path in paths:
                 print_rank_0(f'Start reading {path}')
-                with open(path) as file:
+                with open(path, encoding='utf-8') as file:
                     items = json.load(file)
                     for item in items:
                         task_queue.put(item)
@@ -511,12 +511,12 @@ class json_dataset(data.Dataset):
     def load_json_stream(self, load_path):
         if not self.loose_json:
-            jsons = json.load(open(load_path, 'r'))
+            jsons = json.load(open(load_path, 'r', encoding='utf-8'))
             generator = iter(jsons)
         else:

            def gen_helper():
-                with open(load_path, 'r') as f:
+                with open(load_path, 'r', encoding='utf-8') as f:
                     for row in f:
                         yield json.loads(row)
@@ -29,7 +29,9 @@ with open(output_path, 'w') as output:
         print(filename)
         article_lines = []
         article_open = False
-        with open(filename, mode='r', newline='\n') as file:
+        with open(
+                filename, mode='r', newline='\n',
+                encoding='utf-8') as file:
             for line in file:
                 line = line.rstrip()
                 if '<doc id=' in line:
@@ -179,7 +179,7 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        self.encoder = json.load(open(vocab_file, encoding='utf-8'))
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
@@ -19,7 +19,7 @@ for dir_path in glob.glob(path_pattern, recursive=True):
     valid_path = os.path.join(dir_path, 'results.json')
     if os.path.exists(valid_path):
         print(entry)
-        with open(valid_path) as file:
+        with open(valid_path, encoding='utf-8') as file:
             valid_result = json.load(file)
     else:
         print(f'{entry} no validation results')
@@ -121,7 +121,7 @@ class LambadaDataset(torch.utils.data.Dataset):
         self.tokens = []
         self.labels = []

-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 text = json.loads(line)['text']
                 tokens, labels = self.get_tokens(text)
@@ -209,14 +209,16 @@ class XSumProcessor:
             raise NotImplementedError(split)
         print_rank_0(f'Creating XSUM-{split} dataset from {self.data_dir}')
         with open(
-                os.path.join(
-                    self.data_dir,
-                    'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')) as file:
+                os.path.join(self.data_dir,
                             'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json'),
+                encoding='utf-8') as file:
             id_list = json.load(file)
         id_list = id_list[key]
         source_texts, target_texts = [], []
         for i, idx in enumerate(id_list):
-            with open(os.path.join(self.data_dir, f'{idx}.summary')) as file:
+            with open(
+                    os.path.join(self.data_dir, f'{idx}.summary'),
+                    encoding='utf-8') as file:
                 key, sentences = None, []
                 source_text, target_text = None, None
                 for line in file:
@@ -841,7 +841,7 @@ class RaceProcessor(DataProcessor):
             path, 'middle', '*.txt')) + glob.glob(
                 os.path.join(path, 'high', '*.txt'))
         for filename in filenames:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 for line in f:
                     data = json.loads(line)
                     idx = data['id']
@@ -1127,7 +1127,7 @@ class AgnewsProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []

-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, headline, body = row
@@ -1209,7 +1209,7 @@ class YelpPolarityProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []

-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, body = row
@@ -1419,7 +1419,7 @@ class SquadProcessor(DataProcessor):
     @staticmethod
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             data = json.load(f)['data']

             for idx, passage in enumerate(data):
@@ -538,7 +538,7 @@ class PVP(ABC):
             dict)  # type: Dict[int, Dict[str, List[str]]]
         current_pattern_id = None

-        with open(path, 'r') as fh:
+        with open(path, 'r', encoding='utf-8') as fh:
             for line in fh.read().splitlines():
                 if line.isdigit():
                     current_pattern_id = int(line)
@@ -77,7 +77,7 @@ def print_and_save_args(args, verbose=True, log_dir=None):
         with open(json_file, 'w') as output:
             json.dump(vars(args), output, sort_keys=True)
         if args.deepspeed and args.deepspeed_config is not None:
-            with open(args.deepspeed_config) as file:
+            with open(args.deepspeed_config, encoding='utf-8') as file:
                 deepspeed_config = json.load(file)
             deepspeed_json_file = os.path.join(log_dir,
                                                'config_gpt_large.json')
@@ -324,7 +324,7 @@ def get_checkpoint_iteration(load_path):
         print_rank_0(' will not load any checkpoints and will start from '
                      'random')
         return load_path, 0, False, False
-    with open(tracker_filename, 'r') as f:
+    with open(tracker_filename, 'r', encoding='utf-8') as f:
         metastring = f.read().strip()
     release = metastring == 'release'
     # try:
@@ -443,7 +443,7 @@ def load_stereo_chemical_props():
     stereo_chemical_props_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
         'stereo_chemical_props.txt')
-    with open(stereo_chemical_props_path, 'rt') as f:
+    with open(stereo_chemical_props_path, 'rt', encoding='utf-8') as f:
         stereo_chemical_props = f.read()
     lines_iter = iter(stereo_chemical_props.splitlines())
     # Load bond lengths.
@@ -250,7 +250,7 @@ class UnifoldDataset(UnicoreDataset):
         self.path = data_path

         def load_json(filename):
-            return json.load(open(filename, 'r'))
+            return json.load(open(filename, 'r', encoding='utf-8'))

         sample_weight = load_json(
             os.path.join(self.path,
@@ -400,7 +400,8 @@ class UnifoldMultimerDataset(UnifoldDataset):
             self.pdb_assembly = json.load(
                 open(
                     os.path.join(self.data_path,
-                                 json_prefix + 'pdb_assembly.json')))
+                                 json_prefix + 'pdb_assembly.json'),
+                    encoding='utf-8'))
             self.pdb_chains = self.get_chains(self.inverse_multi_label)
             self.monomer_feature_path = os.path.join(self.data_path,
                                                      'pdb_features')
@@ -99,7 +99,7 @@ def run_msa_tool(
             f.write(result[msa_format])
     else:
         logging.warning('Reading MSA from file %s', msa_out_path)
-        with open(msa_out_path, 'r') as f:
+        with open(msa_out_path, 'r', encoding='utf-8') as f:
             result = {msa_format: f.read()}
     return result
@@ -153,7 +153,7 @@ class DataPipeline:
     def process(self, input_fasta_path: str,
                 msa_output_dir: str) -> FeatureDict:
         """Runs alignment tools on the input sequence and creates features."""
-        with open(input_fasta_path) as f:
+        with open(input_fasta_path, encoding='utf-8') as f:
             input_fasta_str = f.read()
         input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
         if len(input_seqs) != 1:
@@ -155,7 +155,7 @@ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
     """Parses release dates file, returns a mapping from PDBs to release dates."""
     if path.endswith('txt'):
         release_dates = {}
-        with open(path, 'r') as f:
+        with open(path, 'r', encoding='utf-8') as f:
             for line in f:
                 pdb_id, date = line.split(':')
                 date = date.strip()
@@ -106,14 +106,14 @@ class MovieSceneSegmentationDataset(TorchTaskDataset):
         self.tmpl = '{}/shot_{}_img_{}.jpg'  # video_id, shot_id, shot_num

         if not self.test_mode:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)
             self.vidsid2label = {
                 f"{it['video_id']}_{it['shot_id']}": it['boundary_label']
                 for it in self.anno_data
             }
         else:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)

     def init_sampler(self, cfg):
@@ -146,7 +146,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
         saved_annotations_file_path = osp.join(
             root_path, f'sentences_single_frame_{subset}_annotations.json')
         if osp.exists(saved_annotations_file_path):
-            with open(saved_annotations_file_path, 'r') as f:
+            with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
                 text_annotations_by_frame = [tuple(a) for a in json.load(f)]
             return text_annotations_by_frame
         elif (distributed and dist.get_rank() == 0) or not distributed:
@@ -203,7 +203,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
                 json.dump(text_annotations_by_frame, f)
         if distributed:
             dist.barrier()
-        with open(saved_annotations_file_path, 'r') as f:
+        with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
             text_annotations_by_frame = [tuple(a) for a in json.load(f)]
         return text_annotations_by_frame
@@ -267,8 +267,10 @@ def get_text_annotations_gt(root_path, subset):
         osp.join(root_path, 'Release/videoset.csv'), header=None)
     # 'vid', 'label', 'start_time', 'end_time', 'height', 'width', 'total_frames', 'annotated_frames', 'subset'
     a2d_data_info.columns = ['vid', '', '', '', '', '', '', '', 'subset']
-    with open(osp.join(root_path, 'text_annotations/missed_videos.txt'),
-              'r') as f:
+    with open(
+            osp.join(root_path, 'text_annotations/missed_videos.txt'),
+            'r',
+            encoding='utf-8') as f:
         unused_videos = f.read().splitlines()
     subsets = {'train': 0, 'test': 1}
     # filter unused videos and videos which do not belong to our train/test subset:
@@ -26,7 +26,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
         self.list_n_frames = []
         self.list_positions = []

-        with open(self.split_filename) as f:
+        with open(self.split_filename, encoding='utf-8') as f:
             data = json.loads(f.read())
             for i, split in enumerate(data):
                 if i == self.split_index:
@@ -116,7 +116,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
         }

         if self.framework == Frameworks.torch:
-            config_file = open(inputs['asr_model_config'])
+            config_file = open(inputs['asr_model_config'], encoding='utf-8')
             root = yaml.full_load(config_file)
             config_file.close()
             frontend_conf = None
@@ -109,7 +109,7 @@ class AnimalRecognitionPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
@@ -110,7 +110,7 @@ class GeneralRecognitionPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
@@ -49,7 +49,7 @@ class OCRRecognitionPipeline(Pipeline):
         self.infer_model.load_state_dict(
             torch.load(model_path, map_location=self.device))
         self.labelMapping = dict()
-        with open(label_path, 'r') as f:
+        with open(label_path, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             cnt = 2
             for line in lines:
@@ -82,7 +82,7 @@ class TinynasClassificationPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.path, 'label_map.txt')
-        f = open(label_mapping_path)
+        f = open(label_mapping_path, encoding='utf-8')
         content = f.read()
         f.close()
         label_dict = eval(content)
@@ -36,7 +36,7 @@ class VideoCategoryPipeline(Pipeline):
         super().__init__(model=model, **kwargs)
         config_path = osp.join(self.model, ModelFile.CONFIGURATION)
         logger.info(f'loading configuration from {config_path}')
-        with open(config_path, 'r') as f:
+        with open(config_path, 'r', encoding='utf-8') as f:
             config = json.load(f)
             self.frame_num = config['frame_num']
             self.level_1_num = config['level_1_num']
@@ -59,8 +59,9 @@ def load_feature_for_one_target(
     else:
         uniprot_msa_dir = data_folder
-    sequence_ids = open(os.path.join(data_folder,
-                                     'chains.txt')).readline().split()
+    sequence_ids = open(
+        os.path.join(data_folder, 'chains.txt'),
+        encoding='utf-8').readline().split()

     if symmetry_group is None:
         batch, _ = load_and_process(
@@ -15,7 +15,7 @@ from modelscope.utils.constant import Fields


 def load_kaldi_feature_transform(filename):
-    fp = open(filename, 'r')
+    fp = open(filename, 'r', encoding='utf-8')
     all_str = fp.read()
     pos1 = all_str.find('AddShift')
     pos2 = all_str.find('[', pos1)
@@ -78,7 +78,7 @@ class WavToLists(Preprocessor):
         assert os.path.exists(
             inputs['config_path']), 'model config yaml file does not exist'

-        config_file = open(inputs['config_path'])
+        config_file = open(inputs['config_path'], encoding='utf-8')
         root = yaml.full_load(config_file)
         config_file.close()
@@ -145,8 +145,9 @@ class CLIPPreprocessor(Preprocessor):
             self.image_resolution = kwargs['resolution']
         else:
             self.image_resolution = json.load(
-                open('{}/vision_model_config.json'.format(
-                    model_dir)))['image_resolution']
+                open(
+                    '{}/vision_model_config.json'.format(model_dir),
+                    encoding='utf-8'))['image_resolution']
         self.img_preprocess = self._build_image_transform()
         # key mapping
         # specify the input keys, compatible with training and inference whose key names may be different
@@ -59,8 +59,10 @@ class NLPBasePreprocessor(Preprocessor, ABC):
             self.use_fast = False
         elif self.use_fast is None and os.path.isfile(
                 os.path.join(model_dir, 'tokenizer_config.json')):
-            with open(os.path.join(model_dir, 'tokenizer_config.json'),
-                      'r') as f:
+            with open(
+                    os.path.join(model_dir, 'tokenizer_config.json'),
+                    'r',
+                    encoding='utf-8') as f:
                 json_config = json.load(f)
             self.use_fast = json_config.get('use_fast')
         self.use_fast = False if self.use_fast is None else self.use_fast
@@ -35,7 +35,10 @@ class DialogIntentPredictionPreprocessor(Preprocessor):
             self.model_dir, config=self.config)
         self.categories = None
-        with open(os.path.join(self.model_dir, 'categories.json'), 'r') as f:
+        with open(
+                os.path.join(self.model_dir, 'categories.json'),
+                'r',
+                encoding='utf-8') as f:
             self.categories = json.load(f)
         assert len(self.categories) == 77
@@ -184,7 +184,7 @@ class multiwoz22Processor(DSTProcessor):
     # Loads the dialogue_acts.json and returns a list
     # of slot-value pairs.
     def load_acts(self, input_file):
-        with open(input_file) as f:
+        with open(input_file, encoding='utf-8') as f:
             acts = json.load(f)
         s_dict = {}
         for d in acts:
@@ -359,12 +359,14 @@ class MultiWOZBPETextField(BPETextField):
             test_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'testListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]
             dev_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'valListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]

             self.dev_files, self.test_files = {}, {}
@@ -531,7 +531,7 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        self.encoder = json.load(open(vocab_file, encoding='utf-8'))
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
@@ -32,12 +32,12 @@ class Database:
         tables = {}
         lines = []
         if type(table_file_path) == str:
-            with open(table_file_path, 'r') as fo:
+            with open(table_file_path, 'r', encoding='utf-8') as fo:
                 for line in fo:
                     lines.append(line)
         elif type(table_file_path) == list:
             for path in table_file_path:
-                with open(path, 'r') as fo:
+                with open(path, 'r', encoding='utf-8') as fo:
                     for line in fo:
                         lines.append(line)
         else:
@@ -45,7 +45,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
             and torch.cuda.is_available() else 'cpu'
         self.processor = None
         self.table_path = os.path.join(self.model_dir, 'tables.json')
-        self.tables = json.load(open(self.table_path, 'r'))
+        self.tables = json.load(open(self.table_path, 'r', encoding='utf-8'))
         self.output_tables = None
         self.path_cache = []
         self.graph_processor = GraphProcessor()
@@ -89,7 +89,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
                 'local_db_path'] not in self.path_cache:
             self.path_cache.append(data['local_db_path'])
             path = os.path.join(data['local_db_path'], 'tables.json')
-            self.tables = json.load(open(path, 'r'))
+            self.tables = json.load(open(path, 'r', encoding='utf-8'))
             self.processor.db_dir = os.path.join(data['local_db_path'], 'db')
             self.output_tables = process_tables(self.processor, self.tables)
             Example.configuration(
@@ -76,7 +76,7 @@ class OfaBasePreprocessor:
         self.constraint_trie = None
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(model_dir, self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 ans2label_dict = json.load(reader)
             self.ans2label = ans2label_dict
             self.label2ans = {v: k for k, v in self.ans2label.items()}
@@ -201,7 +201,7 @@ def run_mmseqs2(
     a3m_lines = {}
     for a3m_file in a3m_files:
         update_M, M = True, None
-        with open(a3m_file, 'r') as f:
+        with open(a3m_file, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             for line in lines:
                 if len(line) > 0:
@@ -771,7 +771,8 @@ class CamRestEvaluator(GenericEvaluator):
     def get_entities(self, entity_path):
         entities_flat = []
         entitiy_to_slot_dict = {}
-        raw_entities = json.loads(open(entity_path).read().lower())
+        raw_entities = json.loads(
+            open(entity_path, encoding='utf-8').read().lower())
         for s in raw_entities['informable']:
             entities_flat.extend(raw_entities['informable'][s])
             for v in raw_entities['informable'][s]:
@@ -47,7 +47,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
         else:
             return None

-    with open(origin_config_file) as f:
+    with open(origin_config_file, encoding='utf-8') as f:
         lines = f.readlines()
     with open(new_config_file, 'w') as f:
         for line in lines:
@@ -178,7 +178,7 @@ class Config:
         if cfg_text:
             text = cfg_text
         elif filename:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 text = f.read()
         else:
             text = ''
@@ -124,7 +124,7 @@ def parse_label_mapping(model_dir):
     label2id = None
     label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING)
     if os.path.exists(label_path):
-        with open(label_path) as f:
+        with open(label_path, encoding='utf-8') as f:
             label_mapping = json.load(f)
         label2id = {name: idx for name, idx in label_mapping.items()}
@@ -59,7 +59,9 @@ def clean_text(data_dir, text):
                   text)  # 'abc.xyz' -> 'abc . xyz'
     text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text)  # if 'abc. ' -> 'abc . '

-    with open(os.path.join(data_dir, 'mapping.pair'), 'r') as fin:
+    with open(
+            os.path.join(data_dir, 'mapping.pair'), 'r',
+            encoding='utf-8') as fin:
         for line in fin.readlines():
             fromx, tox = line.replace('\n', '').split('\t')
             text = ' ' + text + ' '
@@ -15,7 +15,9 @@ class MultiWozDB(object):
         self.dbs = {}
         self.sql_dbs = {}
         for domain in all_domains:
-            with open(os.path.join(db_dir, db_paths[domain]), 'r') as f:
+            with open(
+                    os.path.join(db_dir, db_paths[domain]), 'r',
+                    encoding='utf-8') as f:
                 self.dbs[domain] = json.loads(f.read().lower())

     def oneHotVector(self, domain, num):
@@ -146,9 +146,9 @@ class MultiWOZVocab(object):
     def load_vocab(self, vocab_path):
         self._freq_dict = json.loads(
-            open(vocab_path + '.freq.json', 'r').read())
+            open(vocab_path + '.freq.json', 'r', encoding='utf-8').read())
         self._word2idx = json.loads(
-            open(vocab_path + '.word2idx.json', 'r').read())
+            open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read())
         self._idx2word = {}
         for w, idx in self._word2idx.items():
             self._idx2word[idx] = w
@@ -50,7 +50,7 @@ def get_hash():


 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
@@ -109,7 +109,7 @@ def parse_requirements(fname='requirements.txt', with_version=True):
             yield info

     def parse_require_file(fpath):
-        with open(fpath, 'r') as f:
+        with open(fpath, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 line = line.strip()
                 if line.startswith('http'):
@@ -247,7 +247,7 @@ def run_in_subprocess(args):
             test_suite_env_map[test_suite_file] = 'default'

     if args.run_config is not None and Path(args.run_config).exists():
-        with open(args.run_config) as f:
+        with open(args.run_config, encoding='utf-8') as f:
             run_config = yaml.load(f, Loader=yaml.FullLoader)
             if 'isolated' in run_config:
                 isolated_cases = run_config['isolated']
@@ -109,7 +109,7 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -185,7 +185,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -248,7 +248,7 @@ class TrainerTest(unittest.TestCase):
         results_files = os.listdir(self.tmp_dir)
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -367,7 +367,7 @@ class TrainerTest(unittest.TestCase):
         trainer.train()
         results_files = os.listdir(self.tmp_dir)
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -142,7 +142,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -236,7 +236,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -320,7 +320,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         print(results_files, lines)