Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10708723master
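Summary: this review adds an explicit encoding='utf-8' to text-mode open() calls (including open() calls wrapped inside json/yaml loads) across the repository, so reads of config, vocab and annotation files no longer depend on the platform's locale default encoding. A minimal illustration of the pattern, not part of the diff (the file name is hypothetical):

    import json

    # Without an explicit encoding, open() falls back to locale.getpreferredencoding(False),
    # which is often cp1252/gbk on Windows and can break on UTF-8 content.
    with open('configuration.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

Binary-mode opens (for example zipfile.ZipFile) are unaffected and take no encoding argument.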
@@ -25,7 +25,7 @@ version_file = '../../modelscope/version.py'
 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
@@ -739,7 +739,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.USER_INFO_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 info = f.read()
                 return info.split(':')[0], info.split(':')[1]
         except FileNotFoundError:
@@ -760,7 +760,7 @@ class ModelScopeConfig:
             with open(
                     os.path.join(ModelScopeConfig.path_credential,
                                  ModelScopeConfig.GIT_TOKEN_FILE_NAME),
-                    'r') as f:
+                    'r', encoding='utf-8') as f:
                 token = f.read()
         except FileNotFoundError:
             pass
@@ -21,7 +21,7 @@ class KanTtsText2MelDataset(Dataset):
         self.cache = cache
-        with open(config_filename) as f:
+        with open(config_filename, encoding='utf-8') as f:
             self._config = json.loads(f.read())
         # Load metadata:
@@ -60,7 +60,7 @@ class SambertHifigan(Model):
             with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                 zip_ref.extractall(model_dir)
         voice_cfg_path = os.path.join(self.__voice_path, 'voices.json')
-        with open(voice_cfg_path, 'r') as f:
+        with open(voice_cfg_path, 'r', encoding='utf-8') as f:
             voice_cfg = json.load(f)
         if 'voices' not in voice_cfg:
             raise TtsModelConfigurationException(
@@ -39,7 +39,7 @@ class PlainNet(nn.Module):
         plainnet_struct_txt = self.module_opt.plainnet_struct_txt
         if plainnet_struct_txt is not None:
-            with open(plainnet_struct_txt, 'r') as fid:
+            with open(plainnet_struct_txt, 'r', encoding='utf-8') as fid:
                 the_line = fid.readlines()[0].strip()
                 self.plainnet_struct = the_line
         pass
@@ -120,7 +120,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
@@ -523,8 +523,10 @@ class CLIPForMultiModalEmbedding(TorchModel):
         logger.info(f'Loading text model config from {text_model_config_file}')
         assert os.path.exists(text_model_config_file)
-        with open(vision_model_config_file,
-                  'r') as fv, open(text_model_config_file, 'r') as ft:
+        with open(
+                vision_model_config_file, 'r',
+                encoding='utf-8') as fv,\
+                open(text_model_config_file, 'r', encoding='utf-8') as ft:
             self.model_info = json.load(fv)
             for k, v in json.load(ft).items():
                 self.model_info[k] = v
@@ -76,7 +76,7 @@ class DiffusionModel(nn.Module):
         super(DiffusionModel, self).__init__()
         # including text and generator config
         model_config = json.load(
-            open('{}/model_config.json'.format(model_dir)))
+            open('{}/model_config.json'.format(model_dir), encoding='utf-8'))
         # text encoder
         text_config = model_config['text_config']
@@ -142,7 +142,9 @@ class DiffusionForTextToImageSynthesis(Model):
         # diffusion process
         diffusion_params = json.load(
-            open('{}/diffusion_config.json'.format(model_dir)))
+            open(
+                '{}/diffusion_config.json'.format(model_dir),
+                encoding='utf-8'))
         self.diffusion_generator = make_diffusion(
             **diffusion_params['generator_config'])
         self.diffusion_upsampler_256 = make_diffusion(
@@ -130,7 +130,7 @@ class BertConfig(object):
     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `BertConfig` from a json file of parameters."""
-        with open(json_file, 'r') as reader:
+        with open(json_file, 'r', encoding='utf-8') as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
@@ -67,7 +67,7 @@ def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
     index = 0
-    with open(vocab_file, 'r') as reader:
+    with open(vocab_file, 'r', encoding='utf-8') as reader:
         while True:
             token = convert_to_unicode(reader.readline())
             if not token:
@@ -522,7 +522,9 @@ class GEMMModel(nn.Module):
     def __init__(self, model_dir):
         super().__init__()
-        with open('{}/encoder_config.json'.format(model_dir), 'r') as f:
+        with open(
+                '{}/encoder_config.json'.format(model_dir), 'r',
+                encoding='utf-8') as f:
             model_config = json.loads(f.read())
         model_name = list(model_config.keys())[0]
         config_args = model_config[model_name]
@@ -35,7 +35,9 @@ class VideoCLIPForMultiModalEmbedding(TorchModel):
     def __init__(self, model_dir, **kwargs):
         super().__init__(model_dir=model_dir, **kwargs)
         # model config parameters
-        with open(f'{model_dir}/{ModelFile.CONFIGURATION}', 'r') as json_file:
+        with open(
+                f'{model_dir}/{ModelFile.CONFIGURATION}', 'r',
+                encoding='utf-8') as json_file:
             model_config = json.load(json_file)
         model_config = model_config['paras']
         model_config['model_dir'] = model_dir
@@ -111,6 +111,6 @@ class MPlugConfig(PretrainedConfig):
     @classmethod
     def from_yaml_file(cls, yaml_file: Union[str,
                                              os.PathLike]) -> Dict[str, Any]:
-        with open(yaml_file, 'r') as reader:
+        with open(yaml_file, 'r', encoding='utf-8') as reader:
             config_dict = yaml.load(reader, Loader=yaml.Loader)
         return cls(**config_dict)
@@ -50,7 +50,8 @@ class UnCLIP(nn.Module):
     def __init__(self, model_dir):
         super(UnCLIP, self).__init__()
         self.model_dir = model_dir
-        self.config = json.load(open(f'{model_dir}/{ModelFile.CONFIGURATION}'))
+        self.config = json.load(
+            open(f'{model_dir}/{ModelFile.CONFIGURATION}', encoding='utf-8'))
         # modules
         self.clip = CLIP(**self.config['clip']).fp16()
@@ -312,7 +312,7 @@ class OfaForAllTasks(TorchModel):
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(self.model_dir,
                                       self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 self.ans2label_dict = json.load(reader)

     def save_pretrained(self,
@@ -743,7 +743,7 @@ def get_args():
     if hasattr(args, 'deepspeed'
                ) and args.deepspeed and args.deepspeed_config is not None:
-        with open(args.deepspeed_config) as file:
+        with open(args.deepspeed_config, encoding='utf-8') as file:
             deepspeed_config = json.load(file)
         if 'train_micro_batch_size_per_gpu' in deepspeed_config:
             args.batch_size = deepspeed_config[
@@ -156,7 +156,7 @@ class DataReader:
         def read_input_to_queue():
             for path in paths:
                 print_rank_0(f'Start reading {path}')
-                with open(path) as file:
+                with open(path, encoding='utf-8') as file:
                     items = json.load(file)
                     for item in items:
                         task_queue.put(item)
@@ -511,12 +511,12 @@ class json_dataset(data.Dataset):
     def load_json_stream(self, load_path):
         if not self.loose_json:
-            jsons = json.load(open(load_path, 'r'))
+            jsons = json.load(open(load_path, 'r', encoding='utf-8'))
             generator = iter(jsons)
         else:

            def gen_helper():
-                with open(load_path, 'r') as f:
+                with open(load_path, 'r', encoding='utf-8') as f:
                     for row in f:
                         yield json.loads(row)
@@ -29,7 +29,9 @@ with open(output_path, 'w') as output:
         print(filename)
         article_lines = []
         article_open = False
-        with open(filename, mode='r', newline='\n') as file:
+        with open(
+                filename, mode='r', newline='\n',
+                encoding='utf-8') as file:
             for line in file:
                 line = line.rstrip()
                 if '<doc id=' in line:
@@ -179,7 +179,7 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        self.encoder = json.load(open(vocab_file, encoding='utf-8'))
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
@@ -19,7 +19,7 @@ for dir_path in glob.glob(path_pattern, recursive=True):
     valid_path = os.path.join(dir_path, 'results.json')
     if os.path.exists(valid_path):
         print(entry)
-        with open(valid_path) as file:
+        with open(valid_path, encoding='utf-8') as file:
             valid_result = json.load(file)
     else:
         print(f'{entry} no validation results')
@@ -121,7 +121,7 @@ class LambadaDataset(torch.utils.data.Dataset):
         self.tokens = []
         self.labels = []

-        with open(data_path, 'r') as f:
+        with open(data_path, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 text = json.loads(line)['text']
                 tokens, labels = self.get_tokens(text)
@@ -209,14 +209,16 @@ class XSumProcessor:
             raise NotImplementedError(split)
         print_rank_0(f'Creating XSUM-{split} dataset from {self.data_dir}')
         with open(
-                os.path.join(
-                    self.data_dir,
-                    'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')) as file:
+                os.path.join(self.data_dir,
                             'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json'),
+                encoding='utf-8') as file:
             id_list = json.load(file)
         id_list = id_list[key]
         source_texts, target_texts = [], []
         for i, idx in enumerate(id_list):
-            with open(os.path.join(self.data_dir, f'{idx}.summary')) as file:
+            with open(
+                    os.path.join(self.data_dir, f'{idx}.summary'),
+                    encoding='utf-8') as file:
                 key, sentences = None, []
                 source_text, target_text = None, None
                 for line in file:
@@ -841,7 +841,7 @@ class RaceProcessor(DataProcessor):
             path, 'middle', '*.txt')) + glob.glob(
                 os.path.join(path, 'high', '*.txt'))
         for filename in filenames:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 for line in f:
                     data = json.loads(line)
                     idx = data['id']
@@ -1127,7 +1127,7 @@ class AgnewsProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []

-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, headline, body = row
@@ -1209,7 +1209,7 @@ class YelpPolarityProcessor(DataProcessor):
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []

-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=',')
             for idx, row in enumerate(reader):
                 label, body = row
@@ -1419,7 +1419,7 @@ class SquadProcessor(DataProcessor):
     @staticmethod
     def _create_examples(path: str, set_type: str) -> List[InputExample]:
         examples = []
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             data = json.load(f)['data']

             for idx, passage in enumerate(data):
@@ -538,7 +538,7 @@ class PVP(ABC):
             dict)  # type: Dict[int, Dict[str, List[str]]]
         current_pattern_id = None

-        with open(path, 'r') as fh:
+        with open(path, 'r', encoding='utf-8') as fh:
             for line in fh.read().splitlines():
                 if line.isdigit():
                     current_pattern_id = int(line)
@@ -77,7 +77,7 @@ def print_and_save_args(args, verbose=True, log_dir=None):
         with open(json_file, 'w') as output:
             json.dump(vars(args), output, sort_keys=True)
         if args.deepspeed and args.deepspeed_config is not None:
-            with open(args.deepspeed_config) as file:
+            with open(args.deepspeed_config, encoding='utf-8') as file:
                 deepspeed_config = json.load(file)
             deepspeed_json_file = os.path.join(log_dir,
                                                'config_gpt_large.json')
@@ -324,7 +324,7 @@ def get_checkpoint_iteration(load_path):
         print_rank_0(' will not load any checkpoints and will start from '
                      'random')
         return load_path, 0, False, False
-    with open(tracker_filename, 'r') as f:
+    with open(tracker_filename, 'r', encoding='utf-8') as f:
         metastring = f.read().strip()
     release = metastring == 'release'
     # try:
@@ -443,7 +443,7 @@ def load_stereo_chemical_props():
     stereo_chemical_props_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
         'stereo_chemical_props.txt')
-    with open(stereo_chemical_props_path, 'rt') as f:
+    with open(stereo_chemical_props_path, 'rt', encoding='utf-8') as f:
         stereo_chemical_props = f.read()
     lines_iter = iter(stereo_chemical_props.splitlines())
     # Load bond lengths.
@@ -250,7 +250,7 @@ class UnifoldDataset(UnicoreDataset):
         self.path = data_path

         def load_json(filename):
-            return json.load(open(filename, 'r'))
+            return json.load(open(filename, 'r', encoding='utf-8'))

         sample_weight = load_json(
             os.path.join(self.path,
@@ -400,7 +400,8 @@ class UnifoldMultimerDataset(UnifoldDataset):
             self.pdb_assembly = json.load(
                 open(
                     os.path.join(self.data_path,
-                                 json_prefix + 'pdb_assembly.json')))
+                                 json_prefix + 'pdb_assembly.json'),
+                    encoding='utf-8'))
             self.pdb_chains = self.get_chains(self.inverse_multi_label)
             self.monomer_feature_path = os.path.join(self.data_path,
                                                      'pdb_features')
@@ -99,7 +99,7 @@ def run_msa_tool(
             f.write(result[msa_format])
     else:
         logging.warning('Reading MSA from file %s', msa_out_path)
-        with open(msa_out_path, 'r') as f:
+        with open(msa_out_path, 'r', encoding='utf-8') as f:
             result = {msa_format: f.read()}
     return result
@@ -153,7 +153,7 @@ class DataPipeline:
     def process(self, input_fasta_path: str,
                 msa_output_dir: str) -> FeatureDict:
         """Runs alignment tools on the input sequence and creates features."""
-        with open(input_fasta_path) as f:
+        with open(input_fasta_path, encoding='utf-8') as f:
             input_fasta_str = f.read()
         input_seqs, input_descs = parsers.parse_fasta(input_fasta_str)
         if len(input_seqs) != 1:
@@ -155,7 +155,7 @@ def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
     """Parses release dates file, returns a mapping from PDBs to release dates."""
     if path.endswith('txt'):
         release_dates = {}
-        with open(path, 'r') as f:
+        with open(path, 'r', encoding='utf-8') as f:
             for line in f:
                 pdb_id, date = line.split(':')
                 date = date.strip()
@@ -106,14 +106,14 @@ class MovieSceneSegmentationDataset(TorchTaskDataset):
         self.tmpl = '{}/shot_{}_img_{}.jpg'  # video_id, shot_id, shot_num

         if not self.test_mode:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)
             self.vidsid2label = {
                 f"{it['video_id']}_{it['shot_id']}": it['boundary_label']
                 for it in self.anno_data
             }
         else:
-            with open(self.ann_file) as f:
+            with open(self.ann_file, encoding='utf-8') as f:
                 self.anno_data = json.load(f)

     def init_sampler(self, cfg):
@@ -146,7 +146,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
         saved_annotations_file_path = osp.join(
             root_path, f'sentences_single_frame_{subset}_annotations.json')
         if osp.exists(saved_annotations_file_path):
-            with open(saved_annotations_file_path, 'r') as f:
+            with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
                 text_annotations_by_frame = [tuple(a) for a in json.load(f)]
             return text_annotations_by_frame
         elif (distributed and dist.get_rank() == 0) or not distributed:
@@ -203,7 +203,7 @@ class ReferringVideoObjectSegmentationDataset(TorchTaskDataset):
                 json.dump(text_annotations_by_frame, f)
         if distributed:
             dist.barrier()
-        with open(saved_annotations_file_path, 'r') as f:
+        with open(saved_annotations_file_path, 'r', encoding='utf-8') as f:
             text_annotations_by_frame = [tuple(a) for a in json.load(f)]
         return text_annotations_by_frame
@@ -267,8 +267,10 @@ def get_text_annotations_gt(root_path, subset):
         osp.join(root_path, 'Release/videoset.csv'), header=None)
     # 'vid', 'label', 'start_time', 'end_time', 'height', 'width', 'total_frames', 'annotated_frames', 'subset'
     a2d_data_info.columns = ['vid', '', '', '', '', '', '', '', 'subset']
-    with open(osp.join(root_path, 'text_annotations/missed_videos.txt'),
-              'r') as f:
+    with open(
+            osp.join(root_path, 'text_annotations/missed_videos.txt'),
+            'r',
+            encoding='utf-8') as f:
         unused_videos = f.read().splitlines()
     subsets = {'train': 0, 'test': 1}
     # filter unused videos and videos which do not belong to our train/test subset:
@@ -26,7 +26,7 @@ class VideoSummarizationDataset(TorchTaskDataset):
         self.list_n_frames = []
         self.list_positions = []

-        with open(self.split_filename) as f:
+        with open(self.split_filename, encoding='utf-8') as f:
             data = json.loads(f.read())
             for i, split in enumerate(data):
                 if i == self.split_index:
@@ -116,7 +116,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
         }

         if self.framework == Frameworks.torch:
-            config_file = open(inputs['asr_model_config'])
+            config_file = open(inputs['asr_model_config'], encoding='utf-8')
             root = yaml.full_load(config_file)
             config_file.close()
             frontend_conf = None
@@ -109,7 +109,7 @@ class AnimalRecognitionPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'label_mapping.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
@@ -110,7 +110,7 @@ class GeneralRecognitionPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.local_path, 'meta_info.txt')
-        with open(label_mapping_path, 'r') as f:
+        with open(label_mapping_path, 'r', encoding='utf-8') as f:
             label_mapping = f.readlines()
         score = torch.max(inputs['outputs'])
         inputs = {
@@ -49,7 +49,7 @@ class OCRRecognitionPipeline(Pipeline):
         self.infer_model.load_state_dict(
             torch.load(model_path, map_location=self.device))
         self.labelMapping = dict()
-        with open(label_path, 'r') as f:
+        with open(label_path, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             cnt = 2
             for line in lines:
@@ -82,7 +82,7 @@ class TinynasClassificationPipeline(Pipeline):
     def postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         label_mapping_path = osp.join(self.path, 'label_map.txt')
-        f = open(label_mapping_path)
+        f = open(label_mapping_path, encoding='utf-8')
         content = f.read()
         f.close()
         label_dict = eval(content)
@@ -36,7 +36,7 @@ class VideoCategoryPipeline(Pipeline):
         super().__init__(model=model, **kwargs)
         config_path = osp.join(self.model, ModelFile.CONFIGURATION)
         logger.info(f'loading configuration from {config_path}')
-        with open(config_path, 'r') as f:
+        with open(config_path, 'r', encoding='utf-8') as f:
             config = json.load(f)
             self.frame_num = config['frame_num']
             self.level_1_num = config['level_1_num']
@@ -59,8 +59,9 @@ def load_feature_for_one_target(
     else:
         uniprot_msa_dir = data_folder
-    sequence_ids = open(os.path.join(data_folder,
-                                     'chains.txt')).readline().split()
+    sequence_ids = open(
+        os.path.join(data_folder, 'chains.txt'),
+        encoding='utf-8').readline().split()

     if symmetry_group is None:
         batch, _ = load_and_process(
@@ -15,7 +15,7 @@ from modelscope.utils.constant import Fields


 def load_kaldi_feature_transform(filename):
-    fp = open(filename, 'r')
+    fp = open(filename, 'r', encoding='utf-8')
     all_str = fp.read()
     pos1 = all_str.find('AddShift')
     pos2 = all_str.find('[', pos1)
@@ -78,7 +78,7 @@ class WavToLists(Preprocessor):
         assert os.path.exists(
             inputs['config_path']), 'model config yaml file does not exist'

-        config_file = open(inputs['config_path'])
+        config_file = open(inputs['config_path'], encoding='utf-8')
         root = yaml.full_load(config_file)
         config_file.close()
@@ -145,8 +145,9 @@ class CLIPPreprocessor(Preprocessor):
             self.image_resolution = kwargs['resolution']
         else:
             self.image_resolution = json.load(
-                open('{}/vision_model_config.json'.format(
-                    model_dir)))['image_resolution']
+                open(
+                    '{}/vision_model_config.json'.format(model_dir),
+                    encoding='utf-8'))['image_resolution']
         self.img_preprocess = self._build_image_transform()
         # key mapping
         # specify the input keys, compatible with training and inference whose key names may be different
@@ -59,8 +59,10 @@ class NLPBasePreprocessor(Preprocessor, ABC):
             self.use_fast = False
         elif self.use_fast is None and os.path.isfile(
                 os.path.join(model_dir, 'tokenizer_config.json')):
-            with open(os.path.join(model_dir, 'tokenizer_config.json'),
-                      'r') as f:
+            with open(
+                    os.path.join(model_dir, 'tokenizer_config.json'),
+                    'r',
+                    encoding='utf-8') as f:
                 json_config = json.load(f)
             self.use_fast = json_config.get('use_fast')
         self.use_fast = False if self.use_fast is None else self.use_fast
@@ -35,7 +35,10 @@ class DialogIntentPredictionPreprocessor(Preprocessor):
             self.model_dir, config=self.config)
         self.categories = None
-        with open(os.path.join(self.model_dir, 'categories.json'), 'r') as f:
+        with open(
+                os.path.join(self.model_dir, 'categories.json'),
+                'r',
+                encoding='utf-8') as f:
             self.categories = json.load(f)
         assert len(self.categories) == 77
@@ -184,7 +184,7 @@ class multiwoz22Processor(DSTProcessor):
     # Loads the dialogue_acts.json and returns a list
     # of slot-value pairs.
     def load_acts(self, input_file):
-        with open(input_file) as f:
+        with open(input_file, encoding='utf-8') as f:
             acts = json.load(f)
         s_dict = {}
         for d in acts:
@@ -359,12 +359,14 @@ class MultiWOZBPETextField(BPETextField):
             test_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'testListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]
             dev_list = [
                 line.strip().lower() for line in open(
                     os.path.join(kwargs['data_dir'], 'valListFile.json'),
-                    'r').readlines()
+                    'r',
+                    encoding='utf-8').readlines()
             ]

             self.dev_files, self.test_files = {}, {}
@@ -531,7 +531,7 @@ class GPT2Tokenizer(object):
                  special_tokens=None,
                  max_len=None):
         self.max_len = max_len if max_len is not None else int(1e12)
-        self.encoder = json.load(open(vocab_file))
+        self.encoder = json.load(open(vocab_file, encoding='utf-8'))
         self.decoder = {v: k for k, v in self.encoder.items()}
         self.errors = errors  # how to handle errors in decoding
         self.byte_encoder = bytes_to_unicode()
@@ -32,12 +32,12 @@ class Database:
         tables = {}
         lines = []
         if type(table_file_path) == str:
-            with open(table_file_path, 'r') as fo:
+            with open(table_file_path, 'r', encoding='utf-8') as fo:
                 for line in fo:
                     lines.append(line)
         elif type(table_file_path) == list:
             for path in table_file_path:
-                with open(path, 'r') as fo:
+                with open(path, 'r', encoding='utf-8') as fo:
                     for line in fo:
                         lines.append(line)
         else:
@@ -45,7 +45,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
             and torch.cuda.is_available() else 'cpu'
         self.processor = None
         self.table_path = os.path.join(self.model_dir, 'tables.json')
-        self.tables = json.load(open(self.table_path, 'r'))
+        self.tables = json.load(open(self.table_path, 'r', encoding='utf-8'))
         self.output_tables = None
         self.path_cache = []
         self.graph_processor = GraphProcessor()
@@ -89,7 +89,7 @@ class ConversationalTextToSqlPreprocessor(Preprocessor):
                 'local_db_path'] not in self.path_cache:
             self.path_cache.append(data['local_db_path'])
             path = os.path.join(data['local_db_path'], 'tables.json')
-            self.tables = json.load(open(path, 'r'))
+            self.tables = json.load(open(path, 'r', encoding='utf-8'))
             self.processor.db_dir = os.path.join(data['local_db_path'], 'db')
             self.output_tables = process_tables(self.processor, self.tables)
             Example.configuration(
@@ -76,7 +76,7 @@ class OfaBasePreprocessor:
         self.constraint_trie = None
         if self.cfg.model.get('answer2label', None):
             ans2label_file = osp.join(model_dir, self.cfg.model.answer2label)
-            with open(ans2label_file, 'r') as reader:
+            with open(ans2label_file, 'r', encoding='utf-8') as reader:
                 ans2label_dict = json.load(reader)
             self.ans2label = ans2label_dict
             self.label2ans = {v: k for k, v in self.ans2label.items()}
@@ -201,7 +201,7 @@ def run_mmseqs2(
     a3m_lines = {}
     for a3m_file in a3m_files:
         update_M, M = True, None
-        with open(a3m_file, 'r') as f:
+        with open(a3m_file, 'r', encoding='utf-8') as f:
             lines = f.readlines()
             for line in lines:
                 if len(line) > 0:
@@ -771,7 +771,8 @@ class CamRestEvaluator(GenericEvaluator):
     def get_entities(self, entity_path):
         entities_flat = []
         entitiy_to_slot_dict = {}
-        raw_entities = json.loads(open(entity_path).read().lower())
+        raw_entities = json.loads(
+            open(entity_path, encoding='utf-8').read().lower())
         for s in raw_entities['informable']:
             entities_flat.extend(raw_entities['informable'][s])
             for v in raw_entities['informable'][s]:
@@ -47,7 +47,7 @@ def update_conf(origin_config_file, new_config_file, conf_item: [str, str]):
         else:
             return None

-    with open(origin_config_file) as f:
+    with open(origin_config_file, encoding='utf-8') as f:
         lines = f.readlines()
     with open(new_config_file, 'w') as f:
         for line in lines:
@@ -178,7 +178,7 @@ class Config:
         if cfg_text:
             text = cfg_text
         elif filename:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8') as f:
                 text = f.read()
         else:
             text = ''
@@ -124,7 +124,7 @@ def parse_label_mapping(model_dir):
     label2id = None
     label_path = os.path.join(model_dir, ModelFile.LABEL_MAPPING)
     if os.path.exists(label_path):
-        with open(label_path) as f:
+        with open(label_path, encoding='utf-8') as f:
             label_mapping = json.load(f)
         label2id = {name: idx for name, idx in label_mapping.items()}
@@ -59,7 +59,9 @@ def clean_text(data_dir, text):
                   text)  # 'abc.xyz' -> 'abc . xyz'
     text = re.sub(r'(\w+)\.\.? ', r'\1 . ', text)  # if 'abc. ' -> 'abc . '

-    with open(os.path.join(data_dir, 'mapping.pair'), 'r') as fin:
+    with open(
+            os.path.join(data_dir, 'mapping.pair'), 'r',
+            encoding='utf-8') as fin:
         for line in fin.readlines():
             fromx, tox = line.replace('\n', '').split('\t')
             text = ' ' + text + ' '
@@ -15,7 +15,9 @@ class MultiWozDB(object):
         self.dbs = {}
         self.sql_dbs = {}
         for domain in all_domains:
-            with open(os.path.join(db_dir, db_paths[domain]), 'r') as f:
+            with open(
+                    os.path.join(db_dir, db_paths[domain]), 'r',
+                    encoding='utf-8') as f:
                 self.dbs[domain] = json.loads(f.read().lower())

     def oneHotVector(self, domain, num):
@@ -146,9 +146,9 @@ class MultiWOZVocab(object):
     def load_vocab(self, vocab_path):
         self._freq_dict = json.loads(
-            open(vocab_path + '.freq.json', 'r').read())
+            open(vocab_path + '.freq.json', 'r', encoding='utf-8').read())
         self._word2idx = json.loads(
-            open(vocab_path + '.word2idx.json', 'r').read())
+            open(vocab_path + '.word2idx.json', 'r', encoding='utf-8').read())
         self._idx2word = {}
         for w, idx in self._word2idx.items():
             self._idx2word[idx] = w
@@ -50,7 +50,7 @@ def get_hash():


 def get_version():
-    with open(version_file, 'r') as f:
+    with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
@@ -109,7 +109,7 @@ def parse_requirements(fname='requirements.txt', with_version=True):
             yield info

     def parse_require_file(fpath):
-        with open(fpath, 'r') as f:
+        with open(fpath, 'r', encoding='utf-8') as f:
             for line in f.readlines():
                 line = line.strip()
                 if line.startswith('http'):
@@ -247,7 +247,7 @@ def run_in_subprocess(args):
             test_suite_env_map[test_suite_file] = 'default'

     if args.run_config is not None and Path(args.run_config).exists():
-        with open(args.run_config) as f:
+        with open(args.run_config, encoding='utf-8') as f:
             run_config = yaml.load(f, Loader=yaml.FullLoader)
             if 'isolated' in run_config:
                 isolated_cases = run_config['isolated']
@@ -109,7 +109,7 @@ class EasyCVTrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -185,7 +185,7 @@ class EasyCVTrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -248,7 +248,7 @@ class TrainerTest(unittest.TestCase):
         results_files = os.listdir(self.tmp_dir)
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -367,7 +367,7 @@ class TrainerTest(unittest.TestCase):
         trainer.train()
         results_files = os.listdir(self.tmp_dir)
         json_file = os.path.join(self.tmp_dir, f'{trainer.timestamp}.log.json')
-        with open(json_file, 'r') as f:
+        with open(json_file, 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -142,7 +142,7 @@ class TrainerTestSingleGpu(unittest.TestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         self.assertDictContainsSubset(
             {
@@ -236,7 +236,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]

         self.assertDictContainsSubset(
@@ -320,7 +320,7 @@ class TrainerTestMultiGpus(DistributedTestCase):
         json_files = glob.glob(os.path.join(self.tmp_dir, '*.log.json'))
         self.assertEqual(len(json_files), 1)
-        with open(json_files[0], 'r') as f:
+        with open(json_files[0], 'r', encoding='utf-8') as f:
             lines = [i.strip() for i in f.readlines()]
         print(results_files, lines)