| @@ -0,0 +1,75 @@ | |||
| from fastNLP.io.embed_loader import EmbeddingOption, EmbedLoader | |||
| from fastNLP.core.vocabulary import VocabularyOption | |||
| from fastNLP.io.base_loader import DataSetLoader, DataInfo | |||
| from typing import Union, Dict, List, Iterator | |||
| from fastNLP import DataSet | |||
| from fastNLP import Instance | |||
| from fastNLP import Vocabulary | |||
| from fastNLP import Const | |||
| from reproduction.utils import check_dataloader_paths | |||
| from functools import partial | |||
class MTL16Loader(DataSetLoader):
    """Load the MTL16 text-classification dataset.

    Every non-empty line of an input file must consist of a label, a tab
    character and the text. A DataSet returned by ``_load`` contains the
    following fields:

        words: list(str), the whitespace-split text to classify
        target: str, the label of the text

    Data source: https://pan.baidu.com/s/1c2L6vdA
    """

    def __init__(self):
        super(MTL16Loader, self).__init__()

    def _load(self, path):
        """Read one MTL16 file into a DataSet.

        :param path: path of a UTF-8 text file with one ``label<TAB>text``
            sample per line; blank lines are skipped.
        :return: a DataSet with one Instance (``words``, ``target``) per
            sample line.
        :raises IndexError: if a non-empty line has no tab separator (the
            exception type matches what the bare ``parts[1]`` lookup used
            to raise, now with the file and line number in the message).
        :raises RuntimeError: if the file yields no samples at all.
        """
        dataset = DataSet()
        with open(path, 'r', encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                # Split on the first tab only, so a stray tab inside the
                # text does not silently truncate the sample.
                parts = line.split('\t', 1)
                if len(parts) < 2:
                    raise IndexError(
                        f"{path}:{line_no}: expected '<label>\\t<text>', "
                        f"got {line!r}")
                target, text = parts
                dataset.append(Instance(words=text.split(), target=target))
        if len(dataset) == 0:
            raise RuntimeError(f"{path} has no valid data.")
        return dataset

    def process(self,
                paths: Union[str, Dict[str, str]],
                src_vocab_opt: VocabularyOption = None,
                tgt_vocab_opt: VocabularyOption = None,
                src_embed_opt: EmbeddingOption = None):
        """Load every split, build vocabularies and index the data.

        :param paths: a single path or a ``{split name: path}`` mapping. It
            is passed through ``check_dataloader_paths``; the result must
            contain a ``'train'`` entry because both vocabularies are built
            from that split.
        :param src_vocab_opt: keyword arguments for the ``words``
            Vocabulary; a default ``Vocabulary()`` is used when None.
        :param tgt_vocab_opt: keyword arguments for the ``target``
            Vocabulary; when None the vocabulary is created with
            ``unknown=None, padding=None``.
        :param src_embed_opt: keyword arguments forwarded to
            ``EmbedLoader.load_with_vocab`` together with the ``words``
            vocabulary; no embedding is loaded when None.
        :return: a DataInfo whose ``datasets`` holds the loaded splits,
            whose ``vocabs`` holds the ``"words"`` and ``"target"``
            vocabularies and, if requested, whose ``embeddings['words']``
            holds the loaded embedding.
        """
        paths = check_dataloader_paths(paths)
        info = DataInfo()
        datasets = {name: self.load(path) for name, path in paths.items()}

        # Vocabularies are fitted on the training split only and then
        # applied to every loaded split.
        src_vocab = Vocabulary() if src_vocab_opt is None else Vocabulary(**src_vocab_opt)
        src_vocab.from_dataset(datasets['train'], field_name='words')
        src_vocab.index_dataset(*datasets.values(), field_name='words')

        if tgt_vocab_opt is None:
            # By default the label vocabulary gets no unknown/padding entry.
            tgt_vocab = Vocabulary(unknown=None, padding=None)
        else:
            tgt_vocab = Vocabulary(**tgt_vocab_opt)
        tgt_vocab.from_dataset(datasets['train'], field_name='target')
        tgt_vocab.index_dataset(*datasets.values(), field_name='target')

        info.vocabs = {
            "words": src_vocab,
            "target": tgt_vocab
        }
        info.datasets = datasets

        if src_embed_opt is not None:
            embed = EmbedLoader.load_with_vocab(**src_embed_opt, vocab=src_vocab)
            info.embeddings['words'] = embed

        return info
| @@ -0,0 +1,10 @@ | |||
| 1 the only thing better than these sunglasses is the customer service i got , after i dropped and broke the lenses on these i called 80 's purple and they actually sent me out a replacement free of charge . i was blown away | |||
| 0 this light worked for one day . i should have known better because in the past , i bought a tap light , and it worked for only a few days , too . do n't waste your money | |||
| 1 i 've tried 6 different nursing bras . this one , with the center snap closure , is the easiest to use . it is also the lightest and most comfortable , while providing good support . my only complaint is that after about 50 washes the underwire begins to poke free from the fabric . even when i try to sew it back into place , it breaks loose after a few washes . perhaps if i handwashed the bra instead of using a machine , it would last longer . this bra is less durabe than my other nursing bras ( particularly the leading lady bra , which seems to be indestructible ) , but it is well worth the sacrifice for comfort , lightness , and ease of use . it is by far my favorite | |||
| 0 i have had my bag for a couple of months . the liner on the inside has already ripped | |||
| 0 the photo is quite deceiving . this suit is made out of cheap polyester fabric that looks cheap , shiny , and is horrible to the touch . my three year olds hate the uncomfortable stiffness . spend the extra money for a decent fabric that is actually practical for a toddler if they really need a suit | |||
| 1 i had bought a bra of this model at a discount store , just got lucky . it quickly became my favorite , and i was glad to find it at amazon . | |||
| 0 lookslike it would be a nice product , but it 's only for very small babies up to 12 pounds and 23 inches . my baby is very long and just does n't fit - wish target/amazon would have been more upfront with the sizing | |||
| 0 i purchased the non-premium kit ( $ 9.99 ) with a silicone skin case cover and 2 screen protectors ( one for each screen ) , but it is the same case . the problem is that the silicone skin cover is slippery , twice as slippery as the nintendo lite without the cover . we thought that washing them in dove dish soap would wash away the slipperyness , but that did n't work . after handling the cover , your hands have a slippery residue on them . the other issue is that the cover is so thin that it is little more than scratch protection , not impact protection . the screen covers that come with the non-premium kit are ok , i guess , but one of them had 2 defect particles that were raised ( trust me , the screen was clean ) . i purchased 2 kits , and i had one screen protector defect and my wife accidentally broke one of the silicone covers hinge straps with little effort . i do not recommend this product at all | |||
| 1 good quality jeans at an affordable price . size is just right , quite comfortable | |||
| 0 not the best fabric , scratchy and see thru . you get what you pay for on these | |||
| @@ -0,0 +1,10 @@ | |||
| import unittest | |||
| from reproduction.text_classification.data.MTL16Loader import MTL16Loader | |||
class TestDataLoader(unittest.TestCase):
    """Smoke test for MTL16Loader against the small sample file."""

    def test_MTL16Loader(self):
        """Process the sample file and check the shape of the result."""
        # Local import: only this test needs it.
        import os

        # Prefer the sample file located next to this test module so the
        # test does not depend on the current working directory; fall back
        # to the original relative path if it is not found there.
        sample = 'sample_MTL16.txt'
        beside_test = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), sample)
        if os.path.exists(beside_test):
            sample = beside_test

        loader = MTL16Loader()
        data = loader.process(sample)
        print(data.datasets)

        # process() builds its vocabularies from the 'train' split and
        # registers exactly these two vocabularies.
        self.assertIn('train', data.datasets)
        self.assertEqual(set(data.vocabs), {'words', 'target'})
        # NOTE(review): assumes sample_MTL16.txt is the 10-line sample
        # file that accompanies this test - confirm.
        self.assertEqual(len(data.datasets['train']), 10)