@@ -0,0 +1,29 @@
class Inference(object):
    """
    This is an interface focusing on predicting output based on trained models.
    It does not care about evaluating the model.
    Possible improvements:
        - use batches to make better use of the GPU
    """

    def __init__(self):
        pass

    def predict(self, model, data):
        """
        This is actually a forward pass, which could be shared by Trainer/Tester.
        :param model: a trained model
        :param data: the input data
        :return result: the prediction results
        """
        raise NotImplementedError

    def prepare_input(self, data_path):
        """
        This can also be shared.
        :param data_path: path to the input data
        :return:
        """
        raise NotImplementedError
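Below is a minimal sketch of a concrete subclass, assuming `model` is a PyTorch
module whose forward pass maps one batch to predictions (the class name and the
batching scheme are illustrative assumptions, not part of this diff):

    class ToyInference(Inference):
        # hypothetical subclass, for illustration only
        def predict(self, model, data):
            model.eval()  # assumes a torch.nn.Module
            results = []
            for batch in data:  # assumes `data` is an iterable of batches
                results.append(model(batch))
            return results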
@@ -7,6 +7,7 @@ from fastNLP.action.action import Action
from fastNLP.action.action import RandomSampler, Batchifier
from fastNLP.action.tester import POSTester
from fastNLP.modules.utils import seq_mask
from fastNLP.saver.model_saver import ModelSaver


class BaseTrainer(Action):
@@ -34,9 +35,13 @@ class BaseTrainer(Action):
        """
        super(BaseTrainer, self).__init__()
        self.n_epochs = train_args["epochs"]
        self.validate = train_args["validate"]
        self.batch_size = train_args["batch_size"]
        self.pickle_path = train_args["pickle_path"]
        self.validate = train_args["validate"]
        self.save_best_dev = train_args["save_best_dev"]
        self.model_saved_path = train_args["model_saved_path"]
        self.model = None
        self.iterator = None
        self.loss_func = None
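For reference, a minimal train_args dict covering every key read above (the
values are illustrative; "save_best_dev" and "model_saved_path" are the keys
this diff adds):

    train_args = {
        "epochs": 20,
        "batch_size": 32,
        "pickle_path": "./data_for_tests/",
        "validate": True,
        "save_best_dev": True,
        "model_saved_path": "./",
    }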
@@ -68,7 +73,7 @@ class BaseTrainer(Action):
        # main training epochs
        iterations = len(data_train) // self.batch_size

        for epoch in range(self.n_epochs):
        for epoch in range(1, self.n_epochs + 1):

            # turn on network training mode; define optimizer; prepare batch iterator
            self.mode(test=False)
@@ -89,6 +94,11 @@ class BaseTrainer(Action):
                if data_dev is None:
                    raise RuntimeError("No validation data provided.")
                validator.test(network)

                if self.save_best_dev and self.best_eval_result(validator):
                    self.save_model(network)
                    print("saved better model selected by dev")

                print("[epoch {}]".format(epoch), end=" ")
                print(validator.show_matrices())
@@ -201,124 +211,49 @@ class BaseTrainer(Action):
            batch[idx] = sample + [fill] * (max_length - len(sample))
        return batch
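    # Worked example of the padding above (sketch): with fill = 0 and
    # max_length = 3, the batch [[3, 1, 4], [1, 5]] becomes
    # [[3, 1, 4], [1, 5, 0]] -- each short sample is extended with
    # (max_length - len(sample)) copies of `fill`.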
    def best_eval_result(self, validator):
        """
        :param validator: a Tester instance
        :return: bool, True means the current results on the dev set are the best so far.
        """
        raise NotImplementedError

    def save_model(self, network):
        """
        :param network: the PyTorch model to save
        model_best_dev.pkl may be overwritten by a better model in future epochs.
        """
        ModelSaver(self.model_saved_path + "model_best_dev.pkl").save_pytorch(network)
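    # To reload the checkpoint later, the counterpart is ModelLoader, as the
    # test script below does (the exact target path here is an assumption):
    #     ModelLoader("arbitrary_name", "./model_best_dev.pkl").load_pytorch(network)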
class ToyTrainer(BaseTrainer):
    """
    deprecated
    An example showing how to define a Trainer.
    """

    def __init__(self, train_args):
        super(ToyTrainer, self).__init__(train_args)
        self.test_mode = False
        self.weight = np.random.rand(5, 1)
        self.bias = np.random.rand()
        self._loss = 0
        self._optimizer = None

    def __init__(self, training_args):
        super(ToyTrainer, self).__init__(training_args)

    def prepare_input(self, data):
        return data[:, :-1], data[:, -1]

    def prepare_input(self, data_path):
        data_train = _pickle.load(open(data_path + "/data_train.pkl", "rb"))
        data_dev = _pickle.load(open(data_path + "/data_dev.pkl", "rb"))
        return data_train, data_dev, 0, 1

    def mode(self, test=False):
        self.model.mode(test)

    def data_forward(self, network, x):
        return np.matmul(x, self.weight) + self.bias
        return network(x)

    def grad_backward(self, loss):
        self.model.zero_grad()
        loss.backward()

    def get_loss(self, pred, truth):
        self._loss = np.mean(np.square(pred - truth))
        return self._loss
        return np.mean(np.square(pred - truth))

    def define_optimizer(self):
        self._optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)

    def update(self):
        self._optimizer.step()
class WordSegTrainer(BaseTrainer):
    """
    deprecated
    """

    def __init__(self, train_args):
        super(WordSegTrainer, self).__init__(train_args)
        self.id2word = None
        self.word2id = None
        self.id2tag = None
        self.tag2id = None

        self.lstm_batch_size = 8
        self.lstm_seq_len = 32  # Trainer batch_size == lstm_batch_size * lstm_seq_len
        self.hidden_dim = 100
        self.lstm_num_layers = 2
        self.vocab_size = 100
        self.word_emb_dim = 100

        self.hidden = (self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)),
                       self.to_var(torch.zeros(2, self.lstm_batch_size, self.word_emb_dim)))

        self.optimizer = None
        self._loss = None
        self.USE_GPU = False

    def to_var(self, x):
        if torch.cuda.is_available() and self.USE_GPU:
            x = x.cuda()
        return torch.autograd.Variable(x)

    def prepare_input(self, data):
        """
        Perform a word-index lookup to convert strings into indices.
        :param data: list of strings; each line is split on "#", taking the first
            field as the word and the first character of the third field as the
            tag (one of B, M, E, S)
        :return: word indices and tag indices, each as a numpy array of shape (n, 1)
        """
        word_list = []
        tag_list = []
        for line in data:
            if len(line) > 2:
                tokens = line.split("#")
                word_list.append(tokens[0])
                tag_list.append(tokens[2][0])
        self.id2word = list(set(word_list))
        self.word2id = {word: idx for idx, word in enumerate(self.id2word)}
        self.id2tag = list(set(tag_list))
        self.tag2id = {tag: idx for idx, tag in enumerate(self.id2tag)}
        words = np.array([self.word2id[w] for w in word_list]).reshape(-1, 1)
        tags = np.array([self.tag2id[t] for t in tag_list]).reshape(-1, 1)
        return words, tags
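    # Example of the line format implied by the split above (hypothetical data;
    # the middle field is ignored by the lookup):
    #     "apple#x#B..." -> word "apple", tag "B"
    # After prepare_input, `words` and `tags` hold the corresponding column
    # vectors of indices into word2id and tag2id respectively.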
    def mode(self, test=False):
        if test:
            self.model.eval()
        else:
            self.model.train()

    def data_forward(self, network, x):
        """
        :param network: a PyTorch model
        :param x: sequence of length [batch_size], word indices
        :return: network output
        """
        x = x.reshape(self.lstm_batch_size, self.lstm_seq_len)
        output, self.hidden = network(x, self.hidden)
        return output

    def define_optimizer(self):
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.85)

    def get_loss(self, predict, truth):
        truth = torch.Tensor(truth).long()
        self._loss = torch.nn.functional.cross_entropy(predict, truth)
        return self._loss

    def grad_backward(self, loss):
        self.model.zero_grad()
        self._loss.backward()
        torch.nn.utils.clip_grad_norm(self.model.parameters(), 5, norm_type=2)

    def update(self):
        self.optimizer.step()
@@ -335,6 +270,7 @@ class POSTrainer(BaseTrainer):
        self.num_classes = train_args["num_classes"]
        self.max_len = None
        self.mask = None
        self.best_accuracy = 0.0

    def prepare_input(self, data_path):
        """
@@ -391,6 +327,26 @@ class POSTrainer(BaseTrainer):
        # print("loss={:.2f}".format(loss.data))
        return loss

    def best_eval_result(self, validator):
        loss, accuracy = validator.matrices()
        if accuracy > self.best_accuracy:
            self.best_accuracy = accuracy
            return True
        else:
            return False
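    # Note: validator.matrices() is assumed here to return (loss, accuracy) on
    # the dev set; when accuracy improves, BaseTrainer.train() calls
    # save_model() via the save_best_dev branch shown earlier in this diff.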
class LanguageModelTrainer(BaseTrainer):
    """
    Trainer for Language Model
    """

    def __init__(self, train_args):
        super(LanguageModelTrainer, self).__init__(train_args)

    def prepare_input(self, data_path):
        pass


if __name__ == "__main__":
    train_args = {"epochs": 1, "validate": False, "batch_size": 3, "pickle_path": "./",
                  "save_best_dev": False, "model_saved_path": "./"}  # last two keys are now required by BaseTrainer
@@ -70,7 +70,7 @@ class ConfigSection(object):
        """
        if key in self.__dict__.keys():
            return getattr(self, key)
        raise AttributeError('don\'t have attr %s' % (key))
        raise AttributeError("do NOT have attribute %s" % key)

    def __setitem__(self, key, value):
        """
@@ -20,30 +20,6 @@ class BasePreprocess(object):
        if not self.pickle_path.endswith('/'):
            self.pickle_path = self.pickle_path + '/'

    def word2id(self):
        raise NotImplementedError

    def id2word(self):
        raise NotImplementedError

    def class2id(self):
        raise NotImplementedError

    def id2class(self):
        raise NotImplementedError

    def embedding(self):
        raise NotImplementedError

    def data_train(self):
        raise NotImplementedError

    def data_dev(self):
        raise NotImplementedError

    def data_test(self):
        raise NotImplementedError


class POSPreprocess(BasePreprocess):
    """
@@ -1,5 +1,4 @@
import os
from collections import namedtuple

import numpy as np
import torch
@@ -23,8 +22,6 @@ class CharLM(BaseModel):
    To do:
        - where the data goes, call data savers.
    """
    DataTuple = namedtuple("DataTuple", ["feature", "label"])

    def __init__(self, lstm_batch_size, lstm_seq_len):
        super(CharLM, self).__init__()
        """
@@ -1,46 +0,0 @@
import torch.nn as nn

from fastNLP.models.base_model import BaseModel


class WordSeg(BaseModel):
    """
    PyTorch network for word segmentation
    """

    def __init__(self, hidden_dim, lstm_num_layers, vocab_size, word_emb_dim=100):
        super(WordSeg, self).__init__()

        self.vocab_size = vocab_size
        self.word_emb_dim = word_emb_dim
        self.lstm_num_layers = lstm_num_layers
        self.hidden_dim = hidden_dim

        self.word_emb = nn.Embedding(self.vocab_size, self.word_emb_dim)

        self.lstm = nn.LSTM(input_size=self.word_emb_dim,
                            hidden_size=self.word_emb_dim,
                            num_layers=self.lstm_num_layers,
                            bias=True,
                            dropout=0.5,
                            batch_first=True)

        self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)

    def forward(self, x, hidden):
        """
        :param x: tensor of shape [batch_size, seq_len], vocabulary indices
        :param hidden: (h0, c0), each of shape [num_layers, batch_size, word_emb_dim]
        :return x: unnormalized scores (logits) over vocabulary entries
                hidden: (hidden state, memory cell) from the LSTM
        """
        # [batch_size, seq_len]
        x = self.word_emb(x)
        # [batch_size, seq_len, word_emb_size]
        x, hidden = self.lstm(x, hidden)
        # [batch_size, seq_len, word_emb_size]
        x = x.contiguous().view(x.shape[0] * x.shape[1], -1)
        # [batch_size*seq_len, word_emb_size]
        x = self.linear(x)
        # [batch_size*seq_len, vocab_size]
        return x, hidden
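A quick shape check for the forward pass above (a standalone sketch; the
Variable-style API matches the rest of this diff, but the concrete batch and
vocabulary sizes are illustrative assumptions):

    import torch
    from torch.autograd import Variable

    model = WordSeg(hidden_dim=100, lstm_num_layers=2, vocab_size=1000)
    x = Variable(torch.LongTensor(8, 32).random_(0, 1000))   # [batch, seq_len]
    hidden = (Variable(torch.zeros(2, 8, 100)),
              Variable(torch.zeros(2, 8, 100)))
    out, hidden = model(x, hidden)
    # out: [8 * 32, 1000]; hidden: two tensors of shape [2, 8, 100]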
@@ -58,12 +58,19 @@ epochs = 20
batch_size = 1
pickle_path = "./data_for_tests/"
validate = true
save_best_dev = true
model_saved_path = "./"
rnn_hidden_units = 100
rnn_layers = 1
rnn_bi_direction = true
word_emb_dim = 100
dropout = 0.5
use_crf = true

[POS_test]
save_output = true
validate_in_training = false
validate_in_training = true
save_dev_input = false
save_loss = true
batch_size = 1
pickle_path = "./data_for_tests/"
@@ -1,4 +1,5 @@
import sys

sys.path.append("..")

from fastNLP.loader.config_loader import ConfigLoader, ConfigSection
@@ -9,12 +10,38 @@ from fastNLP.saver.model_saver import ModelSaver
from fastNLP.loader.model_loader import ModelLoader
from fastNLP.action.tester import POSTester
from fastNLP.models.sequence_modeling import SeqLabeling
from fastNLP.action.inference import Inference

data_name = "people.txt"
data_path = "data_for_tests/people.txt"
pickle_path = "data_for_tests"


def test_infer():
    # Define the same model
    model = SeqLabeling(hidden_dim=train_args["rnn_hidden_units"], rnn_num_layer=train_args["rnn_layers"],
                        num_classes=train_args["num_classes"], vocab_size=train_args["vocab_size"],
                        word_emb_dim=train_args["word_emb_dim"], bi_direction=train_args["rnn_bi_direction"],
                        rnn_mode="gru", dropout=train_args["dropout"], use_crf=train_args["use_crf"])

    # Dump trained parameters into the model
    ModelLoader("arbitrary_name", "./saved_model.pkl").load_pytorch(model)
    print("model loaded!")

    # Data Loader
    pos_loader = POSDatasetLoader(data_name, data_path)
    infer_data = pos_loader.load_lines()

    # Preprocessor
    POSPreprocess(infer_data, pickle_path)

    # Inference interface
    infer = Inference()
    results = infer.predict(model, infer_data)


if __name__ == "__main__":
    # Config Loader
    train_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS": train_args})
@@ -24,37 +51,49 @@ if __name__ == "__main__":
    # Preprocessor
    p = POSPreprocess(train_data, pickle_path)
    vocab_size = p.vocab_size
    num_classes = p.num_classes
    train_args["vocab_size"] = vocab_size
    train_args["num_classes"] = num_classes
    train_args["vocab_size"] = p.vocab_size
    train_args["num_classes"] = p.num_classes

    # Trainer
    trainer = POSTrainer(train_args)

    # Model
    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)
    model = SeqLabeling(hidden_dim=train_args["rnn_hidden_units"], rnn_num_layer=train_args["rnn_layers"],
                        num_classes=train_args["num_classes"], vocab_size=train_args["vocab_size"],
                        word_emb_dim=train_args["word_emb_dim"], bi_direction=train_args["rnn_bi_direction"],
                        rnn_mode="gru", dropout=train_args["dropout"], use_crf=train_args["use_crf"])

    # Start training
    trainer.train(model)
    print("Training finished!")

    # Saver
    saver = ModelSaver("./saved_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")

    del model, trainer, pos_loader

    model = SeqLabeling(100, 1, num_classes, vocab_size, bi_direction=True)
    ModelLoader("xxx", "./saved_model.pkl").load_pytorch(model)

    # Define the same model
    model = SeqLabeling(hidden_dim=train_args["rnn_hidden_units"], rnn_num_layer=train_args["rnn_layers"],
                        num_classes=train_args["num_classes"], vocab_size=train_args["vocab_size"],
                        word_emb_dim=train_args["word_emb_dim"], bi_direction=train_args["rnn_bi_direction"],
                        rnn_mode="gru", dropout=train_args["dropout"], use_crf=train_args["use_crf"])

    # Dump trained parameters into the model
    ModelLoader("arbitrary_name", "./saved_model.pkl").load_pytorch(model)
    print("model loaded!")

    # Load test configuration
    test_args = ConfigSection()
    ConfigLoader("config.cfg", "").load_config("./data_for_tests/config", {"POS_test": test_args})
    # test_args = {"save_output": True, "validate_in_training": False, "save_dev_input": False,
    #              "save_loss": True, "batch_size": 1, "pickle_path": pickle_path}

    # Tester
    tester = POSTester(test_args)

    # Start testing
    tester.test(model)

    # print test results
    print(tester.show_matrices())
    print("model tested!")
@@ -1,11 +1,3 @@
from collections import namedtuple

import numpy as np

from model.base_model import ToyModel
from fastNLP.action.trainer import Trainer


def test_trainer():
    Config = namedtuple("config", ["epochs", "validate", "save_when_better"])
    train_config = Config(epochs=5, validate=True, save_when_better=True)
@@ -1,28 +0,0 @@
from fastNLP.action.tester import Tester
from fastNLP.action.trainer import WordSegTrainer
from fastNLP.loader.base_loader import BaseLoader
from fastNLP.models.word_seg_model import WordSeg


def test_wordseg():
    train_config = WordSegTrainer.TrainConfig(epochs=5, validate=False, save_when_better=False,
                                              log_per_step=10, log_validation=False, batch_size=254)
    trainer = WordSegTrainer(train_config)
    model = WordSeg(100, 2, 1000)
    train_data = BaseLoader("load_train", "./data_for_tests/cws_train").load_lines()
    trainer.train(model, train_data)

    test_config = Tester.TestConfig(save_output=False, validate_in_training=False,
                                    save_dev_input=False, save_loss=False, batch_size=254)
    tester = Tester(test_config)
    test_data = BaseLoader("load_test", "./data_for_tests/cws_test").load_lines()
    tester.test(model, test_data)


if __name__ == "__main__":
    test_wordseg()