diff --git a/action/action.py b/action/action.py
@@ -1,3 +1,5 @@
+from saver.logger import Logger
+
 class Action(object):
     """
@@ -6,7 +8,7 @@ class Action(object):
     def __init__(self):
         super(Action, self).__init__()
-        self.logger = None
+        self.logger = Logger("logger_output.txt")

     def load_config(self, args):
         raise NotImplementedError
@@ -14,27 +16,31 @@ class Action(object):
     def load_dataset(self, args):
         raise NotImplementedError

-    def log(self, args):
-        print("call logger.log")
+    def log(self, string):
+        self.logger.log(string)

-    def batchify(self, X, Y=None):
+    def batchify(self, batch_size, X, Y=None):
         """
-        :param X:
-        :param Y:
+        :param batch_size: int
+        :param X: feature matrix of size [n_sample, m_feature]
+        :param Y: label vector of size [n_sample, 1] (optional)
+        :return: iteration: int, the number of steps in each epoch
+                 generator: a generator that yields batch inputs
         """
-        data = X
-        if Y is not None:
-            data = [X, Y]
-        return 2, self._batch_generate(data)
-
-    def _batch_generate(self, data):
-        step = 10
-        for i in range(2):
-            start = i * step
-            end = (i + 1) * step
-            yield data[0][start:end], data[1][start:end]
+        n_samples = X.shape[0]
+        num_iter = n_samples // batch_size  # integer division, so range() gets an int
+        if Y is None:
+            generator = self._batch_generate(batch_size, num_iter, X)
+        else:
+            generator = self._batch_generate(batch_size, num_iter, X, Y)
+        return num_iter, generator
+
+    @staticmethod
+    def _batch_generate(batch_size, num_iter, *data):
+        for step in range(num_iter):
+            start = batch_size * step
+            end = batch_size * (step + 1)  # slice exactly one full batch per step
+            yield tuple(x[start:end, :] for x in data)

     def make_log(self, *args):
         return "log"
diff --git a/action/tester.py b/action/tester.py
@@ -1,3 +1,5 @@
+from collections import namedtuple
+
 import numpy as np

 from action.action import Action
@@ -6,22 +8,39 @@ from action.action import Action
 class Tester(Action):
     """docstring for Tester"""

+    TestConfig = namedtuple("config", ["validate_in_training", "save_dev_input",
+                                       "save_output", "save_loss", "batch_size"])
+
     def __init__(self, test_args):
         """
         :param test_args: named tuple
         """
         super(Tester, self).__init__()
         self.test_args = test_args
         # self.args_dict = {name: value for name, value in self.test_args.__dict__.iteritems()}
-        self.mean_loss = None
+        self.validate_in_training = test_args.validate_in_training
+        self.save_dev_input = test_args.save_dev_input
+        self.valid_x = None
+        self.valid_y = None
+        self.save_output = test_args.save_output
+        self.output = None
+        self.save_loss = test_args.save_loss
+        self.mean_loss = None
+        self.batch_size = test_args.batch_size

     def test(self, network, data):
-        # transform into network input and label
-        X, Y = network.prepare_input(data)
+        network.mode(test=True)  # turn on the testing mode
+
+        if not self.save_dev_input or self.valid_x is None:
+            # transform into network input and label
+            valid_x, valid_y = network.prepare_input(data)
+            if self.validate_in_training:
+                # cache the dev input for reuse in later validation runs
+                self.valid_x = valid_x
+                self.valid_y = valid_y
+        else:
+            valid_x = self.valid_x
+            valid_y = self.valid_y

         # split into batches by self.batch_size
-        iterations, test_batch_generator = self.batchify(X, Y)
+        iterations, test_batch_generator = self.batchify(self.batch_size, valid_x, valid_y)

         batch_output = list()
         loss_history = list()
@@ -33,16 +52,19 @@ class Tester(Action):
             # forward pass from test input to predicted output
             prediction = network.data_forward(batch_x)

-            batch_output.append(prediction)
-
             # get the loss
             loss = network.loss(batch_y, prediction)
-            loss_history.append(loss)
+
+            if self.save_output:
+                batch_output.append(prediction)
+            if self.save_loss:
+                loss_history.append(loss)
+
             self.log(self.make_log(step, loss))

-        self.mean_loss = np.mean(np.array(loss_history))
-        self.output = self.make_output(batch_output)
+        if self.save_loss:
+            self.mean_loss = np.mean(np.array(loss_history))
+        if self.save_output:
+            self.output = self.make_output(batch_output)

     @property
     def loss(self):
@@ -55,3 +77,9 @@ class Tester(Action):
     def make_output(self, batch_output):
         # construct the full prediction from all batch outputs
         return np.concatenate(batch_output, axis=0)
+
+    def load_config(self, args):
+        raise NotImplementedError
+
+    def load_dataset(self, args):
+        raise NotImplementedError
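
A minimal sketch of constructing the reworked Tester; it only exercises the config unpacking, since running test() needs a real model controller implementing mode/prepare_input/data_forward/loss:

from action.tester import Tester

config = Tester.TestConfig(validate_in_training=False, save_dev_input=False,
                           save_output=True, save_loss=False, batch_size=16)
tester = Tester(config)
print(tester.save_output, tester.save_loss, tester.batch_size)  # True False 16
# with save_loss=False, test() skips loss_history and leaves mean_loss as None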
diff --git a/action/trainer.py b/action/trainer.py
@@ -1,3 +1,5 @@
+from collections import namedtuple
+
 from .action import Action
 from .tester import Tester
@@ -6,32 +8,42 @@ class Trainer(Action):
     """
     Trainer for common training logic of all models
     """
+    TrainConfig = namedtuple("config", ["epochs", "validate", "save_when_better",
+                                        "log_per_step", "log_validation", "batch_size"])
+
     def __init__(self, train_args):
         """
        :param train_args: namedtuple
        """
         super(Trainer, self).__init__()
         self.train_args = train_args
         # self.args_dict = {name: value for name, value in self.train_args.__dict__.iteritems()}
-        self.n_epochs = self.train_args.epochs
-        self.validate = self.train_args.validate
-        self.save_when_better = self.train_args.save_when_better
+        self.n_epochs = train_args.epochs
+        self.validate = train_args.validate
+        self.save_when_better = train_args.save_when_better
+        self.log_per_step = train_args.log_per_step
+        self.log_validation = train_args.log_validation
+        self.batch_size = train_args.batch_size  # required by the new batchify signature

-    def train(self, network, train_data, dev_data):
-        """
-        :param network: the model controller
-        :param train_data: raw data for training
-        :param dev_data: raw data for validation
-        :return:
-        """
-        train_x, train_y = network.prepare_input(train_data.train_set, train_data.train_label)
+    def train(self, network, data, dev_data):
+        train_x, train_y = network.prepare_input(data.train_set, data.train_label)
         valid_x, valid_y = network.prepare_input(dev_data.valid_set, dev_data.valid_label)

-        network.mode(test=False)  # turn on the train mode
-        iterations, train_batch_generator = self.batchify(train_x, train_y)
-        loss_history = list()
+        network.mode(test=False)  # turn on the train mode

-        test_args = "..."
+        test_args = Tester.TestConfig(save_output=True, validate_in_training=True,
+                                      save_dev_input=True, save_loss=True, batch_size=16)
         evaluator = Tester(test_args)

+        best_loss = 1e10
+        loss_history = list()
         for epoch in range(self.n_epochs):
+            network.define_optimizer()
+            # rebuild the batch generator each epoch so it starts from the beginning
+            iterations, train_batch_generator = self.batchify(self.batch_size, train_x, train_y)
             for step in range(iterations):
                 batch_x, batch_y = train_batch_generator.__next__()
@@ -39,14 +51,18 @@ class Trainer(Action):
                 loss = network.loss(batch_y, prediction)
                 network.grad_backward()
-                loss_history.append(loss)
-                self.log(self.make_log(epoch, step, loss))
+
+                if step % self.log_per_step == 0:
+                    loss_history.append(loss)
+                    self.log(self.make_log(epoch, step, loss))

             #################### evaluate over dev set ###################
             if self.validate:
-                evaluator.test(network, [valid_x, valid_y])
+                # hand full control of validation over to the tester
+                evaluator.test(network, dev_data)

-                self.log(self.make_valid_log(epoch, evaluator.loss))
+                if self.log_validation:
+                    self.log(self.make_valid_log(epoch, evaluator.loss))
                 if evaluator.loss < best_loss:
                     best_loss = evaluator.loss
                     if self.save_when_better:
@@ -54,15 +70,20 @@ class Trainer(Action):
         # finish training

     @staticmethod
     def prepare_training(network, data):
         return network.prepare_training(data)

     def make_log(self, *args):
-        print("logged")
+        return "make a log"

     def make_valid_log(self, *args):
-        print("logged")
+        return "make a valid log"

     def save_model(self, model):
-        print("model saved")
+        model.save()

-    def load_data(self, data_name):
-        print("load data")
+    def load_config(self, args):
+        raise NotImplementedError
+
+    def load_dataset(self, args):
+        raise NotImplementedError
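
For review, the reworked Trainer reduces to the flow sketched below; the config-only part is runnable as-is, while network, data and dev_data stand for a real model controller and datasets (batch_size follows the updated TrainConfig above):

from action.trainer import Trainer

config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
                             log_per_step=10, log_validation=True, batch_size=16)
trainer = Trainer(config)
print(trainer.n_epochs, trainer.log_per_step, trainer.batch_size)  # 1 10 16
# trainer.train(network, data, dev_data) then runs, per epoch:
#   define_optimizer -> fresh batch generator -> forward/loss/backward per step,
#   logging every log_per_step steps; if validate, a Tester pass over dev_data
#   logs the epoch loss and triggers save_model whenever the dev loss improves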
diff --git a/loader/base_loader.py b/loader/base_loader.py
@@ -13,3 +13,19 @@ class BaseLoader(object):
         with open(self.data_path, "r", encoding="utf-8") as f:
             text = f.read()
         return text
+
+
+class ToyLoader0(BaseLoader):
+    """
+    For charLM
+    """
+
+    def __init__(self, name, path):
+        super(ToyLoader0, self).__init__(name, path)
+
+    def load(self):
+        with open(self.data_path, 'r') as f:
+            corpus = f.read().lower()
+        import re
+        corpus = re.sub(r"<unk>", "unk", corpus)
+        return corpus.split()
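
What ToyLoader0.load() yields, sketched on an inline string rather than a file (same lowercasing, <unk> substitution, and whitespace split):

import re

raw = "The cat sat <unk> the mat"
corpus = re.sub(r"<unk>", "unk", raw.lower())  # mirrors ToyLoader0.load()
print(corpus.split())  # ['the', 'cat', 'sat', 'unk', 'the', 'mat']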
diff --git a/model/char_language_model.py b/model/char_language_model.py
@@ -14,6 +14,8 @@ from model.base_model import BaseModel
 class CharLM(BaseModel):
     """
     Controller of the Character-level Neural Language Model
+    To do:
+        - decide where the data goes and call the data savers there
     """

     def __init__(self):
@@ -28,12 +30,15 @@ class CharLM(BaseModel):
         self.lstm_batch_size = 20
         self.vocab_size = 100
         self.num_char = 150
+        self.max_word_len = 10
         self.num_epoch = 10
         self.old_PPL = 100000
         self.best_PPL = 100000
+        self.data = None  # namedtuple that stores all data sets
+        self.data_ready = False
         self.criterion = nn.CrossEntropyLoss()
         self.loss = None
-        self.optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.85)
         self.use_gpu = False

         # word_emb_dim == hidden_size / num of hidden units
         self.hidden = (to_var(torch.zeros(2, self.lstm_batch_size, self.word_embed_dim)),
@@ -44,10 +49,17 @@ class CharLM(BaseModel):
                             self.vocab_size,
                             self.num_char,
                             use_gpu=self.use_gpu)
         for param in self.model.parameters():
             nn.init.uniform(param.data, -0.05, 0.05)
+
+        self.learning_rate = 0.1
+        self.optimizer = None

     def prepare_input(self, raw_text):
+        """
+        Do some preparation jobs: transform raw data into input vectors.
+        :param raw_text: raw input data
+        :return: (torch.Tensor, torch.Tensor), the feature matrix and the label vector
+        """
+        if not self.data_ready:
+            # To do: this preprocessing should be moved out of prepare_input (below)
@@ -82,10 +94,20 @@ class CharLM(BaseModel):
             DataTuple = namedtuple("DataTuple", ["feature", "label"])
             self.data = DataTuple(feature=input_vec, label=input_label)
-        return self.data.feature, self.data.label
+
+        feature_input = torch.from_numpy(self.data.feature)
+        label_input = torch.from_numpy(self.data.label)
+        # pack the word rows into [num_seq, lstm_seq_len, max_word_len + 2] sequences
+        num_seq = feature_input.size()[0] // self.lstm_seq_len
+        feature_input = feature_input[:num_seq * self.lstm_seq_len, :]
+        feature_input = feature_input.view(-1, self.lstm_seq_len, self.max_word_len + 2)
+        self.num_iter_per_epoch = feature_input.size()[0] // self.lstm_batch_size
+        return feature_input, label_input

     def mode(self, test=False):
-        raise NotImplementedError
+        if test:
+            self.model.eval()
+        else:
+            self.model.train()

     def data_forward(self, x):
         # detach hidden state of LSTM from last batch
@@ -103,6 +125,13 @@ class CharLM(BaseModel):
         self.loss = self.criterion(predict, to_var(truth))
         return self.loss

+    def define_optimizer(self):
+        # redefine the optimizer for every new epoch
+        self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.85)
+
+    def save(self):
+        torch.save(self.model, "cache/model.pkl")
+
     @staticmethod
     def preprocess():
         word_dict, char_dict = create_word_char_dict("valid.txt", "train.txt", "tests.txt")
@@ -122,23 +151,6 @@ class CharLM(BaseModel):
         torch.save(objects, "cache/prep.pt")
         print("Preprocess done.")

-    def forward(self, x, hidden):
-        lstm_batch_size = x.size()[0]
-        lstm_seq_len = x.size()[1]
-
-        x = x.contiguous().view(-1, x.size()[2])
-        x = self.char_embed(x)
-        x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
-        x = self.conv_layers(x)
-        x = self.batch_norm(x)
-        x = self.highway1(x)
-        x = self.highway2(x)
-        x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
-        x, hidden = self.lstm(x, hidden)
-        x = self.dropout(x)
-        x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
-        x = self.linear(x)
-        return x, hidden

 """
 Global Functions
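
The reshaping that prepare_input now performs, isolated with toy shapes (the 103 rows and lstm_seq_len = 10 are arbitrary example values; max_word_len mirrors the constant above):

import torch

lstm_seq_len, max_word_len = 10, 10
feature_input = torch.zeros(103, max_word_len + 2)         # 103 words, 12 chars each
num_seq = feature_input.size()[0] // lstm_seq_len          # -> 10 full sequences
feature_input = feature_input[:num_seq * lstm_seq_len, :]  # drop the 3 leftover rows
feature_input = feature_input.view(-1, lstm_seq_len, max_word_len + 2)
print(feature_input.size())  # torch.Size([10, 10, 12])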
diff --git a/saver/logger.py b/saver/logger.py
@@ -8,4 +8,5 @@ class Logger(BaseSaver):
         super(Logger, self).__init__(save_path)

     def log(self, string):
-        raise NotImplementedError
+        with open(self.save_path, "a") as f:
+            f.write(string + "\n")  # newline so each entry lands on its own line
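
A quick check of the implemented log(); the path matches the default wired into Action.__init__:

from saver.logger import Logger

logger = Logger("logger_output.txt")
logger.log("epoch 0, step 10, loss 2.31")
logger.log("epoch 0, step 20, loss 2.12")
# logger_output.txt now ends with the two entries, one per line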
@@ -0,0 +1,30 @@
+from action.tester import Tester
+from action.trainer import Trainer
+from loader.base_loader import ToyLoader0
+from model.char_language_model import CharLM
+
+
+def test_charlm():
+    train_config = Trainer.TrainConfig(epochs=1, validate=True, save_when_better=True,
+                                       log_per_step=10, log_validation=True, batch_size=16)
+    trainer = Trainer(train_config)
+
+    model = CharLM()
+
+    train_data = ToyLoader0("load_train", "path_to_train_file").load()
+    valid_data = ToyLoader0("load_valid", "path_to_valid_file").load()
+
+    trainer.train(model, train_data, valid_data)
+    trainer.save_model(model)
+
+    test_config = Tester.TestConfig(save_output=True, validate_in_training=True,
+                                    save_dev_input=True, save_loss=True, batch_size=16)
+    tester = Tester(test_config)
+
+    test_data = ToyLoader0("load_test", "path_to_test").load()
+    tester.test(model, test_data)
+
+
+if __name__ == "__main__":
+    test_charlm()