import time

import torch
import torch.nn as nn
import torch.optim as optim

import aggregation
import dataloader
import embedding
import encoder
import predict
from fastNLP.models.base_model import BaseModel

# Model hyper-parameters.
WORD_NUM = 357361    # vocabulary size
WORD_SIZE = 100      # word-embedding dimension
HIDDEN_SIZE = 300    # LSTM hidden size (per direction; encoder is bidirectional)
D_A = 350            # self-attention internal dimension
R = 10               # number of attention hops
MLP_HIDDEN = 2000    # classifier hidden-layer width
CLASSES_NUM = 5      # number of sentiment classes


class MyNet(BaseModel):
    """fastNLP-style model: embedding -> BiLSTM -> self-attention -> MLP.

    Mirrors ``Net`` below but exposes the fastNLP ``encode`` / ``aggregate`` /
    ``decode`` hooks instead of a single ``forward``.
    """

    def __init__(self):
        super(MyNet, self).__init__()
        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)
        # Penalization term produced by the last aggregate() call; consumed
        # by decode().
        self.penalty = None

    def encode(self, x):
        # BUG FIX: the original called self.encode(...) here, recursing
        # forever. The intent (see Net.forward) is to embed then run the
        # LSTM encoder.
        return self.encoder(self.embedding(x))

    def aggregate(self, x):
        # BUG FIX: the original called self.aggregate(x), recursing forever.
        # The intent is to apply the self-attention aggregation module,
        # which also returns the penalization term.
        x, self.penalty = self.aggregation(x)
        return x

    def decode(self, x):
        return [self.predict(x), self.penalty]


class Net(nn.Module):
    """
    A model for sentiment analysis using lstm and self-attention
    """

    def __init__(self):
        super(Net, self).__init__()
        self.embedding = embedding.Lookuptable(WORD_NUM, WORD_SIZE)
        self.encoder = encoder.Lstm(WORD_SIZE, HIDDEN_SIZE, 1, 0.5, True)
        self.aggregation = aggregation.Selfattention(2 * HIDDEN_SIZE, D_A, R)
        self.predict = predict.MLP(R * HIDDEN_SIZE * 2, MLP_HIDDEN, CLASSES_NUM)

    def forward(self, x):
        """Return (class logits, self-attention penalization term)."""
        x = self.embedding(x)
        x = self.encoder(x)
        x, penalty = self.aggregation(x)
        x = self.predict(x)
        return x, penalty


def train(model_dict=None, using_cuda=True, learning_rate=0.06,
          momentum=0.3, batch_size=32, epochs=5, coef=1.0, interval=10):
    """
    training procedure

    Args:
    If model_dict is given (a file address), it will continue training on
    the given model. Otherwise, it would train a new model from scratch.
    If using_cuda is true, the training would be conducted on GPU.
    Learning_rate and momentum is for SGD optimizer.
    coef is the coefficent between the cross-entropy loss and the
    penalization term.
    interval is the frequncy of reporting.

    the result will be saved with a form "model_dict_+current time", which
    could be used for further training
    """
    if using_cuda:
        net = Net().cuda()
    else:
        net = Net()

    # Resume from a checkpoint when one is supplied.
    if model_dict is not None:
        net.load_state_dict(torch.load(model_dict))

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    dataset = dataloader.DataLoader("train_set.pkl", batch_size,
                                    using_cuda=using_cuda)

    # statistics accumulated over each reporting interval
    loss_count = 0
    prepare_time = 0
    run_time = 0
    count = 0

    for epoch in range(epochs):
        print("epoch: %d" % (epoch))
        for i, batch in enumerate(dataset):
            t1 = time.time()
            X = batch["feature"]
            y = batch["class"]
            t2 = time.time()

            y_pred, y_penl = net(X)
            # Cross-entropy plus the (batch-averaged) penalization term.
            loss = criterion(y_pred, y) + torch.sum(y_penl) / batch_size * coef
            optimizer.zero_grad()
            loss.backward()
            # FIX: clip_grad_norm was deprecated in favor of the in-place
            # clip_grad_norm_ (identical behavior, no deprecation warning).
            nn.utils.clip_grad_norm_(net.parameters(), 0.5)
            optimizer.step()
            t3 = time.time()

            # FIX: .data[0] on a 0-dim tensor is deprecated/removed in
            # modern PyTorch; .item() is the supported scalar accessor.
            loss_count += torch.sum(y_penl).item()
            prepare_time += (t2 - t1)
            run_time += (t3 - t2)
            p, idx = torch.max(y_pred.data, dim=1)
            count += torch.sum(torch.eq(idx.cpu(), y.data.cpu())).item()

            if (i + 1) % interval == 0:
                print("epoch : %d, iters: %d" % (epoch, i + 1))
                print("loss count:" + str(loss_count / (interval * batch_size)))
                # FIX: corrected "acuracy" typo in the report string.
                print("accuracy:" + str(count / (interval * batch_size)))
                print("penalty:" + str(torch.sum(y_penl).item() / batch_size))
                print("prepare time:" + str(prepare_time))
                print("run time:" + str(run_time))
                prepare_time = 0
                run_time = 0
                loss_count = 0
                count = 0

    string = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
    torch.save(net.state_dict(), "model_dict_%s.dict" % (string))


def test(model_dict, using_cuda=True):
    """Evaluate a saved model on the test set and print its accuracy."""
    if using_cuda:
        net = Net().cuda()
    else:
        net = Net()
    net.load_state_dict(torch.load(model_dict))
    dataset = dataloader.DataLoader("test_set.pkl", batch_size=1,
                                    using_cuda=using_cuda)
    count = 0
    for i, batch in enumerate(dataset):
        X = batch["feature"]
        y = batch["class"]
        y_pred, _ = net(X)
        p, idx = torch.max(y_pred.data, dim=1)
        # FIX: .item() so count stays a Python number and the final division
        # yields a float (see train()).
        count += torch.sum(torch.eq(idx.cpu(), y.data.cpu())).item()
    print("accuracy: %f" % (count / dataset.num))


if __name__ == "__main__":
    train(using_cuda=torch.cuda.is_available())