| @@ -1,7 +0,0 @@ | |||
| fastNLP.io.config\_io module | |||
| ============================ | |||
| .. automodule:: fastNLP.io.config_io | |||
| :members: | |||
| :undoc-members: | |||
| :show-inheritance: | |||
| @@ -1,7 +0,0 @@ | |||
| fastNLP.io.file\_reader module | |||
| ============================== | |||
| .. automodule:: fastNLP.io.file_reader | |||
| :members: | |||
| :undoc-members: | |||
| :show-inheritance: | |||
| @@ -12,9 +12,7 @@ Submodules | |||
| .. toctree:: | |||
| fastNLP.io.base_loader | |||
| fastNLP.io.config_io | |||
| fastNLP.io.dataset_loader | |||
| fastNLP.io.embed_loader | |||
| fastNLP.io.file_reader | |||
| fastNLP.io.model_io | |||
| @@ -9,7 +9,6 @@ fastNLP 依赖如下包:: | |||
| torch>=0.4.0 | |||
| numpy | |||
| tensorboardX | |||
| tqdm | |||
| nltk | |||
| @@ -18,4 +17,4 @@ fastNLP 依赖如下包:: | |||
| .. code:: shell | |||
| >>> pip install fitlog | |||
| >>> pip install fastNLP | |||
| @@ -5,16 +5,13 @@ | |||
2. :doc:`DataSetLoader <fastNLP.io.dataset_loader>` classes for reading in data
3. Classes for reading and writing config files, see :doc:`Config-IO <fastNLP.io.config_io>`
4. Classes for saving and loading models, see :doc:`Model-IO <fastNLP.io.model_io>`
3. Classes for saving and loading models, see :doc:`Model-IO <fastNLP.io.model_io>`
The usage of these classes is documented under the corresponding modules.
| """ | |||
| from .embed_loader import EmbedLoader | |||
| from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \ | |||
| PeopleDailyCorpusLoader, Conll2003Loader | |||
| from .config_io import ConfigLoader, ConfigSection, ConfigSaver | |||
| from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver | |||
| __all__ = [ | |||
| @@ -29,10 +26,6 @@ __all__ = [ | |||
| 'PeopleDailyCorpusLoader', | |||
| 'Conll2003Loader', | |||
| 'ConfigLoader', | |||
| 'ConfigSection', | |||
| 'ConfigSaver', | |||
| 'ModelLoader', | |||
| 'ModelSaver', | |||
| ] | |||
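# A hedged usage sketch for the loaders exported above (the file path and column names
# are illustrative; the call pattern mirrors the loader tests further below in this diff).
from fastNLP.io import CSVLoader

loader = CSVLoader(sep='\t', headers=['words', 'label'])
data_set = loader.load('path/to/data.csv')  # a dataset object; len(data_set) gives the number of rows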
| @@ -5,7 +5,6 @@ TODO 详细介绍的表格,与主页相对应 | |||
| """ | |||
| from .base_model import BaseModel | |||
| from .biaffine_parser import BiaffineParser, GraphParser | |||
| from .char_language_model import CharLM | |||
| from .cnn_text_classification import CNNText | |||
| from .sequence_modeling import SeqLabeling, AdvSeqLabel | |||
| from .snli import ESIM | |||
| @@ -1,138 +0,0 @@ | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from ..modules.encoder.lstm import LSTM | |||
| class Highway(nn.Module): | |||
| """Highway network""" | |||
| def __init__(self, input_size): | |||
| super(Highway, self).__init__() | |||
| self.fc1 = nn.Linear(input_size, input_size, bias=True) | |||
| self.fc2 = nn.Linear(input_size, input_size, bias=True) | |||
| def forward(self, x): | |||
t = torch.sigmoid(self.fc1(x))
| return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1 - t, x) | |||
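# A quick shape check for the Highway block above (a sketch; the sizes are illustrative).
# The sigmoid gate t mixes a ReLU transform of x with x itself, so the output keeps the
# input dimensionality.
highway = Highway(input_size=300)
x = torch.randn(8, 300)            # a batch of 8 feature vectors
assert highway(x).shape == x.shape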
| class CharLM(nn.Module): | |||
| """CNN + highway network + LSTM | |||
| # Input:: | |||
| 4D tensor with shape [batch_size, in_channel, height, width] | |||
| # Output:: | |||
| 2D Tensor with shape [batch_size, vocab_size] | |||
| # Arguments:: | |||
char_emb_dim: the size of each character's embedding
word_emb_dim: the size of each word's embedding
| vocab_size: num of unique words | |||
| num_char: num of characters | |||
| """ | |||
| def __init__(self, char_emb_dim, word_emb_dim, | |||
| vocab_size, num_char): | |||
| super(CharLM, self).__init__() | |||
| self.char_emb_dim = char_emb_dim | |||
| self.word_emb_dim = word_emb_dim | |||
| self.vocab_size = vocab_size | |||
# char embedding layer
| self.char_embed = nn.Embedding(num_char, char_emb_dim) | |||
| # convolutions of filters with different sizes | |||
self.convolutions = nn.ModuleList()  # use a ModuleList so the conv parameters are registered with the model
| # list of tuples: (the number of filter, width) | |||
| self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)] | |||
| for out_channel, filter_width in self.filter_num_width: | |||
| self.convolutions.append( | |||
| nn.Conv2d( | |||
| 1, # in_channel | |||
| out_channel, # out_channel | |||
| kernel_size=(char_emb_dim, filter_width), # (height, width) | |||
| bias=True | |||
| ) | |||
| ) | |||
| self.highway_input_dim = sum([x for x, y in self.filter_num_width]) | |||
| self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False) | |||
| # highway net | |||
| self.highway1 = Highway(self.highway_input_dim) | |||
| self.highway2 = Highway(self.highway_input_dim) | |||
| # LSTM | |||
| self.lstm_num_layers = 2 | |||
| self.lstm = LSTM(self.highway_input_dim, hidden_size=self.word_emb_dim, num_layers=self.lstm_num_layers, | |||
| dropout=0.5) | |||
| # output layer | |||
| self.dropout = nn.Dropout(p=0.5) | |||
| self.linear = nn.Linear(self.word_emb_dim, self.vocab_size) | |||
| def forward(self, x): | |||
| # Input: Variable of Tensor with shape [num_seq, seq_len, max_word_len+2] | |||
| # Return: Variable of Tensor with shape [num_words, len(word_dict)] | |||
| lstm_batch_size = x.size()[0] | |||
| lstm_seq_len = x.size()[1] | |||
| x = x.contiguous().view(-1, x.size()[2]) | |||
| # [num_seq*seq_len, max_word_len+2] | |||
| x = self.char_embed(x) | |||
| # [num_seq*seq_len, max_word_len+2, char_emb_dim] | |||
| x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3) | |||
| # [num_seq*seq_len, 1, max_word_len+2, char_emb_dim] | |||
| x = self.conv_layers(x) | |||
| # [num_seq*seq_len, total_num_filters] | |||
| x = self.batch_norm(x) | |||
| # [num_seq*seq_len, total_num_filters] | |||
| x = self.highway1(x) | |||
| x = self.highway2(x) | |||
| # [num_seq*seq_len, total_num_filters] | |||
| x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1) | |||
| # [num_seq, seq_len, total_num_filters] | |||
| x = self.lstm(x) | |||
| # [seq_len, num_seq, hidden_size] | |||
| x = self.dropout(x) | |||
| # [seq_len, num_seq, hidden_size] | |||
| x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1) | |||
| # [num_seq*seq_len, hidden_size] | |||
| x = self.linear(x) | |||
| # [num_seq*seq_len, vocab_size] | |||
| return x | |||
| def conv_layers(self, x): | |||
| chosen_list = list() | |||
| for conv in self.convolutions: | |||
feature_map = torch.tanh(conv(x))
| # (batch_size, out_channel, 1, max_word_len-width+1) | |||
| chosen = torch.max(feature_map, 3)[0] | |||
| # (batch_size, out_channel, 1) | |||
chosen = chosen.squeeze(-1)  # squeeze only the pooled dim so batch_size=1 is not collapsed
| # (batch_size, out_channel) | |||
| chosen_list.append(chosen) | |||
| # (batch_size, total_num_filers) | |||
| return torch.cat(chosen_list, 1) | |||
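# An illustrative forward pass for CharLM (a sketch; every size here is made up).
model = CharLM(char_emb_dim=50, word_emb_dim=128, vocab_size=5000, num_char=100)
chars = torch.randint(0, 100, (4, 20, 12))  # [num_seq, seq_len, max_word_len+2] of char ids
logits = model(chars)                       # [num_seq * seq_len, vocab_size] = [80, 5000]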
| @@ -12,19 +12,21 @@ my_inf = 10e12 | |||
| class ESIM(BaseModel): | |||
| """ESIM模型的一个PyTorch实现。 | |||
| """ | |||
| ESIM模型的一个PyTorch实现。 | |||
| ESIM模型的论文: Enhanced LSTM for Natural Language Inference (arXiv: 1609.06038) | |||
| :param int vocab_size: 词表大小 | |||
| :param int embed_dim: 词嵌入维度 | |||
| :param int hidden_size: LSTM隐层大小 | |||
| :param float dropout: dropout大小,默认为0 | |||
| :param int num_classes: 标签数目,默认为3 | |||
| :param numpy.array init_embedding: 初始词嵌入矩阵,形状为(vocab_size, embed_dim),默认为None,即随机初始化词嵌入矩阵 | |||
| """ | |||
| def __init__(self, vocab_size, embed_dim, hidden_size, dropout=0.0, num_classes=3, init_embedding=None): | |||
| """ | |||
:param int vocab_size: vocabulary size
:param int embed_dim: word embedding dimension
:param int hidden_size: LSTM hidden size
:param float dropout: dropout rate, 0 by default
:param int num_classes: number of labels, 3 by default
:param numpy.array init_embedding: initial word embedding matrix of shape (vocab_size, embed_dim); None by default, i.e. the embedding matrix is randomly initialized
| """ | |||
| super(ESIM, self).__init__() | |||
| self.vocab_size = vocab_size | |||
| self.embed_dim = embed_dim | |||
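# Constructing ESIM with the arguments documented above (a sketch; the sizes and the
# pretrained matrix are illustrative).
import numpy as np

init_embedding = np.random.randn(20000, 300)  # optional (vocab_size, embed_dim) matrix
model = ESIM(vocab_size=20000, embed_dim=300, hidden_size=300,
             dropout=0.3, num_classes=3, init_embedding=init_embedding)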
| @@ -12,8 +12,8 @@ from . import decoder | |||
| from . import encoder | |||
| from .aggregator import * | |||
| from .decoder import * | |||
| from .other_modules import * | |||
| from .dropout import TimestepDropout | |||
| from .encoder import * | |||
| from .utils import get_embeddings | |||
| __version__ = '0.0.0' | |||
| @@ -1,11 +1,7 @@ | |||
| __all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MeanPoolWithMask", "KMaxPool", "Attention", "BiAttention", | |||
| "SelfAttention"] | |||
| __all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool", "MultiHeadAttention"] | |||
| from .pooling import MaxPool | |||
| from .pooling import MaxPoolWithMask | |||
| from .pooling import AvgPool | |||
| from .pooling import MeanPoolWithMask | |||
| from .pooling import KMaxPool | |||
| from .attention import Attention | |||
| from .attention import BiAttention | |||
| from .attention import SelfAttention | |||
| from .attention import MultiHeadAttention | |||
| @@ -1,3 +1,4 @@ | |||
| __all__ =["MultiHeadAttention"] | |||
| import math | |||
| import torch | |||
| @@ -5,27 +6,14 @@ import torch.nn.functional as F | |||
| from torch import nn | |||
| from ..dropout import TimestepDropout | |||
| from ..utils import mask_softmax | |||
| from ..utils import initial_parameter | |||
| class Attention(torch.nn.Module): | |||
| def __init__(self, normalize=False): | |||
| super(Attention, self).__init__() | |||
| self.normalize = normalize | |||
| def forward(self, query, memory, mask): | |||
| similarities = self._atten_forward(query, memory) | |||
| if self.normalize: | |||
| return mask_softmax(similarities, mask) | |||
| return similarities | |||
| def _atten_forward(self, query, memory): | |||
| raise NotImplementedError | |||
| class DotAttention(nn.Module): | |||
| """ | |||
| TODO | |||
| """ | |||
| def __init__(self, key_size, value_size, dropout=0.1): | |||
| super(DotAttention, self).__init__() | |||
| self.key_size = key_size | |||
| @@ -51,15 +39,15 @@ class DotAttention(nn.Module): | |||
| class MultiHeadAttention(nn.Module): | |||
| def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | |||
| """ | |||
| """ | |||
:param input_size: int, the input dimensionality; it is also the output dimensionality.
:param key_size: int, the dimensionality of each head.
:param value_size: int, the dimensionality of the value in each head.
:param num_head: int, the number of heads.
:param dropout: float, dropout rate.
"""
:param input_size: int, the input dimensionality; it is also the output dimensionality.
:param key_size: int, the dimensionality of each head.
:param value_size: int, the dimensionality of the value in each head.
:param num_head: int, the number of heads.
:param dropout: float, dropout rate.
| """ | |||
| def __init__(self, input_size, key_size, value_size, num_head, dropout=0.1): | |||
| super(MultiHeadAttention, self).__init__() | |||
| self.input_size = input_size | |||
| self.key_size = key_size | |||
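# Instantiating the module with the documented arguments (a sketch; the sizes are illustrative).
# input_size is both the input and output dimensionality; key_size/value_size are per-head sizes.
attention = MultiHeadAttention(input_size=512, key_size=64, value_size=64, num_head=8, dropout=0.1)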
| @@ -112,16 +100,16 @@ class MultiHeadAttention(nn.Module): | |||
| class BiAttention(nn.Module): | |||
| """Bi Attention module | |||
| r"""Bi Attention module | |||
| Calculate Bi Attention matrix `e` | |||
| .. math:: | |||
| \\begin{array}{ll} \\\\ | |||
e_{ij} = {a}^{\\mathbf{T}}_{i}{b}_{j} \\\\
\begin{array}{ll} \\
e_{ij} = {a}^{\mathbf{T}}_{i}{b}_{j} \\
| a_i = | |||
| b_j = | |||
| \\end{array} | |||
| \end{array} | |||
| """ | |||
| @@ -171,8 +159,11 @@ class BiAttention(nn.Module): | |||
| return out_x1, out_x2 | |||
| class SelfAttention(nn.Module): | |||
| """Self Attention Module. | |||
| """ | |||
| Self Attention Module. | |||
:param int input_size: hidden dimension of the input tensor
:param int attention_unit: hidden dimension of the output tensor
| :param int attention_hops: | |||
| @@ -1,21 +1,23 @@ | |||
| __all__ = ["MaxPool", "MaxPoolWithMask", "AvgPool"] | |||
| import torch | |||
| import torch.nn as nn | |||
| class MaxPool(nn.Module): | |||
| """Max-pooling模块。""" | |||
| """ | |||
| Max-pooling模块。 | |||
| :param stride: 窗口移动大小,默认为kernel_size | |||
| :param padding: padding的内容,默认为0 | |||
| :param dilation: 控制窗口内元素移动距离的大小 | |||
| :param dimension: MaxPool的维度,支持1,2,3维。 | |||
| :param kernel_size: max pooling的窗口大小,默认为tensor最后k维,其中k为dimension | |||
| :param return_indices: | |||
| :param ceil_mode: | |||
| """ | |||
| def __init__(self, stride=None, padding=0, dilation=1, dimension=1, kernel_size=None, | |||
| return_indices=False, ceil_mode=False): | |||
| """ | |||
:param stride: step size of the pooling window, defaults to kernel_size
:param padding: amount of padding, 0 by default
:param dilation: spacing between elements within the pooling window
:param dimension: dimensionality of the max pooling; 1, 2 and 3 are supported.
:param kernel_size: size of the max-pooling window; defaults to the last k dims of the tensor, where k is dimension
| :param return_indices: | |||
| :param ceil_mode: | |||
| """ | |||
| super(MaxPool, self).__init__() | |||
| assert (1 <= dimension) and (dimension <= 3) | |||
| self.dimension = dimension | |||
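# What 1-d max pooling over time amounts to (a plain-PyTorch sketch, not this class's
# exact forward): keep the strongest activation of each feature across the sequence.
x = torch.randn(4, 15, 256)   # [batch, seq_len, hidden]
pooled, _ = x.max(dim=1)      # [batch, hidden]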
| @@ -110,6 +112,7 @@ class AvgPool(nn.Module): | |||
| class MeanPoolWithMask(nn.Module): | |||
| def __init__(self): | |||
| super(MeanPoolWithMask, self).__init__() | |||
| self.inf = 10e12 | |||
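# Masked mean pooling in plain PyTorch (a sketch of what MeanPoolWithMask computes):
# padding positions are zeroed out and the sum is divided by the true lengths.
x = torch.randn(4, 15, 256)                                       # [batch, seq_len, hidden]
mask = (torch.arange(15) < 9).float().unsqueeze(0).expand(4, -1)  # pretend every sequence has length 9
mean = (x * mask.unsqueeze(-1)).sum(dim=1) / mask.sum(dim=1, keepdim=True)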
| @@ -1,3 +1,4 @@ | |||
| __all__ = ["MLP", "ConditionalRandomField"] | |||
| __all__ = ["MLP", "ConditionalRandomField","viterbi_decode"] | |||
| from .CRF import ConditionalRandomField | |||
| from .MLP import MLP | |||
| from .utils import viterbi_decode | |||
| @@ -1,4 +1,4 @@ | |||
| __all__ = ["viterbi_decode"] | |||
| import torch | |||
| @@ -1,5 +1,5 @@ | |||
| import torch | |||
| __all__ = [] | |||
| class TimestepDropout(torch.nn.Dropout): | |||
| """This module accepts a ``[batch_size, num_timesteps, embedding_dim)]`` and use a single | |||
| @@ -1,11 +1,9 @@ | |||
| from .conv_maxpool import ConvMaxpool | |||
| from .embedding import Embedding | |||
| from .linear import Linear | |||
| from .lstm import LSTM | |||
| from .bert import BertModel | |||
| __all__ = ["LSTM", | |||
| "Embedding", | |||
| "Linear", | |||
| "ConvMaxpool", | |||
| "BertModel"] | |||
| @@ -6,16 +6,15 @@ from ..utils import initial_parameter | |||
| # from torch.nn.init import xavier_uniform | |||
| class ConvolutionCharEncoder(nn.Module): | |||
| """char级别的卷积编码器.""" | |||
| """ | |||
| char级别的卷积编码器. | |||
| :param int char_emb_size: char级别embedding的维度. Default: 50 | |||
| 例: 有26个字符, 每一个的embedding是一个50维的向量, 所以输入的向量维度为50. | |||
| :param tuple feature_maps: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的filter. | |||
| :param tuple kernels: 一个由int组成的tuple. tuple的长度是char级别卷积操作的数目, 第`i`个int表示第`i`个卷积操作的卷积核. | |||
| :param initial_method: 初始化参数的方式, 默认为`xavier normal` | |||
| """ | |||
| def __init__(self, char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5), initial_method=None): | |||
| """ | |||
:param int char_emb_size: dimension of the char-level embeddings. Default: 50
e.g. with 26 characters, each embedded as a 50-dimensional vector, the input vector dimension is 50.
:param tuple feature_maps: a tuple of ints; its length is the number of char-level convolutions, and the i-th int is the number of filters of the i-th convolution.
:param tuple kernels: a tuple of ints; its length is the number of char-level convolutions, and the i-th int is the kernel size of the i-th convolution.
:param initial_method: parameter initialization method. Default: `xavier normal`
| """ | |||
| super(ConvolutionCharEncoder, self).__init__() | |||
| self.convs = nn.ModuleList([ | |||
| nn.Conv2d(1, feature_maps[i], kernel_size=(char_emb_size, kernels[i]), bias=True, padding=(0, 4)) | |||
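# Constructing the char-level CNN encoder with the documented defaults (a sketch; the
# argument values simply repeat the defaults above).
encoder = ConvolutionCharEncoder(char_emb_size=50, feature_maps=(40, 30, 30), kernels=(3, 4, 5))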
| @@ -1,21 +0,0 @@ | |||
| import torch.nn as nn | |||
| from ..utils import initial_parameter | |||
| class Linear(nn.Module): | |||
| """ | |||
| :param int input_size: input size | |||
| :param int output_size: output size | |||
| :param bool bias: | |||
| :param str initial_method: | |||
| """ | |||
| def __init__(self, input_size, output_size, bias=True, initial_method=None): | |||
| super(Linear, self).__init__() | |||
| self.linear = nn.Linear(input_size, output_size, bias) | |||
| initial_parameter(self, initial_method) | |||
| def forward(self, x): | |||
| x = self.linear(x) | |||
| return x | |||
| @@ -19,15 +19,13 @@ class LSTM(nn.Module): | |||
:param batch_first: if ``True``, the input and output ``Tensor`` have shape
(batch, seq, feature). Default: ``True``
:param bias: if ``False``, the model will not use a bias. Default: ``True``
:param get_hidden: whether to return the hidden state `h`. Default: ``False``
| """ | |||
| def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True, | |||
| bidirectional=False, bias=True, initial_method=None, get_hidden=False): | |||
| bidirectional=False, bias=True, initial_method=None): | |||
| super(LSTM, self).__init__() | |||
| self.batch_first = batch_first | |||
| self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=bias, batch_first=batch_first, | |||
| dropout=dropout, bidirectional=bidirectional) | |||
| self.get_hidden = get_hidden | |||
| initial_parameter(self, initial_method) | |||
| def forward(self, x, seq_len=None, h0=None, c0=None): | |||
| @@ -39,7 +37,6 @@ class LSTM(nn.Module): | |||
:param c0: [batch, hidden_size] initial cell state; if ``None``, it is set to a vector of ones. Default: ``None``
:return (output, ht) or output: if ``get_hidden=True``, returns the output sequence of shape [batch, seq_len, hidden_size*num_direction]
together with the hidden state at the last timestep of shape [batch, hidden_size*num_direction].
If ``get_hidden=False``, only the output sequence is returned.
| """ | |||
| if h0 is not None and c0 is not None: | |||
| hx = (h0, c0) | |||
| @@ -61,16 +58,4 @@ class LSTM(nn.Module): | |||
| output = output[:, unsort_idx] | |||
| else: | |||
| output, hx = self.lstm(x, hx) | |||
| if self.get_hidden: | |||
| return output, hx | |||
| return output | |||
| if __name__ == "__main__": | |||
| lstm = LSTM(input_size=2, hidden_size=2, get_hidden=False) | |||
| x = torch.randn((3, 5, 2)) | |||
| seq_lens = torch.tensor([5,1,2]) | |||
| y = lstm(x, seq_lens) | |||
| print(x) | |||
| print(y) | |||
| print(x.size(), y.size(), ) | |||
| return output, hx | |||
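# A usage sketch for the LSTM wrapper above, assuming forward now always returns
# (output, hx) as in the final return statement (the sample sizes reuse the removed
# __main__ block above).
lstm = LSTM(input_size=2, hidden_size=2)
x = torch.randn(3, 5, 2)              # [batch, seq_len, input_size] with batch_first=True
seq_len = torch.tensor([5, 1, 2])     # true lengths of the three sequences
output, (h, c) = lstm(x, seq_len)     # output: [3, 5, hidden_size * num_direction]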
| @@ -1,186 +0,0 @@ | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| import torch.utils.data | |||
| from torch.nn import Parameter | |||
| class GroupNorm(nn.Module): | |||
| def __init__(self, num_features, num_groups=20, eps=1e-5): | |||
| super(GroupNorm, self).__init__() | |||
| self.weight = nn.Parameter(torch.ones(1, num_features, 1)) | |||
| self.bias = nn.Parameter(torch.zeros(1, num_features, 1)) | |||
| self.num_groups = num_groups | |||
| self.eps = eps | |||
| def forward(self, x): | |||
| N, C, H = x.size() | |||
| G = self.num_groups | |||
| assert C % G == 0 | |||
| x = x.view(N, G, -1) | |||
| mean = x.mean(-1, keepdim=True) | |||
| var = x.var(-1, keepdim=True) | |||
| x = (x - mean) / (var + self.eps).sqrt() | |||
| x = x.view(N, C, H) | |||
| return x * self.weight + self.bias | |||
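# A shape check for the GroupNorm above (a sketch): the channel dimension C must be
# divisible by num_groups.
gn = GroupNorm(num_features=100, num_groups=20)
x = torch.randn(8, 100, 30)   # [N, C, H]
y = gn(x)                     # same shape; each of the 20 channel groups is normalized separately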
| class LayerNormalization(nn.Module): | |||
| """ | |||
| :param int layer_size: | |||
| :param float eps: default=1e-3 | |||
| """ | |||
| def __init__(self, layer_size, eps=1e-3): | |||
| super(LayerNormalization, self).__init__() | |||
| self.eps = eps | |||
| self.a_2 = nn.Parameter(torch.ones(1, layer_size, requires_grad=True)) | |||
| self.b_2 = nn.Parameter(torch.zeros(1, layer_size, requires_grad=True)) | |||
| def forward(self, z): | |||
| if z.size(1) == 1: | |||
| return z | |||
| mu = torch.mean(z, keepdim=True, dim=-1) | |||
| sigma = torch.std(z, keepdim=True, dim=-1) | |||
| ln_out = (z - mu) / (sigma + self.eps) | |||
| ln_out = ln_out * self.a_2 + self.b_2 | |||
| return ln_out | |||
| class BiLinear(nn.Module): | |||
| def __init__(self, n_left, n_right, n_out, bias=True): | |||
| """ | |||
| :param int n_left: size of left input | |||
| :param int n_right: size of right input | |||
| :param int n_out: size of output | |||
| :param bool bias: If set to False, the layer will not learn an additive bias. Default: True | |||
| """ | |||
| super(BiLinear, self).__init__() | |||
| self.n_left = n_left | |||
| self.n_right = n_right | |||
| self.n_out = n_out | |||
| self.U = Parameter(torch.Tensor(self.n_out, self.n_left, self.n_right)) | |||
| self.W_l = Parameter(torch.Tensor(self.n_out, self.n_left)) | |||
self.W_r = Parameter(torch.Tensor(self.n_out, self.n_right))
| if bias: | |||
| self.bias = Parameter(torch.Tensor(n_out)) | |||
| else: | |||
| self.register_parameter('bias', None) | |||
| self.reset_parameters() | |||
| def reset_parameters(self): | |||
| nn.init.xavier_uniform_(self.W_l) | |||
| nn.init.xavier_uniform_(self.W_r) | |||
| nn.init.constant_(self.bias, 0.) | |||
| nn.init.xavier_uniform_(self.U) | |||
| def forward(self, input_left, input_right): | |||
| """ | |||
| :param Tensor input_left: the left input tensor with shape = [batch1, batch2, ..., left_features] | |||
| :param Tensor input_right: the right input tensor with shape = [batch1, batch2, ..., right_features] | |||
| """ | |||
| left_size = input_left.size() | |||
| right_size = input_right.size() | |||
| assert left_size[:-1] == right_size[:-1], \ | |||
| "batch size of left and right inputs mis-match: (%s, %s)" % (left_size[:-1], right_size[:-1]) | |||
| batch = int(np.prod(left_size[:-1])) | |||
| # convert left and right input to matrices [batch, left_features], [batch, right_features] | |||
| input_left = input_left.view(batch, self.n_left) | |||
| input_right = input_right.view(batch, self.n_right) | |||
| # output [batch, out_features] | |||
| output = F.bilinear(input_left, input_right, self.U, self.bias) | |||
| output = output + \ | |||
| F.linear(input_left, self.W_l, None) + \ | |||
| F.linear(input_right, self.W_r, None) | |||
| # convert back to [batch1, batch2, ..., out_features] | |||
| return output.view(left_size[:-1] + (self.n_out,)) | |||
| def __repr__(self): | |||
| return self.__class__.__name__ + ' (' \ | |||
| + 'in1_features=' + str(self.n_left) \ | |||
| + ', in2_features=' + str(self.n_right) \ | |||
| + ', out_features=' + str(self.n_out) + ')' | |||
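# Bilinear scoring of two inputs with the BiLinear module above (a sketch; sizes are illustrative).
bilinear = BiLinear(n_left=100, n_right=100, n_out=50)
left = torch.randn(32, 10, 100)    # [..., left_features]
right = torch.randn(32, 10, 100)   # [..., right_features]
score = bilinear(left, right)      # [32, 10, 50]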
| class BiAffine(nn.Module): | |||
| def __init__(self, n_enc, n_dec, n_labels, biaffine=True, **kwargs): | |||
| """ | |||
| :param int n_enc: the dimension of the encoder input. | |||
| :param int n_dec: the dimension of the decoder input. | |||
| :param int n_labels: the number of labels of the crf layer | |||
| :param bool biaffine: if apply bi-affine parameter. | |||
| """ | |||
| super(BiAffine, self).__init__() | |||
| self.n_enc = n_enc | |||
| self.n_dec = n_dec | |||
| self.num_labels = n_labels | |||
| self.biaffine = biaffine | |||
| self.W_d = Parameter(torch.Tensor(self.num_labels, self.n_dec)) | |||
| self.W_e = Parameter(torch.Tensor(self.num_labels, self.n_enc)) | |||
| self.b = Parameter(torch.Tensor(self.num_labels, 1, 1)) | |||
| if self.biaffine: | |||
| self.U = Parameter(torch.Tensor(self.num_labels, self.n_dec, self.n_enc)) | |||
| else: | |||
| self.register_parameter('U', None) | |||
| self.reset_parameters() | |||
| def reset_parameters(self): | |||
| nn.init.xavier_uniform_(self.W_d) | |||
| nn.init.xavier_uniform_(self.W_e) | |||
| nn.init.constant_(self.b, 0.) | |||
| if self.biaffine: | |||
| nn.init.xavier_uniform_(self.U) | |||
| def forward(self, input_d, input_e, mask_d=None, mask_e=None): | |||
| """ | |||
| :param Tensor input_d: the decoder input tensor with shape = [batch, length_decoder, input_size] | |||
| :param Tensor input_e: the child input tensor with shape = [batch, length_encoder, input_size] | |||
| :param mask_d: Tensor or None, the mask tensor for decoder with shape = [batch, length_decoder] | |||
| :param mask_e: Tensor or None, the mask tensor for encoder with shape = [batch, length_encoder] | |||
| :returns: Tensor, the energy tensor with shape = [batch, num_label, length, length] | |||
| """ | |||
assert input_d.size(0) == input_e.size(0), 'batch sizes of encoder and decoder are required to be equal.'
| batch, length_decoder, _ = input_d.size() | |||
| _, length_encoder, _ = input_e.size() | |||
| # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder] | |||
| # the output shape is [batch, num_label, length_decoder] | |||
| out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3) | |||
| # compute decoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder] | |||
| # the output shape is [batch, num_label, length_encoder] | |||
| out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2) | |||
| # output shape [batch, num_label, length_decoder, length_encoder] | |||
| if self.biaffine: | |||
| # compute bi-affine part | |||
| # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder] | |||
| # output shape [batch, num_label, length_decoder, input_size_encoder] | |||
| output = torch.matmul(input_d.unsqueeze(1), self.U) | |||
| # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder] | |||
| # output shape [batch, num_label, length_decoder, length_encoder] | |||
| output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3)) | |||
| output = output + out_d + out_e + self.b | |||
| else: | |||
output = out_d + out_e + self.b
| if mask_d is not None: | |||
| output = output * mask_d.unsqueeze(1).unsqueeze(3) * mask_e.unsqueeze(1).unsqueeze(2) | |||
| return output | |||
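# Scoring decoder/encoder states with the BiAffine module above (a sketch; sizes are illustrative).
biaffine = BiAffine(n_enc=200, n_dec=200, n_labels=5)
dec = torch.randn(2, 7, 200)   # [batch, length_decoder, n_dec]
enc = torch.randn(2, 9, 200)   # [batch, length_encoder, n_enc]
energy = biaffine(dec, enc)    # [batch, n_labels, length_decoder, length_encoder]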
| @@ -4,14 +4,6 @@ import torch.nn as nn | |||
| import torch.nn.init as init | |||
| def mask_softmax(matrix, mask): | |||
| if mask is None: | |||
| result = torch.nn.functional.softmax(matrix, dim=-1) | |||
| else: | |||
| raise NotImplementedError | |||
| return result | |||
| def initial_parameter(net, initial_method=None): | |||
| """A method used to initialize the weights of PyTorch models. | |||
| @@ -77,7 +69,8 @@ def initial_parameter(net, initial_method=None): | |||
| def seq_mask(seq_len, max_len): | |||
| """Create sequence mask. | |||
| """ | |||
| Create sequence mask. | |||
| :param seq_len: list or torch.Tensor, the lengths of sequences in a batch. | |||
| :param max_len: int, the maximum sequence length in a batch. | |||
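# What seq_mask produces, written out directly (a sketch): position j of row i is kept
# iff j < seq_len[i].
seq_len = torch.tensor([5, 1, 2])
max_len = 5
mask = torch.arange(max_len).unsqueeze(0) < seq_len.unsqueeze(1)  # [batch, max_len] boolean mask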
| @@ -92,7 +85,8 @@ def seq_mask(seq_len, max_len): | |||
| def get_embeddings(init_embed): | |||
| """得到词嵌入 | |||
| """ | |||
| 得到词嵌入 TODO | |||
| :param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
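# The two kinds of init_embed the docstring above describes (a sketch of how the helper
# might be called; the sizes are illustrative).
size_spec = (10000, 300)                   # (num_embeddings, embedding_dim): build a fresh embedding
existing = torch.nn.Embedding(10000, 300)  # or hand over an existing nn.Embedding object
embed = get_embeddings(size_spec)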
| @@ -1,5 +1,4 @@ | |||
| numpy | |||
| torch>=0.4.0 | |||
| tensorboardX | |||
| tqdm | |||
| nltk | |||
| @@ -1,112 +1,112 @@ | |||
| import os | |||
| import unittest | |||
| from fastNLP.io.config_io import ConfigSection, ConfigLoader, ConfigSaver | |||
| from fastNLP.io import ConfigSection, ConfigLoader, ConfigSaver | |||
| class TestConfigSaver(unittest.TestCase): | |||
| def test_case_1(self): | |||
| config_file_dir = "test/io" | |||
| config_file_dir = "." | |||
| config_file_name = "config" | |||
| config_file_path = os.path.join(config_file_dir, config_file_name) | |||
| tmp_config_file_path = os.path.join(config_file_dir, "tmp_config") | |||
| with open(config_file_path, "r") as f: | |||
| lines = f.readlines() | |||
| standard_section = ConfigSection() | |||
| t_section = ConfigSection() | |||
| ConfigLoader().load_config(config_file_path, {"test": standard_section, "t": t_section}) | |||
| config_saver = ConfigSaver(config_file_path) | |||
| section = ConfigSection() | |||
| section["doubles"] = 0.8 | |||
| section["tt"] = 0.5 | |||
| section["test"] = 105 | |||
| section["str"] = "this is a str" | |||
| test_case_2_section = section | |||
| test_case_2_section["double"] = 0.5 | |||
| for k in section.__dict__.keys(): | |||
| standard_section[k] = section[k] | |||
| config_saver.save_config_file("test", section) | |||
| config_saver.save_config_file("another-test", section) | |||
| config_saver.save_config_file("one-another-test", section) | |||
| config_saver.save_config_file("test-case-2", section) | |||
| test_section = ConfigSection() | |||
| at_section = ConfigSection() | |||
| another_test_section = ConfigSection() | |||
| one_another_test_section = ConfigSection() | |||
| a_test_case_2_section = ConfigSection() | |||
| ConfigLoader().load_config(config_file_path, {"test": test_section, | |||
| "another-test": another_test_section, | |||
| "t": at_section, | |||
| "one-another-test": one_another_test_section, | |||
| "test-case-2": a_test_case_2_section}) | |||
| assert test_section == standard_section | |||
| assert at_section == t_section | |||
| assert another_test_section == section | |||
| assert one_another_test_section == section | |||
| assert a_test_case_2_section == test_case_2_section | |||
| config_saver.save_config_file("test", section) | |||
| with open(config_file_path, "w") as f: | |||
| f.writelines(lines) | |||
| with open(tmp_config_file_path, "w") as f: | |||
| f.write('[test]\n') | |||
f.write('this is a faulty example\n')
| tmp_config_saver = ConfigSaver(tmp_config_file_path) | |||
| try: | |||
| tmp_config_saver._read_section() | |||
| except Exception as e: | |||
| pass | |||
| os.remove(tmp_config_file_path) | |||
| try: | |||
| tmp_config_saver = ConfigSaver("file-NOT-exist") | |||
| except Exception as e: | |||
| pass | |||
| def test_case_2(self): | |||
| config = "[section_A]\n[section_B]\n" | |||
| with open("./test.cfg", "w", encoding="utf-8") as f: | |||
| f.write(config) | |||
| saver = ConfigSaver("./test.cfg") | |||
| section = ConfigSection() | |||
| section["doubles"] = 0.8 | |||
| section["tt"] = [1, 2, 3] | |||
| section["test"] = 105 | |||
| section["str"] = "this is a str" | |||
| saver.save_config_file("section_A", section) | |||
| os.system("rm ./test.cfg") | |||
| def test_case_3(self): | |||
| config = "[section_A]\ndoubles = 0.9\ntt = [1, 2, 3]\n[section_B]\n" | |||
| with open("./test.cfg", "w", encoding="utf-8") as f: | |||
| f.write(config) | |||
| saver = ConfigSaver("./test.cfg") | |||
| section = ConfigSection() | |||
| section["doubles"] = 0.8 | |||
| section["tt"] = [1, 2, 3] | |||
| section["test"] = 105 | |||
| section["str"] = "this is a str" | |||
| saver.save_config_file("section_A", section) | |||
| os.system("rm ./test.cfg") | |||
| @@ -1,31 +1,30 @@ | |||
| import unittest | |||
| from fastNLP.io.dataset_loader import Conll2003Loader, PeopleDailyCorpusLoader, \ | |||
| CSVLoader, SNLILoader, JsonLoader | |||
| from fastNLP.io import Conll2003Loader, PeopleDailyCorpusLoader, CSVLoader, SNLILoader, JsonLoader | |||
| class TestDatasetLoader(unittest.TestCase): | |||
| class TestDatasetLoader(unittest.TestCase): | |||
| def test_Conll2003Loader(self): | |||
| """ | |||
Test the loader of the Conll2003 dataset
| """ | |||
| dataset_path = "test/data_for_tests/conll_2003_example.txt" | |||
| dataset_path = "../data_for_tests/conll_2003_example.txt" | |||
| loader = Conll2003Loader() | |||
| dataset_2003 = loader.load(dataset_path) | |||
| def test_PeopleDailyCorpusLoader(self): | |||
| data_set = PeopleDailyCorpusLoader().load("test/data_for_tests/people_daily_raw.txt") | |||
| data_set = PeopleDailyCorpusLoader().load("../data_for_tests/people_daily_raw.txt") | |||
| def test_CSVLoader(self): | |||
| ds = CSVLoader(sep='\t', headers=['words', 'label'])\ | |||
| .load('test/data_for_tests/tutorial_sample_dataset.csv') | |||
| ds = CSVLoader(sep='\t', headers=['words', 'label']) \ | |||
| .load('../data_for_tests/tutorial_sample_dataset.csv') | |||
| assert len(ds) > 0 | |||
| def test_SNLILoader(self): | |||
| ds = SNLILoader().load('test/data_for_tests/sample_snli.jsonl') | |||
| ds = SNLILoader().load('../data_for_tests/sample_snli.jsonl') | |||
| assert len(ds) == 3 | |||
| def test_JsonLoader(self): | |||
| ds = JsonLoader().load('test/data_for_tests/sample_snli.jsonl') | |||
| ds = JsonLoader().load('../data_for_tests/sample_snli.jsonl') | |||
| assert len(ds) == 3 | |||
| @@ -1,15 +1,15 @@ | |||
| import unittest | |||
| import numpy as np | |||
| from fastNLP.core.vocabulary import Vocabulary | |||
| from fastNLP.io.embed_loader import EmbedLoader | |||
| from fastNLP import Vocabulary | |||
| from fastNLP.io import EmbedLoader | |||
| class TestEmbedLoader(unittest.TestCase): | |||
| def test_load_with_vocab(self): | |||
| vocab = Vocabulary() | |||
| glove = "test/data_for_tests/glove.6B.50d_test.txt" | |||
| word2vec = "test/data_for_tests/word2vec_test.txt" | |||
| glove = "../data_for_tests/glove.6B.50d_test.txt" | |||
| word2vec = "../data_for_tests/word2vec_test.txt" | |||
| vocab.add_word('the') | |||
| vocab.add_word('none') | |||
| g_m = EmbedLoader.load_with_vocab(glove, vocab) | |||
| @@ -20,8 +20,8 @@ class TestEmbedLoader(unittest.TestCase): | |||
| def test_load_without_vocab(self): | |||
| words = ['the', 'of', 'in', 'a', 'to', 'and'] | |||
| glove = "test/data_for_tests/glove.6B.50d_test.txt" | |||
| word2vec = "test/data_for_tests/word2vec_test.txt" | |||
| glove = "../data_for_tests/glove.6B.50d_test.txt" | |||
| word2vec = "../data_for_tests/word2vec_test.txt" | |||
| g_m, vocab = EmbedLoader.load_without_vocab(glove) | |||
| self.assertEqual(g_m.shape, (8, 50)) | |||
| for word in words: | |||