| @@ -37,8 +37,8 @@ import torch | |||
| from torch import nn | |||
| from .base_model import BaseModel | |||
| from ..core.const import Const | |||
| from ..core._logger import logger | |||
| from ..core.const import Const | |||
| from ..embeddings import BertEmbedding | |||
| @@ -46,11 +46,14 @@ class BertForSequenceClassification(BaseModel): | |||
| """ | |||
| BERT model for classification. | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 文本分类类别数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| def __init__(self, embed: BertEmbedding, num_labels: int=2, dropout=0.1): | |||
| """ | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 文本分类类别数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| super(BertForSequenceClassification, self).__init__() | |||
| self.num_labels = num_labels | |||
| @@ -89,11 +92,14 @@ class BertForSentenceMatching(BaseModel): | |||
| """ | |||
| BERT model for sentence matching. | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: Matching任务类别数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| def __init__(self, embed: BertEmbedding, num_labels: int=2, dropout=0.1): | |||
| """ | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: Matching任务类别数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| super(BertForSentenceMatching, self).__init__() | |||
| self.num_labels = num_labels | |||
| self.bert = embed | |||
| @@ -131,11 +137,14 @@ class BertForMultipleChoice(BaseModel): | |||
| """ | |||
| BERT model for multiple choice. | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_choices: 多选任务选项数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| def __init__(self, embed: BertEmbedding, num_choices=2, dropout=0.1): | |||
| """ | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_choices: 多选任务选项数目,默认值为2. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| super(BertForMultipleChoice, self).__init__() | |||
| self.num_choices = num_choices | |||
| @@ -178,11 +187,14 @@ class BertForTokenClassification(BaseModel): | |||
| """ | |||
| BERT model for token classification. | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 序列标注标签数目,无默认值. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| def __init__(self, embed: BertEmbedding, num_labels, dropout=0.1): | |||
| """ | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 序列标注标签数目,无默认值. | |||
| :param float dropout: dropout的大小,默认值为0.1. | |||
| """ | |||
| super(BertForTokenClassification, self).__init__() | |||
| self.num_labels = num_labels | |||
| @@ -221,10 +233,13 @@ class BertForQuestionAnswering(BaseModel): | |||
| """ | |||
| BERT model for classification. | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 抽取式QA列数,默认值为2(即第一列为start_span, 第二列为end_span). | |||
| """ | |||
| def __init__(self, embed: BertEmbedding, num_labels=2): | |||
| """ | |||
| :param fastNLP.embeddings.BertEmbedding embed: 下游模型的编码器(encoder). | |||
| :param int num_labels: 抽取式QA列数,默认值为2(即第一列为start_span, 第二列为end_span). | |||
| """ | |||
| super(BertForQuestionAnswering, self).__init__() | |||
| self.bert = embed | |||
| @@ -6,23 +6,23 @@ __all__ = [ | |||
| "GraphParser" | |||
| ] | |||
| from collections import defaultdict | |||
| import numpy as np | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.functional as F | |||
| from collections import defaultdict | |||
| from .base_model import BaseModel | |||
| from ..core.const import Const as C | |||
| from ..core.losses import LossFunc | |||
| from ..core.metrics import MetricBase | |||
| from ..core.utils import seq_len_to_mask | |||
| from ..embeddings.utils import get_embeddings | |||
| from ..modules.dropout import TimestepDropout | |||
| from ..modules.encoder.transformer import TransformerEncoder | |||
| from ..modules.encoder.variational_rnn import VarLSTM | |||
| from ..modules.utils import initial_parameter | |||
| from ..embeddings.utils import get_embeddings | |||
| from .base_model import BaseModel | |||
| from ..core.utils import seq_len_to_mask | |||
| def _mst(scores): | |||
| @@ -181,11 +181,14 @@ class ArcBiaffine(nn.Module): | |||
| """ | |||
| Biaffine Dependency Parser 的子模块, 用于构建预测边的图 | |||
| :param hidden_size: 输入的特征维度 | |||
| :param bias: 是否使用bias. Default: ``True`` | |||
| """ | |||
| def __init__(self, hidden_size, bias=True): | |||
| """ | |||
| :param hidden_size: 输入的特征维度 | |||
| :param bias: 是否使用bias. Default: ``True`` | |||
| """ | |||
| super(ArcBiaffine, self).__init__() | |||
| self.U = nn.Parameter(torch.Tensor(hidden_size, hidden_size), requires_grad=True) | |||
| self.has_bias = bias | |||
| @@ -213,13 +216,16 @@ class LabelBilinear(nn.Module): | |||
| """ | |||
| Biaffine Dependency Parser 的子模块, 用于构建预测边类别的图 | |||
| :param in1_features: 输入的特征1维度 | |||
| :param in2_features: 输入的特征2维度 | |||
| :param num_label: 边类别的个数 | |||
| :param bias: 是否使用bias. Default: ``True`` | |||
| """ | |||
| def __init__(self, in1_features, in2_features, num_label, bias=True): | |||
| """ | |||
| :param in1_features: 输入的特征1维度 | |||
| :param in2_features: 输入的特征2维度 | |||
| :param num_label: 边类别的个数 | |||
| :param bias: 是否使用bias. Default: ``True`` | |||
| """ | |||
| super(LabelBilinear, self).__init__() | |||
| self.bilinear = nn.Bilinear(in1_features, in2_features, num_label, bias=bias) | |||
| self.lin = nn.Linear(in1_features + in2_features, num_label, bias=False) | |||
| @@ -241,20 +247,6 @@ class BiaffineParser(GraphParser): | |||
| Biaffine Dependency Parser 实现. | |||
| 论文参考 `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016) <https://arxiv.org/abs/1611.01734>`_ . | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param pos_vocab_size: part-of-speech 词典大小 | |||
| :param pos_emb_dim: part-of-speech 向量维度 | |||
| :param num_label: 边的类别个数 | |||
| :param rnn_layers: rnn encoder的层数 | |||
| :param rnn_hidden_size: rnn encoder 的隐状态维度 | |||
| :param arc_mlp_size: 边预测的MLP维度 | |||
| :param label_mlp_size: 类别预测的MLP维度 | |||
| :param dropout: dropout概率. | |||
| :param encoder: encoder类别, 可选 ('lstm', 'var-lstm', 'transformer'). Default: lstm | |||
| :param use_greedy_infer: 是否在inference时使用贪心算法. | |||
| 若 ``False`` , 使用更加精确但相对缓慢的MST算法. Default: ``False`` | |||
| """ | |||
| def __init__(self, | |||
| @@ -269,6 +261,23 @@ class BiaffineParser(GraphParser): | |||
| dropout=0.3, | |||
| encoder='lstm', | |||
| use_greedy_infer=False): | |||
| """ | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param pos_vocab_size: part-of-speech 词典大小 | |||
| :param pos_emb_dim: part-of-speech 向量维度 | |||
| :param num_label: 边的类别个数 | |||
| :param rnn_layers: rnn encoder的层数 | |||
| :param rnn_hidden_size: rnn encoder 的隐状态维度 | |||
| :param arc_mlp_size: 边预测的MLP维度 | |||
| :param label_mlp_size: 类别预测的MLP维度 | |||
| :param dropout: dropout概率. | |||
| :param encoder: encoder类别, 可选 ('lstm', 'var-lstm', 'transformer'). Default: lstm | |||
| :param use_greedy_infer: 是否在inference时使用贪心算法. | |||
| 若 ``False`` , 使用更加精确但相对缓慢的MST算法. Default: ``False`` | |||
| """ | |||
| super(BiaffineParser, self).__init__() | |||
| rnn_out_size = 2 * rnn_hidden_size | |||
| word_hid_dim = pos_hid_dim = rnn_hidden_size | |||
| @@ -473,17 +482,20 @@ class ParserLoss(LossFunc): | |||
| """ | |||
| 计算parser的loss | |||
| :param pred1: [batch_size, seq_len, seq_len] 边预测logits | |||
| :param pred2: [batch_size, seq_len, num_label] label预测logits | |||
| :param target1: [batch_size, seq_len] 真实边的标注 | |||
| :param target2: [batch_size, seq_len] 真实类别的标注 | |||
| :param seq_len: [batch_size, seq_len] 真实目标的长度 | |||
| :return loss: scalar | |||
| """ | |||
| def __init__(self, pred1=None, pred2=None, | |||
| target1=None, target2=None, | |||
| seq_len=None): | |||
| """ | |||
| :param pred1: [batch_size, seq_len, seq_len] 边预测logits | |||
| :param pred2: [batch_size, seq_len, num_label] label预测logits | |||
| :param target1: [batch_size, seq_len] 真实边的标注 | |||
| :param target2: [batch_size, seq_len] 真实类别的标注 | |||
| :param seq_len: [batch_size, seq_len] 真实目标的长度 | |||
| :return loss: scalar | |||
| """ | |||
| super(ParserLoss, self).__init__(BiaffineParser.loss, | |||
| pred1=pred1, | |||
| pred2=pred2, | |||
| @@ -496,20 +508,22 @@ class ParserMetric(MetricBase): | |||
| """ | |||
| 评估parser的性能 | |||
| :param pred1: 边预测logits | |||
| :param pred2: label预测logits | |||
| :param target1: 真实边的标注 | |||
| :param target2: 真实类别的标注 | |||
| :param seq_len: 序列长度 | |||
| :return dict: 评估结果:: | |||
| UAS: 不带label时, 边预测的准确率 | |||
| LAS: 同时预测边和label的准确率 | |||
| """ | |||
| def __init__(self, pred1=None, pred2=None, | |||
| target1=None, target2=None, seq_len=None): | |||
| """ | |||
| :param pred1: 边预测logits | |||
| :param pred2: label预测logits | |||
| :param target1: 真实边的标注 | |||
| :param target2: 真实类别的标注 | |||
| :param seq_len: 序列长度 | |||
| :return dict: 评估结果:: | |||
| UAS: 不带label时, 边预测的准确率 | |||
| LAS: 同时预测边和label的准确率 | |||
| """ | |||
| super().__init__() | |||
| self._init_param_map(pred1=pred1, pred2=pred2, | |||
| target1=target1, target2=target2, | |||
| @@ -21,12 +21,6 @@ class CNNText(torch.nn.Module): | |||
| 使用CNN进行文本分类的模型 | |||
| 'Yoon Kim. 2014. Convolution Neural Networks for Sentence Classification.' | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int num_classes: 一共有多少类 | |||
| :param int,tuple(int) out_channels: 输出channel的数量。如果为list,则需要与kernel_sizes的数量保持一致 | |||
| :param int,tuple(int) kernel_sizes: 输出channel的kernel大小。 | |||
| :param float dropout: Dropout的大小 | |||
| """ | |||
| def __init__(self, embed, | |||
| @@ -34,6 +28,15 @@ class CNNText(torch.nn.Module): | |||
| kernel_nums=(30, 40, 50), | |||
| kernel_sizes=(1, 3, 5), | |||
| dropout=0.5): | |||
| """ | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int num_classes: 一共有多少类 | |||
| :param int,tuple(int) out_channels: 输出channel的数量。如果为list,则需要与kernel_sizes的数量保持一致 | |||
| :param int,tuple(int) kernel_sizes: 输出channel的kernel大小。 | |||
| :param float dropout: Dropout的大小 | |||
| """ | |||
| super(CNNText, self).__init__() | |||
| # no support for pre-trained embedding currently | |||
| @@ -25,16 +25,19 @@ class BiLSTMCRF(BaseModel): | |||
| """ | |||
| 结构为embedding + BiLSTM + FC + Dropout + CRF. | |||
| :param embed: 支持(1)fastNLP的各种Embedding, (2) tuple, 指明num_embedding, dimension, 如(1000, 100) | |||
| :param num_classes: 一共多少个类 | |||
| :param num_layers: BiLSTM的层数 | |||
| :param hidden_size: BiLSTM的hidden_size,实际hidden size为该值的两倍(前向、后向) | |||
| :param dropout: dropout的概率,0为不dropout | |||
| :param target_vocab: Vocabulary对象,target与index的对应关系 | |||
| :param encoding_type: encoding的类型,支持'bioes', 'bmes', 'bio', 'bmeso'等 | |||
| """ | |||
| def __init__(self, embed, num_classes, num_layers=1, hidden_size=100, dropout=0.5, | |||
| target_vocab=None, encoding_type=None): | |||
| """ | |||
| :param embed: 支持(1)fastNLP的各种Embedding, (2) tuple, 指明num_embedding, dimension, 如(1000, 100) | |||
| :param num_classes: 一共多少个类 | |||
| :param num_layers: BiLSTM的层数 | |||
| :param hidden_size: BiLSTM的hidden_size,实际hidden size为该值的两倍(前向、后向) | |||
| :param dropout: dropout的概率,0为不dropout | |||
| :param target_vocab: Vocabulary对象,target与index的对应关系 | |||
| :param encoding_type: encoding的类型,支持'bioes', 'bmes', 'bio', 'bmeso'等 | |||
| """ | |||
| super().__init__() | |||
| self.embed = get_embeddings(embed) | |||
| @@ -80,13 +83,16 @@ class SeqLabeling(BaseModel): | |||
| 一个基础的Sequence labeling的模型。 | |||
| 用于做sequence labeling的基础类。结构包含一层Embedding,一层LSTM(单向,一层),一层FC,以及一层CRF。 | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int hidden_size: LSTM隐藏层的大小 | |||
| :param int num_classes: 一共有多少类 | |||
| """ | |||
| def __init__(self, embed, hidden_size, num_classes): | |||
| """ | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int hidden_size: LSTM隐藏层的大小 | |||
| :param int num_classes: 一共有多少类 | |||
| """ | |||
| super(SeqLabeling, self).__init__() | |||
| self.embedding = get_embeddings(embed) | |||
| @@ -155,20 +161,21 @@ class SeqLabeling(BaseModel): | |||
| class AdvSeqLabel(nn.Module): | |||
| """ | |||
| 更复杂的Sequence Labelling模型。结构为Embedding, LayerNorm, 双向LSTM(两层),FC,LayerNorm,DropOut,FC,CRF。 | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int hidden_size: LSTM的隐层大小 | |||
| :param int num_classes: 有多少个类 | |||
| :param float dropout: LSTM中以及DropOut层的drop概率 | |||
| :param dict id2words: tag id转为其tag word的表。用于在CRF解码时防止解出非法的顺序,比如'BMES'这个标签规范中,'S' | |||
| 不能出现在'B'之后。这里也支持类似与'B-NN',即'-'前为标签类型的指示,后面为具体的tag的情况。这里不但会保证 | |||
| 'B-NN'后面不为'S-NN'还会保证'B-NN'后面不会出现'M-xx'(任何非'M-NN'和'E-NN'的情况。) | |||
| :param str encoding_type: 支持"BIO", "BMES", "BEMSO", 只有在id2words不为None的情况有用。 | |||
| """ | |||
| def __init__(self, embed, hidden_size, num_classes, dropout=0.3, id2words=None, encoding_type='bmes'): | |||
| """ | |||
| :param tuple(int,int),torch.FloatTensor,nn.Embedding,numpy.ndarray embed: Embedding的大小(传入tuple(int, int), | |||
| 第一个int为vocab_zie, 第二个int为embed_dim); 如果为Tensor, Embedding, ndarray等则直接使用该值初始化Embedding | |||
| :param int hidden_size: LSTM的隐层大小 | |||
| :param int num_classes: 有多少个类 | |||
| :param float dropout: LSTM中以及DropOut层的drop概率 | |||
| :param dict id2words: tag id转为其tag word的表。用于在CRF解码时防止解出非法的顺序,比如'BMES'这个标签规范中,'S' | |||
| 不能出现在'B'之后。这里也支持类似与'B-NN',即'-'前为标签类型的指示,后面为具体的tag的情况。这里不但会保证 | |||
| 'B-NN'后面不为'S-NN'还会保证'B-NN'后面不会出现'M-xx'(任何非'M-NN'和'E-NN'的情况。) | |||
| :param str encoding_type: 支持"BIO", "BMES", "BEMSO", 只有在id2words不为None的情况有用。 | |||
| """ | |||
| super().__init__() | |||
| self.Embedding = get_embeddings(embed) | |||
| @@ -22,15 +22,18 @@ class ESIM(BaseModel): | |||
| ESIM model的一个PyTorch实现 | |||
| 论文参见: https://arxiv.org/pdf/1609.06038.pdf | |||
| :param embed: 初始化的Embedding | |||
| :param int hidden_size: 隐藏层大小,默认值为Embedding的维度 | |||
| :param int num_labels: 目标标签种类数量,默认值为3 | |||
| :param float dropout_rate: dropout的比率,默认值为0.3 | |||
| :param float dropout_embed: 对Embedding的dropout比率,默认值为0.1 | |||
| """ | |||
| def __init__(self, embed, hidden_size=None, num_labels=3, dropout_rate=0.3, | |||
| dropout_embed=0.1): | |||
| """ | |||
| :param embed: 初始化的Embedding | |||
| :param int hidden_size: 隐藏层大小,默认值为Embedding的维度 | |||
| :param int num_labels: 目标标签种类数量,默认值为3 | |||
| :param float dropout_rate: dropout的比率,默认值为0.3 | |||
| :param float dropout_embed: 对Embedding的dropout比率,默认值为0.1 | |||
| """ | |||
| super(ESIM, self).__init__() | |||
| if isinstance(embed, TokenEmbedding) or isinstance(embed, Embedding): | |||
| @@ -11,26 +11,16 @@ __all__ = [ | |||
| import torch | |||
| from torch import nn | |||
| from ..modules.encoder.star_transformer import StarTransformer | |||
| from ..core.const import Const | |||
| from ..core.utils import seq_len_to_mask | |||
| from ..embeddings.utils import get_embeddings | |||
| from ..core.const import Const | |||
| from ..modules.encoder.star_transformer import StarTransformer | |||
| class StarTransEnc(nn.Module): | |||
| """ | |||
| 带word embedding的Star-Transformer Encoder | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param hidden_size: 模型中特征维度. | |||
| :param num_layers: 模型层数. | |||
| :param num_head: 模型中multi-head的head个数. | |||
| :param head_dim: 模型中multi-head中每个head特征维度. | |||
| :param max_len: 模型能接受的最大输入长度. | |||
| :param emb_dropout: 词嵌入的dropout概率. | |||
| :param dropout: 模型除词嵌入外的dropout概率. | |||
| """ | |||
| def __init__(self, embed, | |||
| @@ -41,6 +31,18 @@ class StarTransEnc(nn.Module): | |||
| max_len, | |||
| emb_dropout, | |||
| dropout): | |||
| """ | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象,此时就以传入的对象作为embedding | |||
| :param hidden_size: 模型中特征维度. | |||
| :param num_layers: 模型层数. | |||
| :param num_head: 模型中multi-head的head个数. | |||
| :param head_dim: 模型中multi-head中每个head特征维度. | |||
| :param max_len: 模型能接受的最大输入长度. | |||
| :param emb_dropout: 词嵌入的dropout概率. | |||
| :param dropout: 模型除词嵌入外的dropout概率. | |||
| """ | |||
| super(StarTransEnc, self).__init__() | |||
| self.embedding = get_embeddings(embed) | |||
| emb_dim = self.embedding.embedding_dim | |||
| @@ -104,18 +106,6 @@ class STSeqLabel(nn.Module): | |||
| """ | |||
| 用于序列标注的Star-Transformer模型 | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| def __init__(self, embed, num_cls, | |||
| @@ -127,6 +117,20 @@ class STSeqLabel(nn.Module): | |||
| cls_hidden_size=600, | |||
| emb_dropout=0.1, | |||
| dropout=0.1, ): | |||
| """ | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| super(STSeqLabel, self).__init__() | |||
| self.enc = StarTransEnc(embed=embed, | |||
| hidden_size=hidden_size, | |||
| @@ -167,18 +171,6 @@ class STSeqCls(nn.Module): | |||
| """ | |||
| 用于分类任务的Star-Transformer | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| def __init__(self, embed, num_cls, | |||
| @@ -190,6 +182,20 @@ class STSeqCls(nn.Module): | |||
| cls_hidden_size=600, | |||
| emb_dropout=0.1, | |||
| dropout=0.1, ): | |||
| """ | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| super(STSeqCls, self).__init__() | |||
| self.enc = StarTransEnc(embed=embed, | |||
| hidden_size=hidden_size, | |||
| @@ -230,18 +236,6 @@ class STNLICls(nn.Module): | |||
| """ | |||
| 用于自然语言推断(NLI)的Star-Transformer | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, | |||
| 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| def __init__(self, embed, num_cls, | |||
| @@ -253,6 +247,20 @@ class STNLICls(nn.Module): | |||
| cls_hidden_size=600, | |||
| emb_dropout=0.1, | |||
| dropout=0.1, ): | |||
| """ | |||
| :param embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即 | |||
| embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象, 此时就以传入的对象作为embedding | |||
| :param num_cls: 输出类别个数 | |||
| :param hidden_size: 模型中特征维度. Default: 300 | |||
| :param num_layers: 模型层数. Default: 4 | |||
| :param num_head: 模型中multi-head的head个数. Default: 8 | |||
| :param head_dim: 模型中multi-head中每个head特征维度. Default: 32 | |||
| :param max_len: 模型能接受的最大输入长度. Default: 512 | |||
| :param cls_hidden_size: 分类器隐层维度. Default: 600 | |||
| :param emb_dropout: 词嵌入的dropout概率. Default: 0.1 | |||
| :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1 | |||
| """ | |||
| super(STNLICls, self).__init__() | |||
| self.enc = StarTransEnc(embed=embed, | |||
| hidden_size=hidden_size, | |||