# T6. fastNLP 与 paddle 或 jittor 的结合

&emsp; 1 &ensp; fastNLP 结合 paddle 训练模型
 
&emsp; &emsp; 1.1 &ensp; 关于 paddle 的简单介绍

&emsp; &emsp; 1.2 &ensp; 使用 paddle 搭建并训练模型

&emsp; 2 &ensp; fastNLP 结合 jittor 训练模型

&emsp; &emsp; 2.1 &ensp; 关于 jittor 的简单介绍

&emsp; &emsp; 2.2 &ensp; 使用 jittor 搭建并训练模型

<!-- &emsp; 3 &ensp; fastNLP 实现 paddle 与 pytorch 互转 -->

In [1]:
from datasets import load_dataset

# Load the 'sst2' configuration of the 'glue' dataset through the `datasets` library.
# The result is indexed by split name below (e.g. sst2data['train']).
sst2data = load_dataset('glue', 'sst2')

Reusing dataset glue (/remote-home/xrliu/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
import sys
sys.path.append('..')  # add the parent directory to the import path (presumably the fastNLP checkout)

from fastNLP import DataSet

# Convert the training split to pandas, wrap it as a fastNLP DataSet and keep the first 6000 rows.
train_frame = sst2data['train'].to_pandas()
dataset = DataSet.from_pandas(train_frame)[:6000]


def _to_words_and_target(ins):
    """Turn one instance into lower-cased whitespace tokens ('words') plus its label ('target')."""
    return {'words': ins['sentence'].lower().split(), 'target': ins['label']}


dataset.apply_more(_to_words_and_target, progress_bar="tqdm")

# The original columns are dropped once 'words' and 'target' have been derived from them.
for raw_field in ('sentence', 'label', 'idx'):
    dataset.delete_field(raw_field)

from fastNLP import Vocabulary

# Build the vocabulary from the 'words' field and index that same field with it.
vocab = Vocabulary()
vocab.from_dataset(dataset, field_name='words')
vocab.index_dataset(dataset, field_name='words')

# Split into a training part and an evaluation part (ratio=0.85).
train_dataset, evaluate_dataset = dataset.split(ratio=0.85)
print(type(train_dataset), isinstance(train_dataset, DataSet))

from fastNLP.io import DataBundle

# Bundle both parts under the names 'train' and 'dev'.
data_bundle = DataBundle(datasets={'train': train_dataset, 'dev': evaluate_dataset})

[38;5;2m[i 0604 21:01:38.510813 72 log.cc:351] Load log_sync: 1[m


Processing:   0%|          | 0/6000 [00:00<?, ?it/s]

<class 'fastNLP.core.dataset.dataset.DataSet'> True


## 1. fastNLP 结合 paddle 训练模型



In [3]:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class ClsByPaddle(nn.Layer):
    """Convolutional sentence classifier implemented with paddle layers.

    Pipeline: embedding -> three parallel Conv1D+ReLU branches (kernel sizes
    1, 3 and 5 with 30, 40 and 50 output channels) -> max-pool over the whole
    sequence axis -> concatenation -> dropout/linear/ReLU/linear MLP.

    Besides ``forward`` the class exposes ``train_step`` and ``evaluate_step``,
    which return dicts keyed by 'loss' and by 'pred'/'target' respectively.
    """

    def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, dropout=0.5):
        """Create all sub-layers.

        Args:
            vocab_size: number of rows of the embedding table.
            embedding_dim: embedding width, also the input channel count of every conv branch.
            output_dim: width of the final linear layer.
            hidden_dim: width of the hidden linear layer in the MLP.
            dropout: dropout probability applied to the pooled features.
        """
        super().__init__()
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

        # Three branches looking at 1-, 3- and 5-token windows.
        self.conv1 = nn.Sequential(nn.Conv1D(embedding_dim, 30, 1, padding=0), nn.ReLU())
        self.conv2 = nn.Sequential(nn.Conv1D(embedding_dim, 40, 3, padding=1), nn.ReLU())
        self.conv3 = nn.Sequential(nn.Conv1D(embedding_dim, 50, 5, padding=2), nn.ReLU())

        # Width of the concatenated pooled features: one value per conv output channel.
        pooled_dim = 30 + 40 + 50
        self.mlp = nn.Sequential(
            ('dropout', nn.Dropout(p=dropout)),
            ('linear_1', nn.Linear(pooled_dim, hidden_dim)),
            ('activate', nn.ReLU()),
            ('linear_2', nn.Linear(hidden_dim, output_dim)),
        )

        self.loss_fn = nn.MSELoss()

    def forward(self, words):
        """Return the MLP output for a batch of token ids.

        Args:
            words: token-id tensor; the 3-axis transpose after the embedding
                implies a 2-D input, expected as (batch, seq_len).
        """
        # Swap the last two axes so the embedding width becomes the Conv1D channel axis.
        embedded = self.embedding(words).transpose([0, 2, 1])
        branches = [conv(embedded) for conv in (self.conv1, self.conv2, self.conv3)]

        # A pooling window as long as the sequence axis leaves one value per channel.
        pooled = [F.max_pool1d(feat, feat.shape[-1]).squeeze(2) for feat in branches]

        return self.mlp(paddle.concat(pooled, axis=1))

    def train_step(self, words, target):
        """Compute the MSE loss between the model output and a two-column target.

        The target is expanded to ``(1 - target, target)`` along axis 1, which
        is a one-hot encoding when the labels are 0/1 (assumed - confirm
        against the dataset), and cast to the prediction dtype.
        """
        scores = self(words)
        two_column_target = paddle.stack((1 - target, target), axis=1).cast(scores.dtype)
        return {'loss': self.loss_fn(scores, two_column_target)}

    def evaluate_step(self, words, target):
        """Return the argmax over the last axis of the output together with the given target."""
        predicted = paddle.argmax(self(words), axis=-1)
        return {'pred': predicted, 'target': target}

In [4]:
# Instantiate the paddle classifier: vocabulary-sized embedding table,
# 100-dimensional embeddings and two output units.
model = ClsByPaddle(vocab_size=len(vocab), embedding_dim=100, output_dim=2)

# Bare expression so the notebook displays the layer structure.
model

W0604 21:02:25.453869 19014 gpu_context.cc:278] Please NOTE: device: 0, GPU Compute Capability: 6.1, Driver API Version: 11.1, Runtime API Version: 10.2
W0604 21:02:26.061690 19014 gpu_context.cc:306] device: 0, cuDNN Version: 7.6.


ClsByPaddle(
  (embedding): Embedding(8458, 100, sparse=False)
  (conv1): Sequential(
    (0): Conv1D(100, 30, kernel_size=[1], data_format=NCL)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv1D(100, 40, kernel_size=[3], padding=1, data_format=NCL)
    (1): ReLU()
  )
  (conv3): Sequential(
    (0): Conv1D(100, 50, kernel_size=[5], padding=2, data_format=NCL)
    (1): ReLU()
  )
  (mlp): Sequential(
    (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
    (linear_1): Linear(in_features=120, out_features=64, dtype=float32)
    (activate): ReLU()
    (linear_2): Linear(in_features=64, out_features=2, dtype=float32)
  )
  (loss_fn): MSELoss()
)

In [5]:
from paddle.optimizer import AdamW

# AdamW over all parameters of the paddle model, learning rate 5e-4.
optimizers = AdamW(parameters=model.parameters(), learning_rate=5e-4)

In [6]:
from fastNLP import prepare_paddle_dataloader

# One dataloader per split, batch size 16; only the training loader is shuffled.
train_dataloader = prepare_paddle_dataloader(train_dataset, batch_size=16, shuffle=True)
evaluate_dataloader = prepare_paddle_dataloader(evaluate_dataset, batch_size=16)

# Alternative kept for reference (not executed): pass the DataBundle instead of
# the individual datasets and index the result by split name.
# dl_bundle = prepare_paddle_dataloader(data_bundle, batch_size=16, shuffle=True)

In [7]:
from fastNLP import Trainer, Accuracy

# Wire the paddle model, optimizer, dataloaders and an accuracy metric into a
# fastNLP Trainer that uses the 'paddle' driver for 10 epochs.
trainer = Trainer(
    model=model,
    driver='paddle',
    device='gpu',                              # 'cpu', 'gpu', 'gpu:x'
    n_epochs=10,
    optimizers=optimizers,
    train_dataloader=train_dataloader,         # dl_bundle['train'],
    evaluate_dataloaders=evaluate_dataloader,  # dl_bundle['dev'], 
    metrics={'acc': Accuracy()}
)

In [8]:
# Start training. NOTE(review): num_eval_batch_per_dl=10 presumably caps each
# evaluation at 10 batches per dataloader - confirm against the fastNLP docs.
trainer.run(num_eval_batch_per_dl=10) 

Output()

Output()

## 2. fastNLP 结合 jittor 训练模型

In [11]:
import jittor
import jittor.nn as nn

from jittor import Module


class ClsByJittor(Module):
    """Bidirectional-LSTM sentence classifier implemented with jittor modules.

    Pipeline: embedding -> multi-layer bidirectional LSTM -> concatenation of
    the last two hidden-state entries -> dropout/linear/ReLU/linear/sigmoid MLP.

    Besides ``execute`` the class exposes ``train_step`` and ``evaluate_step``,
    which return dicts keyed by 'loss' and by 'pred'/'target' respectively.
    """

    def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):
        """Create all sub-modules.

        Args:
            vocab_size: number of entries of the embedding table.
            embedding_dim: embedding width, also the LSTM input size.
            output_dim: width of the final linear layer.
            hidden_dim: LSTM hidden size per direction.
            num_layers: number of stacked LSTM layers.
            dropout: dropout probability used by the LSTM and by the MLP.
        """
        super().__init__()
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(num=vocab_size, dim=embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,  # the default is batch_first=False
            num_layers=num_layers,
            bidirectional=True,
            dropout=dropout,
        )

        # Two hidden states of width hidden_dim are concatenated before the MLP.
        feature_dim = hidden_dim * 2
        self.mlp = nn.Sequential([
            nn.Dropout(p=dropout),
            nn.Linear(feature_dim, feature_dim),
            nn.ReLU(),
            nn.Linear(feature_dim, output_dim),
            nn.Sigmoid(),
        ])

        self.loss_fn = nn.MSELoss()

    def execute(self, words):
        """Return the MLP output for a batch of token ids.

        Only the hidden state returned by the LSTM is used; the per-step
        output and the cell state are discarded.
        """
        embedded = self.embedding(words)
        _, (hidden, _) = self.lstm(embedded)
        # Last two entries along the first axis of `hidden` - presumably the two
        # directions of the top LSTM layer (TODO confirm against jittor's layout).
        final_states = jittor.concat((hidden[-1], hidden[-2]), dim=1)
        return self.mlp(final_states)

    def train_step(self, words, target):
        """Compute the MSE loss between the model output and a two-column target.

        The target is expanded to ``(1 - target, target)`` along dim 1, which
        is a one-hot encoding when the labels are 0/1 (assumed - confirm
        against the dataset).
        """
        scores = self(words)
        two_column_target = jittor.stack((1 - target, target), dim=1)
        return {'loss': self.loss_fn(scores, two_column_target)}

    def evaluate_step(self, words, target):
        """Return the argmax index over the last dim of the output together with the given target."""
        scores = self(words)
        # Element 0 of the argmax result is used as the predicted class index.
        predicted = jittor.argmax(scores, dim=-1)[0]
        return {'pred': predicted, 'target': target}

In [12]:
# Instantiate the jittor classifier with the same vocabulary size, embedding
# width and output width as the paddle model; this rebinds the name `model`.
model = ClsByJittor(vocab_size=len(vocab), embedding_dim=100, output_dim=2)

# Bare expression so the notebook displays the module structure.
model

ClsByJittor(
    embedding: Embedding(8458, 100)
    lstm: LSTM(100, 64, 2, bias=True, batch_first=True, dropout=0.5, bidirectional=True, proj_size=0)
    mlp: Sequential(
        0: Dropout(0.5, is_train=False)
        1: Linear(128, 128, float32[128,], None)
        2: relu()
        3: Linear(128, 2, float32[2,], None)
        4: Sigmoid()
    )
    loss_fn: MSELoss(mean)
)

In [13]:
from jittor.optim import AdamW

# AdamW over all parameters of the jittor model, learning rate 5e-3
# (this rebinds the name `optimizers`).
optimizers = AdamW(params=model.parameters(), lr=5e-3)

In [14]:
from fastNLP import prepare_jittor_dataloader

# One dataloader per split, batch size 16; only the training loader is shuffled.
train_dataloader = prepare_jittor_dataloader(train_dataset, batch_size=16, shuffle=True)
evaluate_dataloader = prepare_jittor_dataloader(evaluate_dataset, batch_size=16)

# Alternative kept for reference (not executed): pass the DataBundle instead of
# the individual datasets and index the result by split name.
# dl_bundle = prepare_jittor_dataloader(data_bundle, batch_size=16, shuffle=True)

In [15]:
from fastNLP import Trainer, Accuracy

# Wire the jittor model, optimizer, dataloaders and an accuracy metric into a
# fastNLP Trainer that uses the 'jittor' driver for 10 epochs.
trainer = Trainer(
    model=model,
    driver='jittor',
    device='gpu',                              # 'cpu', 'gpu', 'cuda'
    n_epochs=10,
    optimizers=optimizers,
    train_dataloader=train_dataloader,         # dl_bundle['train'],
    evaluate_dataloaders=evaluate_dataloader,  # dl_bundle['dev'],
    metrics={'acc': Accuracy()}
)

In [16]:
# Start training. NOTE(review): num_eval_batch_per_dl=10 presumably caps each
# evaluation at 10 batches per dataloader - confirm against the fastNLP docs.
trainer.run(num_eval_batch_per_dl=10)

Output()

Output()


Compiling Operators(5/6) used: 8.31s eta: 1.66s 6/6) used: 9.33s eta:    0s 

Compiling Operators(31/31) used: 7.31s eta:    0s 
