| @@ -29,7 +29,7 @@ class Callback(object): | |||||
| @property | @property | ||||
| def n_steps(self): | def n_steps(self): | ||||
| """total number of steps for training""" | """total number of steps for training""" | ||||
| return self.n_steps | |||||
| return self._trainer.n_steps | |||||
| @property | @property | ||||
| def batch_size(self): | def batch_size(self): | ||||
| @@ -124,6 +124,21 @@ class Callback(object): | |||||
| pass | pass | ||||
def transfer(func):
    """Decorator that forwards a CallbackManager call to every callback.

    The body of the decorated function is never executed; only its name is
    used, to look up the same-named method on each object in
    ``manager.callbacks``.

    :param func: the manager method being decorated; ``func.__name__`` selects
        the method that is invoked on each callback.
    :return: a wrapper that calls that method on every callback, in the order
        they appear in ``manager.callbacks``, forwarding all positional and
        keyword arguments, and returns the list of their return values.
    """
    # Function-scope import so the decorator does not depend on the file's
    # top-of-file import block.
    from functools import wraps

    hook_name = func.__name__

    @wraps(func)  # keep the hook's own __name__/__doc__ instead of "wrapper"
    def wrapper(manager, *arg, **kwargs):
        return [getattr(callback, hook_name)(*arg, **kwargs)
                for callback in manager.callbacks]

    return wrapper
| class CallbackManager(Callback): | class CallbackManager(Callback): | ||||
| """A manager for all callbacks passed into Trainer. | """A manager for all callbacks passed into Trainer. | ||||
| It collects resources inside Trainer and raise callbacks. | It collects resources inside Trainer and raise callbacks. | ||||
| @@ -150,42 +165,59 @@ class CallbackManager(Callback): | |||||
| else: | else: | ||||
| raise TypeError(f"Expect callbacks in CallbackManager(callbacks) to be list. Got {type(callbacks)}.") | raise TypeError(f"Expect callbacks in CallbackManager(callbacks) to be list. Got {type(callbacks)}.") | ||||
| for env_name, env_val in env.items(): | |||||
| for callback in self.callbacks: | |||||
| setattr(callback, '_'+env_name, env_val) # Callback.trainer | |||||
| @transfer | |||||
| def on_train_begin(self): | def on_train_begin(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_epoch_begin(self): | def on_epoch_begin(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_batch_begin(self, batch_x, batch_y, indices): | def on_batch_begin(self, batch_x, batch_y, indices): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_loss_begin(self, batch_y, predict_y): | def on_loss_begin(self, batch_y, predict_y): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_backward_begin(self, loss): | def on_backward_begin(self, loss): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_backward_end(self): | def on_backward_end(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_step_end(self): | def on_step_end(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_batch_end(self): | def on_batch_end(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_valid_begin(self): | def on_valid_begin(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_valid_end(self, eval_result, metric_key): | def on_valid_end(self, eval_result, metric_key): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_epoch_end(self): | def on_epoch_end(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_train_end(self): | def on_train_end(self): | ||||
| pass | pass | ||||
| @transfer | |||||
| def on_exception(self, exception): | def on_exception(self, exception): | ||||
| pass | pass | ||||
| @@ -139,11 +139,14 @@ class TestCallback(unittest.TestCase): | |||||
| def test_readonly_property(self): | def test_readonly_property(self): | ||||
| from fastNLP.core.callback import Callback | from fastNLP.core.callback import Callback | ||||
| passed_epochs = [] | |||||
| total_epochs = 5 | |||||
| class MyCallback(Callback): | class MyCallback(Callback): | ||||
| def __init__(self): | def __init__(self): | ||||
| super(MyCallback, self).__init__() | super(MyCallback, self).__init__() | ||||
| def on_epoch_begin(self, cur_epoch, total_epoch): | |||||
| def on_epoch_begin(self): | |||||
| passed_epochs.append(self.epoch) | |||||
| print(self.n_epochs, self.n_steps, self.batch_size) | print(self.n_epochs, self.n_steps, self.batch_size) | ||||
| print(self.model) | print(self.model) | ||||
| print(self.optimizer) | print(self.optimizer) | ||||
| @@ -151,7 +154,7 @@ class TestCallback(unittest.TestCase): | |||||
| data_set, model = prepare_env() | data_set, model = prepare_env() | ||||
| trainer = Trainer(data_set, model, | trainer = Trainer(data_set, model, | ||||
| loss=BCELoss(pred="predict", target="y"), | loss=BCELoss(pred="predict", target="y"), | ||||
| n_epochs=5, | |||||
| n_epochs=total_epochs, | |||||
| batch_size=32, | batch_size=32, | ||||
| print_every=50, | print_every=50, | ||||
| optimizer=SGD(lr=0.1), | optimizer=SGD(lr=0.1), | ||||
| @@ -161,3 +164,4 @@ class TestCallback(unittest.TestCase): | |||||
| metrics=AccuracyMetric(pred="predict", target="y"), | metrics=AccuracyMetric(pred="predict", target="y"), | ||||
| callbacks=[MyCallback()]) | callbacks=[MyCallback()]) | ||||
| trainer.train() | trainer.train() | ||||
| assert passed_epochs == list(range(1, total_epochs+1)) | |||||
| @@ -217,6 +217,7 @@ class TestDataSetMethods(unittest.TestCase): | |||||
| self.assertTrue(len(ds) > 0) | self.assertTrue(len(ds) > 0) | ||||
| def test_add_null(self): | def test_add_null(self): | ||||
| # TODO test failed because 'fastNLP\core\fieldarray.py:143: RuntimeError' | |||||
| ds = DataSet() | ds = DataSet() | ||||
| ds.add_field('test', []) | ds.add_field('test', []) | ||||
| ds.set_target('test') | ds.set_target('test') | ||||
| @@ -1,112 +0,0 @@ | |||||
| import unittest | |||||
| from fastNLP import DataSet | |||||
| from fastNLP import Instance | |||||
| from fastNLP import Vocabulary | |||||
| from fastNLP.core.losses import CrossEntropyLoss | |||||
| from fastNLP.core.metrics import AccuracyMetric | |||||
class TestENAS(unittest.TestCase):
    """Smoke test that runs the ENAS model, controller and trainer end to end."""

    def testENAS(self):
        """Preprocess the sample CSV, train with ENASTrainer, then run Tester.

        The test makes no assertions on the results; it only checks that the
        whole pipeline runs without raising.
        """
        # Read the data from the CSV file into a DataSet.
        sample_path = "tutorials/sample_data/tutorial_sample_dataset.csv"
        dataset = DataSet.read_csv(sample_path, headers=('raw_sentence', 'label'),
                                   sep='\t')
        print(len(dataset))
        print(dataset[0])
        print(dataset[-3])

        dataset.append(Instance(raw_sentence='fake data', label='0'))

        # Lower-case every raw sentence.
        dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
        # Convert the label to int.
        dataset.apply(lambda x: int(x['label']), new_field_name='label')

        # Split each sentence on whitespace.
        def split_sent(ins):
            return ins['raw_sentence'].split()

        dataset.apply(split_sent, new_field_name='words')

        # Add the sequence length.
        dataset.apply(lambda x: len(x['words']), new_field_name='seq_len')
        print(len(dataset))
        print(dataset[0])

        # DataSet.drop(func) is used here to filter out short instances.
        dataset.drop(lambda x: x['seq_len'] <= 3)
        print(len(dataset))

        # Choose which fields of the DataSet are converted to tensors.
        # Target: the gold labels, used when computing the loss and evaluating.
        dataset.set_target("label")
        # Input: the fields used by the model's forward.
        dataset.set_input("words", "seq_len")

        # Split into a test set and a training set.
        test_data, train_data = dataset.split(0.5)
        print(len(test_data))
        print(len(train_data))

        # Build the vocabulary with Vocabulary.add(word).
        vocab = Vocabulary(min_freq=2)
        train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
        vocab.build_vocab()

        # Index the sentences with Vocabulary.to_index(word).
        train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
        test_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
        print(test_data[0])

        # These preprocessing tools can also be used on their own, e.g. for
        # reinforcement-learning or GAN projects.
        from fastNLP.core.batch import Batch
        from fastNLP.core.sampler import RandomSampler

        batch_iterator = Batch(dataset=train_data, batch_size=2, sampler=RandomSampler())
        for batch_x, batch_y in batch_iterator:
            print("batch_x has: ", batch_x)
            print("batch_y has: ", batch_y)
            break  # only the first batch is inspected

        from fastNLP.models.enas_model import ENASModel
        from fastNLP.models.enas_controller import Controller
        model = ENASModel(embed_num=len(vocab), num_classes=5)
        controller = Controller()

        from fastNLP.models.enas_trainer import ENASTrainer

        # Rename the DataSet fields so that they match the parameter names of
        # the model's forward.
        train_data.rename_field('words', 'word_seq')  # input field matches the forward parameter
        train_data.rename_field('label', 'label_seq')
        test_data.rename_field('words', 'word_seq')
        test_data.rename_field('label', 'label_seq')

        loss = CrossEntropyLoss(pred="output", target="label_seq")
        metric = AccuracyMetric(pred="predict", target="label_seq")

        # Pass the objects built above instead of constructing identical
        # duplicates and leaving `loss` / `metric` unused.
        trainer = ENASTrainer(model=model, controller=controller, train_data=train_data, dev_data=test_data,
                              loss=loss,
                              metrics=metric,
                              check_code_level=-1,
                              save_path=None,
                              batch_size=32,
                              print_every=1,
                              n_epochs=3,
                              final_epochs=1)
        trainer.train()
        print('Train finished!')

        # Evaluate on test_data with Tester, using a fresh metric instance.
        from fastNLP import Tester

        tester = Tester(data=test_data, model=model, metrics=AccuracyMetric(pred="predict", target="label_seq"),
                        batch_size=4)

        acc = tester.test()
        print(acc)
# Run this module's test cases when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()