| @@ -12,7 +12,8 @@ python main.py | |||
| ## Usage | |||
| ```bash | |||
| usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR] | |||
| usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] | |||
| [--label-smoothing LABEL_SMOOTHING] [--lr LR] | |||
| [--batch-size BATCH_SIZE] | |||
| [--loops LOOPS] [--segment_size SEGMENT_SIZE] | |||
| [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION] | |||
| @@ -26,6 +27,8 @@ optional arguments: | |||
| --no-cuda disables CUDA training | |||
| --epochs EPOCHS number of epochs in each learning loop iteration | |||
| (default : 3) | |||
| --label-smoothing LABEL_SMOOTHING | |||
| label smoothing in cross entropy loss (default : 0.2) | |||
| --lr LR base model learning rate (default : 0.001) | |||
| --batch-size BATCH_SIZE | |||
| base model batch size (default : 128) | |||
| @@ -100,7 +103,7 @@ We present the results of ABL as follows, which include the reasoning accuracy ( | |||
| <td><span style="font-weight:bold">89.7</span></td> | |||
| <td><span style="font-weight:bold">96.5</span></td> | |||
| <td><span style="font-weight:bold">97.2</span></td> | |||
| <td><span style="font-weight:bold">98.6</span></td> | |||
| <td><span style="font-weight:bold">99.2</span></td> | |||
| <td><span style="font-weight:bold">77.3</span></td> | |||
| </tr> | |||
| </tbody> | |||
| @@ -166,7 +166,7 @@ | |||
| "source": [ | |||
| "# class of symbol may be one of ['1', ..., '9', '+', '-', '*', '/'], total of 13 classes\n", | |||
| "cls = SymbolNet(num_classes=13, image_size=(45, 45, 1))\n", | |||
| "loss_fn = nn.CrossEntropyLoss()\n", | |||
| "loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)\n", | |||
| "optimizer = torch.optim.Adam(cls.parameters(), lr=0.001)\n", | |||
| "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", | |||
| "\n", | |||
| @@ -503,13 +503,18 @@ | |||
| " <td><span style=\"font-weight:bold\">89.7</span></td>\n", | |||
| " <td><span style=\"font-weight:bold\">96.5</span></td>\n", | |||
| " <td><span style=\"font-weight:bold\">97.2</span></td>\n", | |||
| " <td><span style=\"font-weight:bold\">98.6</span></td>\n", | |||
| " <td><span style=\"font-weight:bold\">99.2</span></td>\n", | |||
| " <td><span style=\"font-weight:bold\">77.3</span></td>\n", | |||
| " </tr>\n", | |||
| "</tbody>\n", | |||
| "</table>\n", | |||
| "<p style=\"font-size: 13px;\">* timeout: need more than 1 hour to execute</p>" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| @@ -77,6 +77,12 @@ def main(): | |||
| default=3, | |||
| help="number of epochs in each learning loop iteration (default : 3)", | |||
| ) | |||
| parser.add_argument( | |||
| "--label-smoothing", | |||
| type=float, | |||
| default=0.2, | |||
| help="label smoothing in cross entropy loss (default : 0.2)" | |||
| ) | |||
| parser.add_argument( | |||
| "--lr", type=float, default=1e-3, help="base model learning rate (default : 0.001)" | |||
| ) | |||
| @@ -84,17 +90,14 @@ def main(): | |||
| "--batch-size", type=int, default=128, help="base model batch size (default : 128)" | |||
| ) | |||
| parser.add_argument( | |||
| "--loops", type=int, default=5, help="number of loop iterations (default : 5)" | |||
| "--loops", type=int, default=3, help="number of loop iterations (default : 3)" | |||
| ) | |||
| parser.add_argument( | |||
| "--segment_size", type=int, default=1000, help="segment size (default : 1000)" | |||
| ) | |||
| parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)") | |||
| parser.add_argument( | |||
| "--max-revision", | |||
| type=int, | |||
| default=-1, | |||
| help="maximum revision in reasoner (default : -1)", | |||
| "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)" | |||
| ) | |||
| parser.add_argument( | |||
| "--require-more-revision", | |||
| @@ -128,19 +131,14 @@ def main(): | |||
| # Build necessary components for BasicNN | |||
| cls = SymbolNet(num_classes=13, image_size=(45, 45, 1)) | |||
| loss_fn = nn.CrossEntropyLoss() | |||
| loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) | |||
| optimizer = torch.optim.Adam(cls.parameters(), lr=args.lr) | |||
| use_cuda = not args.no_cuda and torch.cuda.is_available() | |||
| device = torch.device("cuda" if use_cuda else "cpu") | |||
| # Build BasicNN | |||
| base_model = BasicNN( | |||
| cls, | |||
| loss_fn, | |||
| optimizer, | |||
| device=device, | |||
| batch_size=args.batch_size, | |||
| num_epochs=args.epochs, | |||
| cls, loss_fn, optimizer, device=device, batch_size=args.batch_size, num_epochs=args.epochs, | |||
| ) | |||
| # Build ABLModel | |||
| @@ -175,6 +173,7 @@ def main(): | |||
| # Train and Test | |||
| bridge.train( | |||
| train_data, | |||
| val_data=test_data, | |||
| loops=args.loops, | |||
| segment_size=args.segment_size, | |||
| save_interval=args.save_interval, | |||
| @@ -12,7 +12,8 @@ python main.py | |||
| ## Usage | |||
| ```bash | |||
| usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR] | |||
| usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] | |||
| [--label-smoothing LABEL_SMOOTHING] [--lr LR] | |||
| [--alpha ALPHA] [--batch-size BATCH_SIZE] | |||
| [--loops LOOPS] [--segment_size SEGMENT_SIZE] | |||
| [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION] | |||
| @@ -26,6 +27,8 @@ optional arguments: | |||
| --no-cuda disables CUDA training | |||
| --epochs EPOCHS number of epochs in each learning loop iteration | |||
| (default : 1) | |||
| --label-smoothing LABEL_SMOOTHING | |||
| label smoothing in cross entropy loss (default : 0.2) | |||
| --lr LR base model learning rate (default : 0.0003) | |||
| --alpha ALPHA alpha in RMSprop (default : 0.9) | |||
| --batch-size BATCH_SIZE | |||
| @@ -42,6 +42,12 @@ def main(): | |||
| default=1, | |||
| help="number of epochs in each learning loop iteration (default : 1)", | |||
| ) | |||
| parser.add_argument( | |||
| "--label-smoothing", | |||
| type=float, | |||
| default=0.2, | |||
| help="label smoothing in cross entropy loss (default : 0.2)", | |||
| ) | |||
| parser.add_argument( | |||
| "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)" | |||
| ) | |||
| @@ -57,10 +63,7 @@ def main(): | |||
| ) | |||
| parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)") | |||
| parser.add_argument( | |||
| "--max-revision", | |||
| type=int, | |||
| default=-1, | |||
| help="maximum revision in reasoner (default : -1)", | |||
| "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)" | |||
| ) | |||
| parser.add_argument( | |||
| "--require-more-revision", | |||
| @@ -91,7 +94,7 @@ def main(): | |||
| # Build necessary components for BasicNN | |||
| cls = LeNet5(num_classes=10) | |||
| loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2) | |||
| loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) | |||
| optimizer = RMSprop(cls.parameters(), lr=args.lr, alpha=args.alpha) | |||
| use_cuda = not args.no_cuda and torch.cuda.is_available() | |||
| device = torch.device("cuda" if use_cuda else "cpu") | |||