diff --git a/examples/hwf/README.md b/examples/hwf/README.md index b1a591d..7412ce0 100644 --- a/examples/hwf/README.md +++ b/examples/hwf/README.md @@ -12,7 +12,8 @@ python main.py ## Usage ```bash -usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR] +usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] + [--label-smoothing LABEL_SMOOTHING] [--lr LR] [--batch-size BATCH_SIZE] [--loops LOOPS] [--segment_size SEGMENT_SIZE] [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION] @@ -26,6 +27,8 @@ optional arguments: --no-cuda disables CUDA training --epochs EPOCHS number of epochs in each learning loop iteration (default : 1) + --label-smoothing LABEL_SMOOTHING + label smoothing in cross entropy loss (default : 0.2) --lr LR base model learning rate (default : 0.001) --batch-size BATCH_SIZE base model batch size (default : 32) @@ -100,7 +103,7 @@ We present the results of ABL as follows, which include the reasoning accuracy ( 89.7 96.5 97.2 - 98.6 + 99.2 77.3 diff --git a/examples/hwf/hwf.ipynb b/examples/hwf/hwf.ipynb index baf161d..ab39b10 100644 --- a/examples/hwf/hwf.ipynb +++ b/examples/hwf/hwf.ipynb @@ -166,7 +166,7 @@ "source": [ "# class of symbol may be one of ['1', ..., '9', '+', '-', '*', '/'], total of 13 classes\n", "cls = SymbolNet(num_classes=13, image_size=(45, 45, 1))\n", - "loss_fn = nn.CrossEntropyLoss()\n", + "loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)\n", "optimizer = torch.optim.Adam(cls.parameters(), lr=0.001)\n", "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "\n", @@ -503,13 +503,18 @@ " 89.7\n", " 96.5\n", " 97.2\n", - " 98.6\n", + " 99.2\n", " 77.3\n", " \n", "\n", "\n", "

* timeout: need more than 1 hour to execute

" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/examples/hwf/main.py b/examples/hwf/main.py index e1e297f..fbcd53a 100644 --- a/examples/hwf/main.py +++ b/examples/hwf/main.py @@ -77,6 +77,12 @@ def main(): default=3, help="number of epochs in each learning loop iteration (default : 3)", ) + parser.add_argument( + "--label-smoothing", + type=float, + default=0.2, + help="label smoothing in cross entropy loss (default : 0.2)" + ) parser.add_argument( "--lr", type=float, default=1e-3, help="base model learning rate (default : 0.001)" ) @@ -84,17 +90,14 @@ def main(): "--batch-size", type=int, default=128, help="base model batch size (default : 128)" ) parser.add_argument( - "--loops", type=int, default=5, help="number of loop iterations (default : 5)" + "--loops", type=int, default=3, help="number of loop iterations (default : 3)" ) parser.add_argument( "--segment_size", type=int, default=1000, help="segment size (default : 1000)" ) parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)") parser.add_argument( - "--max-revision", - type=int, - default=-1, - help="maximum revision in reasoner (default : -1)", + "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)" ) parser.add_argument( "--require-more-revision", @@ -128,19 +131,14 @@ def main(): # Build necessary components for BasicNN cls = SymbolNet(num_classes=13, image_size=(45, 45, 1)) - loss_fn = nn.CrossEntropyLoss() + loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) optimizer = torch.optim.Adam(cls.parameters(), lr=args.lr) use_cuda = not args.no_cuda and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") # Build BasicNN base_model = BasicNN( - cls, - loss_fn, - optimizer, - device=device, - batch_size=args.batch_size, - num_epochs=args.epochs, + cls, loss_fn, optimizer, device=device, batch_size=args.batch_size, 
num_epochs=args.epochs, ) # Build ABLModel @@ -175,6 +173,7 @@ def main(): # Train and Test bridge.train( train_data, + val_data=test_data, loops=args.loops, segment_size=args.segment_size, save_interval=args.save_interval, diff --git a/examples/mnist_add/README.md b/examples/mnist_add/README.md index c9f24c6..390c265 100644 --- a/examples/mnist_add/README.md +++ b/examples/mnist_add/README.md @@ -12,7 +12,8 @@ python main.py ## Usage ```bash -usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR] +usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] + [--label-smoothing LABEL_SMOOTHING] [--lr LR] [--alpha ALPHA] [--batch-size BATCH_SIZE] [--loops LOOPS] [--segment_size SEGMENT_SIZE] [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION] @@ -26,6 +27,8 @@ optional arguments: --no-cuda disables CUDA training --epochs EPOCHS number of epochs in each learning loop iteration (default : 1) + --label-smoothing LABEL_SMOOTHING + label smoothing in cross entropy loss (default : 0.2) --lr LR base model learning rate (default : 0.001) --alpha ALPHA alpha in RMSprop (default : 0.9) --batch-size BATCH_SIZE diff --git a/examples/mnist_add/main.py b/examples/mnist_add/main.py index b6b3173..c992c4e 100644 --- a/examples/mnist_add/main.py +++ b/examples/mnist_add/main.py @@ -42,6 +42,12 @@ def main(): default=1, help="number of epochs in each learning loop iteration (default : 1)", ) + parser.add_argument( + "--label-smoothing", + type=float, + default=0.2, + help="label smoothing in cross entropy loss (default : 0.2)", + ) parser.add_argument( "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)" ) @@ -57,10 +63,7 @@ def main(): ) parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)") parser.add_argument( - "--max-revision", - type=int, - default=-1, - help="maximum revision in reasoner (default : -1)", + "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)" ) 
parser.add_argument( "--require-more-revision", @@ -91,7 +94,7 @@ def main(): # Build necessary components for BasicNN cls = LeNet5(num_classes=10) - loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2) + loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) optimizer = RMSprop(cls.parameters(), lr=args.lr, alpha=args.alpha) use_cuda = not args.no_cuda and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu")