diff --git a/examples/hwf/README.md b/examples/hwf/README.md
index b1a591d..7412ce0 100644
--- a/examples/hwf/README.md
+++ b/examples/hwf/README.md
@@ -12,7 +12,8 @@ python main.py
 ## Usage
 
 ```bash
-usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR]
+usage: main.py [-h] [--no-cuda] [--epochs EPOCHS]
+               [--label-smoothing LABEL_SMOOTHING] [--lr LR]
                [--batch-size BATCH_SIZE]
                [--loops LOOPS] [--segment_size SEGMENT_SIZE]
                [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION]
@@ -26,6 +27,8 @@ optional arguments:
   --no-cuda             disables CUDA training
   --epochs EPOCHS       number of epochs in each learning loop iteration
                         (default : 1)
+  --label-smoothing LABEL_SMOOTHING
+                        label smoothing in cross entropy loss (default : 0.2)
   --lr LR               base model learning rate (default : 0.001)
   --batch-size BATCH_SIZE
                         base model batch size (default : 32)
@@ -100,7 +103,7 @@ We present the results of ABL as follows, which include the reasoning accuracy (
     <td><span style="font-weight:bold">89.7</span></td>
     <td><span style="font-weight:bold">96.5</span></td>
     <td><span style="font-weight:bold">97.2</span></td>
-    <td><span style="font-weight:bold">98.6</span></td>
+    <td><span style="font-weight:bold">99.2</span></td>
     <td><span style="font-weight:bold">77.3</span></td>
   </tr>
 </tbody>
diff --git a/examples/hwf/hwf.ipynb b/examples/hwf/hwf.ipynb
index baf161d..ab39b10 100644
--- a/examples/hwf/hwf.ipynb
+++ b/examples/hwf/hwf.ipynb
@@ -166,7 +166,7 @@
    "source": [
     "# class of symbol may be one of ['1', ..., '9', '+', '-', '*', '/'], total of 13 classes\n",
     "cls = SymbolNet(num_classes=13, image_size=(45, 45, 1))\n",
-    "loss_fn = nn.CrossEntropyLoss()\n",
+    "loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)\n",
     "optimizer = torch.optim.Adam(cls.parameters(), lr=0.001)\n",
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
     "\n",
@@ -503,13 +503,18 @@
     "    <td><span style=\"font-weight:bold\">89.7</span></td>\n",
     "    <td><span style=\"font-weight:bold\">96.5</span></td>\n",
     "    <td><span style=\"font-weight:bold\">97.2</span></td>\n",
-    "    <td><span style=\"font-weight:bold\">98.6</span></td>\n",
+    "    <td><span style=\"font-weight:bold\">99.2</span></td>\n",
     "    <td><span style=\"font-weight:bold\">77.3</span></td>\n",
     "  </tr>\n",
     "</tbody>\n",
     "</table>\n",
     "<p style=\"font-size: 13px;\">* timeout: need more than 1 hour to execute</p>"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
   }
  ],
  "metadata": {
diff --git a/examples/hwf/main.py b/examples/hwf/main.py
index e1e297f..fbcd53a 100644
--- a/examples/hwf/main.py
+++ b/examples/hwf/main.py
@@ -77,6 +77,12 @@ def main():
         default=3,
         help="number of epochs in each learning loop iteration (default : 3)",
     )
+    parser.add_argument(
+        "--label-smoothing", 
+        type=float, 
+        default=0.2, 
+        help="label smoothing in cross entropy loss (default : 0.2)"
+    )
     parser.add_argument(
         "--lr", type=float, default=1e-3, help="base model learning rate (default : 0.001)"
     )
@@ -84,17 +90,14 @@ def main():
         "--batch-size", type=int, default=128, help="base model batch size (default : 128)"
     )
     parser.add_argument(
-        "--loops", type=int, default=5, help="number of loop iterations (default : 5)"
+        "--loops", type=int, default=3, help="number of loop iterations (default : 3)"
     )
     parser.add_argument(
         "--segment_size", type=int, default=1000, help="segment size (default : 1000)"
     )
     parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)")
     parser.add_argument(
-        "--max-revision",
-        type=int,
-        default=-1,
-        help="maximum revision in reasoner (default : -1)",
+        "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)"
     )
     parser.add_argument(
         "--require-more-revision",
@@ -128,19 +131,14 @@ def main():
 
     # Build necessary components for BasicNN
     cls = SymbolNet(num_classes=13, image_size=(45, 45, 1))
-    loss_fn = nn.CrossEntropyLoss()
+    loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing)
     optimizer = torch.optim.Adam(cls.parameters(), lr=args.lr)
     use_cuda = not args.no_cuda and torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")
 
     # Build BasicNN
     base_model = BasicNN(
-        cls,
-        loss_fn,
-        optimizer,
-        device=device,
-        batch_size=args.batch_size,
-        num_epochs=args.epochs,
+        cls, loss_fn, optimizer, device=device, batch_size=args.batch_size, num_epochs=args.epochs,
     )
 
     # Build ABLModel
@@ -175,6 +173,7 @@ def main():
     #  Train and Test
     bridge.train(
         train_data,
+        val_data=test_data,
         loops=args.loops,
         segment_size=args.segment_size,
         save_interval=args.save_interval,
diff --git a/examples/mnist_add/README.md b/examples/mnist_add/README.md
index c9f24c6..390c265 100644
--- a/examples/mnist_add/README.md
+++ b/examples/mnist_add/README.md
@@ -12,7 +12,8 @@ python main.py
 ## Usage
 
 ```bash
-usage: main.py [-h] [--no-cuda] [--epochs EPOCHS] [--lr LR] 
+usage: main.py [-h] [--no-cuda] [--epochs EPOCHS]
+               [--label-smoothing LABEL_SMOOTHING] [--lr LR]
                [--alpha ALPHA] [--batch-size BATCH_SIZE]
                [--loops LOOPS] [--segment_size SEGMENT_SIZE]
                [--save_interval SAVE_INTERVAL] [--max-revision MAX_REVISION]
@@ -26,6 +27,8 @@ optional arguments:
   --no-cuda             disables CUDA training
   --epochs EPOCHS       number of epochs in each learning loop iteration
                         (default : 1)
+  --label-smoothing LABEL_SMOOTHING
+                        label smoothing in cross entropy loss (default : 0.2)
   --lr LR               base model learning rate (default : 0.001)
   --alpha ALPHA         alpha in RMSprop (default : 0.9)
   --batch-size BATCH_SIZE
diff --git a/examples/mnist_add/main.py b/examples/mnist_add/main.py
index b6b3173..c992c4e 100644
--- a/examples/mnist_add/main.py
+++ b/examples/mnist_add/main.py
@@ -42,6 +42,12 @@ def main():
         default=1,
         help="number of epochs in each learning loop iteration (default : 1)",
     )
+    parser.add_argument(
+        "--label-smoothing",
+        type=float,
+        default=0.2,
+        help="label smoothing in cross entropy loss (default : 0.2)",
+    )
     parser.add_argument(
         "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)"
     )
@@ -57,10 +63,7 @@ def main():
     )
     parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)")
     parser.add_argument(
-        "--max-revision",
-        type=int,
-        default=-1,
-        help="maximum revision in reasoner (default : -1)",
+        "--max-revision", type=int, default=-1, help="maximum revision in reasoner (default : -1)"
     )
     parser.add_argument(
         "--require-more-revision",
@@ -91,7 +94,7 @@ def main():
 
     # Build necessary components for BasicNN
     cls = LeNet5(num_classes=10)
-    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)
+    loss_fn = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing)
     optimizer = RMSprop(cls.parameters(), lr=args.lr, alpha=args.alpha)
     use_cuda = not args.no_cuda and torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")