diff --git a/example.py b/example.py
index 30c50fc..3751db3 100644
--- a/example.py
+++ b/example.py
@@ -11,12 +11,14 @@
 # ================================================================#
 
 from utils.plog import logger, INFO
+from utils.utils import reduce_dimension
 import torch.nn as nn
 import torch
 from models.nn import LeNet5, SymbolNet
 from models.basic_model import BasicModel, BasicDataset
 from models.wabl_models import DecisionTree, WABLBasicModel
+from sklearn.neighbors import KNeighborsClassifier
 
 from multiprocessing import Pool
 from abducer.abducer_base import AbducerBase
@@ -25,6 +27,7 @@ from datasets.mnist_add.get_mnist_add import get_mnist_add
 from datasets.hwf.get_hwf import get_hwf
 from datasets.hed.get_hed import get_hed, split_equation
 import framework_hed
+import framework_hed_knn
 
 
 def run_test():
@@ -41,29 +44,45 @@ def run_test():
     total_train_data = get_hed(train=True)
     train_data, val_data = split_equation(total_train_data, 3, 1)
     test_data = get_hed(train=False)
-
-    # cls = LeNet5(num_classes=len(kb.pseudo_label_list), image_size=(train_data[0][0][0].shape[1:]))
-    cls = SymbolNet(num_classes=len(kb.pseudo_label_list))
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-    framework_hed.hed_pretrain(kb, cls, recorder)
-
-    criterion = nn.CrossEntropyLoss()
-    optimizer = torch.optim.RMSprop(cls.parameters(), lr=0.001, weight_decay=1e-6)
-    # optimizer = torch.optim.Adam(cls.parameters(), lr=0.00001, betas=(0.9, 0.99))
-
-
-    base_model = BasicModel(cls, criterion, optimizer, device, save_interval=1, save_dir=recorder.save_dir, batch_size=32, num_epochs=10, recorder=recorder)
+
+    # ======================== non-NN model ========================== #
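+    # The scikit-learn KNN baseline works on flat feature vectors, so every
+    # symbol image is first pooled and flattened (utils.utils.reduce_dimension)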
+    reduce_dimension(train_data)
+    reduce_dimension(val_data)
+    reduce_dimension(test_data)
+    base_model = KNeighborsClassifier(n_neighbors=3)
+    pretrain_data_X, pretrain_data_Y = framework_hed_knn.hed_pretrain(base_model)
     model = WABLBasicModel(base_model, kb.pseudo_label_list)
-
-    # train_X, train_Z, train_Y = get_mnist_add(train = True, get_pseudo_label = True)
-    # test_X, test_Z, test_Y = get_mnist_add(train = False, get_pseudo_label = True)
+    model, mapping = framework_hed_knn.train_with_rule(
+        model, abducer, train_data, val_data, (pretrain_data_X, pretrain_data_Y), select_num=10, min_len=5, max_len=8
+    )
+    framework_hed_knn.hed_test(
+        model, abducer, mapping, train_data, test_data, min_len=5, max_len=8
+    )
+    # ============================ End =============================== #
+
+    # ========================== NN model ============================ #
+    # # cls = LeNet5(num_classes=len(kb.pseudo_label_list), image_size=(train_data[0][0][0].shape[1:]))
+    # cls = SymbolNet(num_classes=len(kb.pseudo_label_list))
+    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+    # framework_hed.hed_pretrain(kb, cls, recorder)
+
+    # criterion = nn.CrossEntropyLoss()
+    # optimizer = torch.optim.RMSprop(cls.parameters(), lr=0.001, weight_decay=1e-6)
+    # # optimizer = torch.optim.Adam(cls.parameters(), lr=0.00001, betas=(0.9, 0.99))
+
+    # base_model = BasicModel(cls, criterion, optimizer, device, save_interval=1, save_dir=recorder.save_dir, batch_size=32, num_epochs=10, recorder=recorder)
+    # model = WABLBasicModel(base_model, kb.pseudo_label_list)
+
+    # # train_X, train_Z, train_Y = get_mnist_add(train = True, get_pseudo_label = True)
+    # # test_X, test_Z, test_Y = get_mnist_add(train = False, get_pseudo_label = True)
-    # train_data = get_hwf(train = True, get_pseudo_label = True)
-    # test_data = get_hwf(train = False, get_pseudo_label = True)
+    # # train_data = get_hwf(train = True, get_pseudo_label = True)
+    # # test_data = get_hwf(train = False, get_pseudo_label = True)
-    model, mapping = framework_hed.train_with_rule(model, abducer, train_data, val_data, select_num=10, min_len=5, max_len=8)
-    framework_hed.hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8)
+    # model, mapping = framework_hed.train_with_rule(model, abducer, train_data, val_data, select_num=10, min_len=5, max_len=8)
+    # framework_hed.hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8)
+    # ============================ End =============================== #
 
     recorder.dump()
     return True
diff --git a/framework_hed_knn.py b/framework_hed_knn.py
new file mode 100644
index 0000000..a5e5506
--- /dev/null
+++ b/framework_hed_knn.py
@@ -0,0 +1,407 @@
+# coding: utf-8
+# ================================================================#
+#   Copyright (C) 2021 Freecss All rights reserved.
+#
+#   File Name     : framework_hed_knn.py
+#   Author        : freecss
+#   Email         : karlfreecss@gmail.com
+#   Created Date  : 2021/06/07
+#   Description   :
+#
+# ================================================================#
+
+import os
+import pickle as pk
+import random
+
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+
+from utils.plog import INFO, DEBUG, clocker
+from utils.utils import (
+    flatten,
+    reform_idx,
+    block_sample,
+    gen_mappings,
+    mapping_res,
+    remapping_res,
+    extract_feature,
+)
+
+from models.nn import MLP, SymbolNetAutoencoder
+from models.basic_model import BasicModel, BasicDataset
+from datasets.hed.get_hed import get_pretrain_data
+
+
+def result_statistics(pred_Z, Z, Y, logic_forward, char_acc_flag):
+    result = {}
+    if char_acc_flag:
+        char_acc_num = 0
+        char_num = 0
+        for pred_z, z in zip(pred_Z, Z):
+            char_num += len(z)
+            for zidx in range(len(z)):
+                if pred_z[zidx] == z[zidx]:
+                    char_acc_num += 1
+        char_acc = char_acc_num / char_num
+        result["Character level accuracy"] = char_acc
+
+    abl_acc_num = 0
+    for pred_z, y in zip(pred_Z, Y):
+        if logic_forward(pred_z) == y:
+            abl_acc_num += 1
+    abl_acc = abl_acc_num / len(Y)
+    result["ABL accuracy"] = abl_acc
+
+    return result
+
+
+def filter_data(X, abduced_Z):
+    finetune_Z = []
+    finetune_X = []
+    for abduced_x, abduced_z in zip(X, abduced_Z):
+        # Skip samples whose abduction failed (empty label list); note that
+        # this must be a value comparison, not an identity check
+        if abduced_z != []:
+            finetune_X.append(abduced_x)
+            finetune_Z.append(abduced_z)
+    return finetune_X, finetune_Z
+
+
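+# Fit the non-NN classifier on a small labelled sample of symbol images:
+# the directories "0", "1", "10" and "11" hold images of the symbols
+# 0, 1, "+" and "=" respectively.  Ten images per class are pooled into
+# feature vectors (utils.utils.extract_feature) for cls.fit; accuracy is
+# then estimated on 50 randomly drawn images per class.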
+def hed_pretrain(cls, image_size=(28, 28, 1)):
+    INFO("Pretrain Start")
+    pretrain_data_X, pretrain_data_Y = [], []
+    for i, label in enumerate(["0", "1", "10", "11"]):
+        label_path = os.path.join("./datasets/hed/dataset/mnist_images", label)
+        img_path_list = os.listdir(label_path)
+        for j in range(10):
+            img = cv2.imread(
+                os.path.join(label_path, img_path_list[j]), cv2.IMREAD_GRAYSCALE
+            )
+            img = np.array(cv2.resize(img, (image_size[1], image_size[0])), np.float32)
+            img = (img - 127) / 128.0
+            pretrain_data_X.append(
+                extract_feature(img.reshape((1, image_size[0], image_size[1])))
+            )
+            pretrain_data_Y.append(i)
+    cls.fit(pretrain_data_X, pretrain_data_Y)
+
+    for i, label in enumerate(["0", "1", "10", "11"]):
+        label_path = os.path.join("./datasets/hed/dataset/mnist_images", label)
+        img_path_list = os.listdir(label_path)
+        cnt = 0
+        for j in range(50):
+            img = cv2.imread(
+                os.path.join(label_path, random.choice(img_path_list)),
+                cv2.IMREAD_GRAYSCALE,
+            )
+            img = np.array(cv2.resize(img, (image_size[1], image_size[0])), np.float32)
+            img = (img - 127) / 128.0
+            predict_label = cls.predict(
+                [extract_feature(img.reshape((1, image_size[0], image_size[1])))]
+            )
+            # predict_label = cls.predict_proba(
+            #     [
+            #         extract_feature(
+            #             np.array(img, dtype=np.float32).reshape(
+            #                 (1, image_size[0], image_size[1])
+            #             )
+            #         )
+            #     ]
+            # ).argmax(axis=1)
+            if predict_label == i:
+                cnt += 1
+        INFO("Class %d prediction accuracy is " % i, cnt / 50)
+
+    return pretrain_data_X, pretrain_data_Y
+
+
+def _get_char_acc(model, X, consistent_pred_res, mapping):
+    original_pred_res = model.predict(X)["cls"]
+    pred_res = flatten(mapping_res(original_pred_res, mapping))
+    abduced_res = flatten(consistent_pred_res)
+    INFO("Current model's output: ", pred_res)
+    INFO("Abduced labels: ", abduced_res)
+    assert len(pred_res) == len(abduced_res)
+    return sum(
+        pred_res[idx] == abduced_res[idx] for idx in range(len(pred_res))
+    ) / len(pred_res)
+
+
+def abduce_and_train(model, abducer, mapping, train_X_true, pretrain_data, select_num):
+    select_idx = np.random.randint(len(train_X_true), size=select_num)
+    X = [train_X_true[idx] for idx in select_idx]
+
+    original_pred_res = model.predict(X)["cls"]
+
+    if mapping is None:
+        mappings = gen_mappings(["+", "=", 0, 1], ["+", "=", 0, 1])
+    else:
+        mappings = [mapping]
+
+    consistent_idx = []
+    consistent_pred_res = []
+
+    for m in mappings:
+        pred_res = mapping_res(original_pred_res, m)
+        max_abduce_num = 20
+        solution = abducer.zoopt_get_solution(
+            pred_res, [1] * len(pred_res), max_abduce_num
+        )
+        all_address_flag = reform_idx(solution, pred_res)
+
+        consistent_idx_tmp = []
+        consistent_pred_res_tmp = []
+
+        for idx in range(len(pred_res)):
+            address_idx = [
+                i for i, flag in enumerate(all_address_flag[idx]) if flag != 0
+            ]
+            candidate = abducer.kb.address_by_idx([pred_res[idx]], 1, address_idx, True)
+            if len(candidate) > 0:
+                consistent_idx_tmp.append(idx)
+                consistent_pred_res_tmp.append(candidate[0][0])
+
+        if len(consistent_idx_tmp) > len(consistent_idx):
+            consistent_idx = consistent_idx_tmp
+            consistent_pred_res = consistent_pred_res_tmp
+            if len(mappings) > 1:
+                mapping = m
+
+    if len(consistent_idx) == 0:
+        return 0, 0, None
+
+    if len(mappings) > 1:
+        INFO("Final mapping is: ", mapping)
+
+    INFO("Train pool size is:", len(flatten(consistent_pred_res)))
+    INFO("Start to use abduced pseudo labels to train the model...")
+    pretrain_data_X, pretrain_data_Y = pretrain_data
+    pretrain_mapping = {0: 0, 1: 1, 2: "+", 3: "="}
+    pretrain_data_X = [[x] for x in pretrain_data_X]
+    pretrain_data_Y = [[pretrain_mapping[y]] for y in pretrain_data_Y]
+    model.train(
+        [X[idx] for idx in consistent_idx] + pretrain_data_X,
+        remapping_res(consistent_pred_res + pretrain_data_Y, mapping),
+    )
+
+    consistent_acc = len(consistent_idx) / select_num
+    char_acc = _get_char_acc(
+        model, [X[idx] for idx in consistent_idx], consistent_pred_res, mapping
+    )
+    INFO("consistent_acc is %s, char_acc is %s" % (consistent_acc, char_acc))
+    return consistent_acc, char_acc, mapping
+
+
+def _remove_duplicate_rule(rule_dict):
+    add_nums_dict = {}
+    for r in list(rule_dict):
+        # e.g. r = 'my_op([1], [0], [1, 0])'  ->  add_nums = '10'
+        add_nums = str(r.split("]")[0].split("[")[1]) + str(
+            r.split("]")[1].split("[")[1]
+        )
+        if add_nums in add_nums_dict:
+            old_r = add_nums_dict[add_nums]
+            if rule_dict[r] >= rule_dict[old_r]:
+                rule_dict.pop(old_r)
+                add_nums_dict[add_nums] = r
+            else:
+                rule_dict.pop(r)
+        else:
+            add_nums_dict[add_nums] = r
+    return list(rule_dict)
+
+
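+# Repeatedly sample batches of samples_per_rule equations that the current
+# model labels consistently (kb.logic_forward holds) and let the abducer
+# propose rules for them.  Only rules abduced at least five times across
+# the samples_num draws survive, and _remove_duplicate_rule keeps a single
+# most frequent rule per operand pair.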
+def get_rules_from_data(
+    model, abducer, mapping, train_X_true, samples_per_rule, samples_num
+):
+    rules = []
+    for _ in range(samples_num):
+        while True:
+            select_idx = np.random.randint(len(train_X_true), size=samples_per_rule)
+            X = [train_X_true[idx] for idx in select_idx]
+            original_pred_res = model.predict(X)["cls"]
+            pred_res = mapping_res(original_pred_res, mapping)
+
+            consistent_idx = []
+            consistent_pred_res = []
+            for idx in range(len(pred_res)):
+                if abducer.kb.logic_forward([pred_res[idx]]):
+                    consistent_idx.append(idx)
+                    consistent_pred_res.append(pred_res[idx])
+
+            if len(consistent_pred_res) != 0:
+                rule = abducer.abduce_rules(consistent_pred_res)
+                if rule is not None:
+                    break
+        rules.append(rule)
+
+    all_rule_dict = {}
+    for rule in rules:
+        for r in rule:
+            all_rule_dict[r] = 1 if r not in all_rule_dict else all_rule_dict[r] + 1
+    # Keep only rules that were abduced at least five times
+    rule_dict = {rule: cnt for rule, cnt in all_rule_dict.items() if cnt >= 5}
+    rules = _remove_duplicate_rule(rule_dict)
+
+    return rules
+
+
+def _get_consist_rule_acc(model, abducer, mapping, rules, X):
+    cnt = 0
+    for x in X:
+        original_pred_res = model.predict([x])["cls"]
+        pred_res = flatten(mapping_res(original_pred_res, mapping))
+        if abducer.kb.consist_rule(pred_res, rules):
+            cnt += 1
+    return cnt / len(X)
+
+
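+# Curriculum training over equation lengths min_len..max_len-1 (each course
+# mixes lengths l and l+1).  Per course: abduce pseudo labels and refit the
+# model until consistent_acc and char_acc stay >= 0.9 for five rounds in a
+# row, then accept the course only if the abduced rules separate true from
+# false validation equations; otherwise refit from the pretraining data and
+# start the course over.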
+def train_with_rule(
+    model,
+    abducer,
+    train_data,
+    val_data,
+    pretrain_data,
+    select_num=10,
+    min_len=5,
+    max_len=8,
+):
+    train_X = train_data
+    val_X = val_data
+
+    samples_num = 50
+    samples_per_rule = 3
+
+    # Start training / for each length of equations
+    for equation_len in range(min_len, max_len):
+        INFO(
+            "============== equation_len: %d-%d ================"
+            % (equation_len, equation_len + 1)
+        )
+        train_X_true = train_X[1][equation_len]
+        train_X_false = train_X[0][equation_len]
+        val_X_true = val_X[1][equation_len]
+        val_X_false = val_X[0][equation_len]
+
+        train_X_true.extend(train_X[1][equation_len + 1])
+        train_X_false.extend(train_X[0][equation_len + 1])
+        val_X_true.extend(val_X[1][equation_len + 1])
+        val_X_false.extend(val_X[0][equation_len + 1])
+
+        condition_cnt = 0
+        while True:
+            # The char-to-symbol mapping is unknown in the first course and
+            # is searched anew in each of its rounds
+            if equation_len == min_len:
+                mapping = None
+
+            # Abduce pseudo labels and train the base model
+            consistent_acc, char_acc, new_mapping = abduce_and_train(
+                model, abducer, mapping, train_X_true, pretrain_data, select_num
+            )
+            if consistent_acc == 0:
+                # Abduction failed; keep the previous mapping and retry
+                continue
+            mapping = new_mapping
+
+            # Check whether the model is stable enough to be evaluated
+            if consistent_acc >= 0.9 and char_acc >= 0.9:
+                condition_cnt += 1
+            else:
+                condition_cnt = 0
+
+            # The condition has been satisfied five times in a row
+            if condition_cnt >= 5:
+                INFO("Now checking if we can go to next course")
+                rules = get_rules_from_data(
+                    model, abducer, mapping, train_X_true, samples_per_rule, samples_num
+                )
+                INFO("Learned rules from data:", rules)
+
+                true_consist_rule_acc = _get_consist_rule_acc(
+                    model, abducer, mapping, rules, val_X_true
+                )
+                false_consist_rule_acc = _get_consist_rule_acc(
+                    model, abducer, mapping, rules, val_X_false
+                )
+
+                INFO(
+                    "consist_rule_acc is %f (true), %f (false)\n"
+                    % (true_consist_rule_acc, false_consist_rule_acc)
+                )
+                # Decide whether to move to the next course or restart
+                if true_consist_rule_acc > 0.9 and false_consist_rule_acc < 0.1:
+                    break
+                else:
+                    # Restart this course: refit the classifier on the
+                    # pretraining data.  (The NN version instead reloaded
+                    # "./weights/pretrain_weights.pth" for the first course,
+                    # or "./weights/weights_%d.pth" % (equation_len - 1)
+                    # for later ones, via model.load_state_dict.)
+                    pretrain_data_X, pretrain_data_Y = pretrain_data
+                    model.cls_list[0].fit(pretrain_data_X, pretrain_data_Y)
+                    condition_cnt = 0
+                    INFO("Reload model and retrain")
+
+    return model, mapping
+
+
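+# Final evaluation: abduce a rule set from training equations of lengths
+# min_len..max_len (roughly samples_num draws split evenly across lengths)
+# and report, for every test length from 5 to 26, the fraction of true and
+# of false equations whose predictions are consistent with the rules.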
+def hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8):
+    train_X = train_data
+    test_X = test_data
+
+    # Calculate how many equations should be selected for each length:
+    # equation_samples_num[equation_len] equations are sampled per length
+    INFO("Now begin to abduce the final rule set")
+    len_cnt = max_len - min_len + 1
+    samples_num = 50
+    equation_samples_num = [0] * min_len
+    equation_samples_num += [samples_num // len_cnt] * len_cnt
+    # Give the remainder to the last length so the counts sum to samples_num
+    equation_samples_num[-1] += samples_num % len_cnt
+    assert sum(equation_samples_num) == samples_num
+
+    # Abduce rules
+    rules = []
+    samples_per_rule = 3
+    for equation_len in range(min_len, max_len + 1):
+        equation_rules = get_rules_from_data(
+            model,
+            abducer,
+            mapping,
+            train_X[1][equation_len],
+            samples_per_rule,
+            equation_samples_num[equation_len],
+        )
+        rules.extend(equation_rules)
+    rules = list(set(rules))
+    INFO("Learned rules from data:", rules)
+
+    # Evaluate rule consistency on test equations of lengths 5-26
+    for equation_len in range(5, 27):
+        true_consist_rule_acc = _get_consist_rule_acc(
+            model, abducer, mapping, rules, test_X[1][equation_len]
+        )
+        false_consist_rule_acc = _get_consist_rule_acc(
+            model, abducer, mapping, rules, test_X[0][equation_len]
+        )
+        INFO(
+            "consist_rule_acc of length-%d test equations is %f (true), %f (false)"
+            % (equation_len, true_consist_rule_acc, false_consist_rule_acc)
+        )
+
+
+if __name__ == "__main__":
+    pass
diff --git a/utils/utils.py b/utils/utils.py
index 1138361..5cd433d 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,11 +1,18 @@
+import torch
+import torch.nn as nn
 import numpy as np
 from utils.plog import INFO
 from collections import OrderedDict
 
 # for multiple predictions, modify from `learn_add.py`
 def flatten(l):
-    return [item for sublist in l for item in flatten(sublist)] if isinstance(l, list) else [l]
-
+    return (
+        [item for sublist in l for item in flatten(sublist)]
+        if isinstance(l, list)
+        else [l]
+    )
+
+
 # for multiple predictions, modify from `learn_add.py`
 def reform_idx(flatten_pred_res, save_pred_res):
     re = []
@@ -20,10 +27,12 @@ def reform_idx(flatten_pred_res, save_pred_res):
             i = i + j
     return re
 
+
 def hamming_dist(A, B):
     B = np.array(B)
-    A = np.expand_dims(A, axis = 0).repeat(axis=0, repeats=(len(B)))
-    return np.sum(A != B, axis = 1)
+    A = np.expand_dims(A, axis=0).repeat(axis=0, repeats=(len(B)))
+    return np.sum(A != B, axis=1)
+
 
 def confidence_dist(A, B):
     B = np.array(B)
@@ -31,10 +40,10 @@ def confidence_dist(A, B):
     A = np.expand_dims(A, axis=0)
     A = A.repeat(axis=0, repeats=(len(B)))
     rows = np.array(range(len(B)))
-    rows = np.expand_dims(rows, axis = 1).repeat(axis = 1, repeats = len(B[0]))
+    rows = np.expand_dims(rows, axis=1).repeat(axis=1, repeats=len(B[0]))
     cols = np.array(range(len(B[0])))
-    cols = np.expand_dims(cols, axis = 0).repeat(axis = 0, repeats = len(B))
-    return 1 - np.prod(A[rows, cols, B], axis = 1)
+    cols = np.expand_dims(cols, axis=0).repeat(axis=0, repeats=len(B))
+    return 1 - np.prod(A[rows, cols, B], axis=1)
 
 
 def block_sample(X, Z, Y, sample_num, epoch_idx):
@@ -51,32 +60,36 @@ def block_sample(X, Z, Y, sample_num, epoch_idx):
 
 
 def gen_mappings(chars, symbs):
-    n_char = len(chars)
-    n_symbs = len(symbs)
-    if n_char != n_symbs:
-        print('Characters and symbols size dosen\'t match.')
-        return
-    from itertools import permutations
-    mappings = []
-    # returned mappings
-    perms = permutations(symbs)
-    for p in perms:
-        mappings.append(dict(zip(chars, list(p))))
-    return mappings
+    n_char = len(chars)
+    n_symbs = len(symbs)
+    if n_char != n_symbs:
+        print("Characters and symbols size doesn't match.")
+        return
+    from itertools import permutations
+
+    mappings = []
+    # returned mappings
+    perms = permutations(symbs)
+    for p in perms:
+        mappings.append(dict(zip(chars, list(p))))
+    return mappings
+
 
 def mapping_res(original_pred_res, m):
     return [[m[symbol] for symbol in formula] for formula in original_pred_res]
 
+
 def remapping_res(pred_res, m):
     remapping = {}
     for key, value in m.items():
         remapping[value] = key
     return [[remapping[symbol] for symbol in formula] for formula in pred_res]
 
+
 def check_equal(a, b):
     if isinstance(a, (int, float)) and isinstance(b, (int, float)):
         return abs(a - b) <= 1e-3
-    
+
     if isinstance(a, list) and isinstance(b, list):
         if len(a) != len(b):
             return False
@@ -84,6 +97,26 @@ def check_equal(a, b):
             if not check_equal(a[i], b[i]):
                 return False
         return True
-    
-    else:
-        return a == b
+
+    else:
+        return a == b
+
+
+def extract_feature(img):
+    # Downsample the symbol image with 2x2 average pooling, then flatten
+    # it into a 1-D feature vector
+    extractor = nn.AvgPool2d(2, stride=2)
+    feature_map = np.array(extractor(torch.Tensor(img)))
+    return feature_map.reshape((-1,))
+    # Alternative features: row and column sums of the image
+    # return np.concatenate(
+    #     (np.squeeze(np.sum(img, axis=1)), np.squeeze(np.sum(img, axis=2))), axis=0
+    # )
+
+
+def reduce_dimension(data):
+    # data[truth_value][equation_len] holds the equations of each length
+    # (5 to 26); replace every symbol image with its pooled feature vector
+    for truth_value in [0, 1]:
+        for equation_len in range(5, 27):
+            equations = data[truth_value][equation_len]
+            reduced_equations = [
+                [extract_feature(symbol_img) for symbol_img in equation]
+                for equation in equations
+            ]
+            data[truth_value][equation_len] = reduced_equations