| @@ -1,338 +0,0 @@ | |||
| # coding: utf-8 | |||
| # ================================================================# | |||
| # Copyright (C) 2021 Freecss All rights reserved. | |||
| # | |||
| # File Name :framework.py | |||
| # Author :freecss | |||
| # Email :karlfreecss@gmail.com | |||
| # Created Date :2021/06/07 | |||
| # Description : | |||
| # | |||
| # ================================================================# | |||
| import pickle as pk | |||
| import torch | |||
| import torch.nn as nn | |||
| import numpy as np | |||
| import os | |||
| from .utils.plog import INFO, DEBUG, clocker | |||
| from .utils.utils import flatten, reform_idx, block_sample, gen_mappings, mapping_res, remapping_res | |||
| from .models.nn import MLP, SymbolNetAutoencoder | |||
| from .models.basic_model import BasicModel, BasicDataset | |||
| import sys | |||
| sys.path.append("..") | |||
| from examples.datasets.hed.get_hed import get_pretrain_data | |||
def result_statistics(pred_Z, Z, Y, logic_forward, char_acc_flag):
    """Compute accuracy statistics for a batch of predictions.

    Args:
        pred_Z: predicted pseudo-label sequences, one per example.
        Z: ground-truth pseudo-label sequences; only read when
            ``char_acc_flag`` is truthy.
        Y: target outputs, one per example.
        logic_forward: callable mapping a pseudo-label sequence to an output.
        char_acc_flag: when truthy, also report character-level accuracy.

    Returns:
        dict with key ``"ABL accuracy"`` and, when requested,
        ``"Character level accuracy"``. An empty batch yields 0.0 for the
        affected accuracy instead of raising ``ZeroDivisionError``.
    """
    result = {}
    if char_acc_flag:
        char_num = 0
        char_acc_num = 0
        for pred_z, z in zip(pred_Z, Z):
            # The denominator is the ground-truth length, so a prediction
            # that is too short simply counts its missing symbols as wrong.
            char_num += len(z)
            char_acc_num += sum(1 for p, t in zip(pred_z, z) if p == t)
        result["Character level accuracy"] = (
            char_acc_num / char_num if char_num else 0.0
        )

    abl_acc_num = sum(
        1 for pred_z, y in zip(pred_Z, Y) if logic_forward(pred_z) == y
    )
    total = len(Y)
    result["ABL accuracy"] = abl_acc_num / total if total else 0.0
    return result
def filter_data(X, abduced_Z):
    """Keep only the examples whose abduced label sequence is non-empty.

    Returns:
        (finetune_X, finetune_Z): two parallel lists with the retained
        inputs and their abduced labels, in the original order.
    """
    kept = [(x, z) for x, z in zip(X, abduced_Z) if len(z) > 0]
    finetune_X = [x for x, _ in kept]
    finetune_Z = [z for _, z in kept]
    return finetune_X, finetune_Z
def train(model, abducer, train_data, test_data, loop_num=50, sample_num=-1, verbose=-1):
    """Run the abductive learning loop.

    Each loop samples a block of training data, predicts pseudo-labels,
    abduces revised labels consistent with ``Y`` and fine-tunes the model on
    the examples for which abduction produced a non-empty label sequence.

    Args:
        model: object exposing ``predict(X)`` (returning a mapping with a
            ``'cls'`` entry) and ``train(X, Z)``.
        abducer: object exposing ``batch_abduce`` and ``kb.logic_forward``.
        train_data: ``(train_X, train_Z, train_Y)``; ``train_Z`` may be None.
        test_data: accepted for interface compatibility; not used here.
        loop_num: number of loops to run.
        sample_num: examples per loop; ``-1`` means all of ``train_X``.
        verbose: report statistics every ``verbose`` loops; values below 1
            report only on the last loop.

    Returns:
        The statistics dict of the last reported loop, or None when
        ``loop_num`` is 0.
    """
    train_X, train_Z, train_Y = train_data

    # Set default parameters
    if sample_num == -1:
        sample_num = len(train_X)
    if verbose < 1:
        verbose = loop_num

    char_acc_flag = 1
    # Identity check: ``== None`` is element-wise (and ambiguous) on arrays.
    if train_Z is None:
        char_acc_flag = 0
        train_Z = [None] * len(train_X)

    predict_func = clocker(model.predict)
    train_func = clocker(model.train)
    abduce_func = clocker(abducer.batch_abduce)

    # Bound up-front so ``return res`` is valid even when no loop runs.
    res = None
    for loop_idx in range(loop_num):
        X, Z, Y = block_sample(train_X, train_Z, train_Y, sample_num, loop_idx)
        preds_res = predict_func(X)
        abduced_Z = abduce_func(preds_res, Y)

        if ((loop_idx + 1) % verbose == 0) or (loop_idx == loop_num - 1):
            res = result_statistics(preds_res['cls'], Z, Y, abducer.kb.logic_forward, char_acc_flag)
            INFO('loop: ', loop_idx + 1, ' ', res)

        finetune_X, finetune_Z = filter_data(X, abduced_Z)
        if len(finetune_X) > 0:
            train_func(finetune_X, finetune_Z)
        else:
            INFO("lack of data, all abduced failed", len(finetune_X))
    return res
def hed_pretrain(kb, cls, recorder):
    """Load pretrained weights into ``cls``, training them first if absent.

    When ``./weights/pretrain_weights.pth`` does not exist, an autoencoder
    wrapping the symbol network is trained for 10 epochs on the pretraining
    images, and the weights of its ``base_model`` are saved to that path.

    Args:
        kb: knowledge base; ``len(kb.pseudo_label_list)`` sets the number of
            classes.
        cls: module whose ``load_state_dict`` receives the weights.
        recorder: logger object; its ``save_dir`` is passed to ``BasicModel``.
    """
    weights_path = "./weights/pretrain_weights.pth"
    cls_autoencoder = SymbolNetAutoencoder(num_classes=len(kb.pseudo_label_list))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if os.path.exists(weights_path):
        # map_location lets weights saved on a GPU load on a CPU-only host.
        cls.load_state_dict(torch.load(weights_path, map_location=device))
        return

    INFO("Pretrain Start")
    pretrain_data_X, pretrain_data_Y = get_pretrain_data(['0', '1', '10', '11'])
    pretrain_data = BasicDataset(pretrain_data_X, pretrain_data_Y)
    pretrain_data_loader = torch.utils.data.DataLoader(pretrain_data, batch_size=64, shuffle=True)

    criterion = nn.MSELoss()
    optimizer = torch.optim.RMSprop(cls_autoencoder.parameters(), lr=0.001, alpha=0.9, weight_decay=1e-6)

    pretrain_model = BasicModel(cls_autoencoder, criterion, optimizer, device, save_interval=1, save_dir=recorder.save_dir, num_epochs=10, recorder=recorder)
    pretrain_model.fit(pretrain_data_loader)

    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(weights_path), exist_ok=True)
    torch.save(cls_autoencoder.base_model.state_dict(), weights_path)
    cls.load_state_dict(cls_autoencoder.base_model.state_dict())
def _get_char_acc(model, X, consistent_pred_res, mapping):
    """Return the fraction of predicted symbols that match the abduced labels.

    The model's ``'cls'`` predictions for ``X`` are translated through
    ``mapping`` and flattened, then compared position by position with the
    flattened ``consistent_pred_res``.

    Raises:
        AssertionError: if the two flattened sequences differ in length.
    """
    original_pred_res = model.predict(X)['cls']
    pred_res = flatten(mapping_res(original_pred_res, mapping))
    # Flatten once; the original re-flattened this list for every index.
    abduced = flatten(consistent_pred_res)

    INFO('Current model\'s output: ', pred_res)
    INFO('Abduced labels: ', abduced)

    assert len(pred_res) == len(abduced)
    return sum(p == a for p, a in zip(pred_res, abduced)) / len(pred_res)
def abduce_and_train(model, abducer, mapping, train_X_true, select_num):
    """Abduce consistent pseudo-labels for a random sample and train on them.

    ``select_num`` examples are drawn (with replacement) from
    ``train_X_true``. When ``mapping`` is None every candidate mapping from
    ``gen_mappings`` is tried and the one yielding the most consistent
    examples is kept; otherwise only ``mapping`` is used.

    Returns:
        ``(consistent_acc, char_acc, mapping)``; ``(0, 0, None)`` when no
        example could be made consistent.
    """
    select_idx = np.random.randint(len(train_X_true), size=select_num)
    X = [train_X_true[idx] for idx in select_idx]
    original_pred_res = model.predict(X)['cls']

    if mapping is None:
        mappings = gen_mappings(['+', '=', 0, 1], ['+', '=', 0, 1])
    else:
        mappings = [mapping]

    max_abduce_num = 20
    consistent_idx = []
    consistent_pred_res = []

    for m in mappings:
        pred_res = mapping_res(original_pred_res, m)
        solution = abducer.zoopt_get_solution(pred_res, [None] * len(pred_res), [None] * len(pred_res), max_abduce_num)
        all_address_flag = reform_idx(solution, pred_res)

        consistent_idx_tmp = []
        consistent_pred_res_tmp = []
        for idx in range(len(pred_res)):
            # Positions flagged non-zero are the ones to be revised.
            address_idx = [i for i, flag in enumerate(all_address_flag[idx]) if flag != 0]
            candidate = abducer.address_by_idx([pred_res[idx]], None, address_idx)
            if len(candidate) > 0:
                consistent_idx_tmp.append(idx)
                consistent_pred_res_tmp.append(candidate[0][0])

        # Keep the mapping that makes the largest number of examples consistent.
        if len(consistent_idx_tmp) > len(consistent_idx):
            consistent_idx = consistent_idx_tmp
            consistent_pred_res = consistent_pred_res_tmp
            if len(mappings) > 1:
                mapping = m

    if len(consistent_idx) == 0:
        return 0, 0, None

    INFO('Train pool size is:', len(flatten(consistent_pred_res)))
    INFO("Start to use abduced pseudo label to train model...")
    consistent_X = [X[idx] for idx in consistent_idx]
    model.train(consistent_X, remapping_res(consistent_pred_res, mapping))

    consistent_acc = len(consistent_idx) / select_num
    char_acc = _get_char_acc(model, consistent_X, consistent_pred_res, mapping)
    INFO('consistent_acc is %s, char_acc is %s' % (consistent_acc, char_acc))
    return consistent_acc, char_acc, mapping
| def _remove_duplicate_rule(rule_dict): | |||
| add_nums_dict = {} | |||
| for r in list(rule_dict): | |||
| add_nums = str(r.split(']')[0].split('[')[1]) + str(r.split(']')[1].split('[')[1]) # r = 'my_op([1], [0], [1, 0])' then add_nums = '10' | |||
| if add_nums in add_nums_dict: | |||
| old_r = add_nums_dict[add_nums] | |||
| if rule_dict[r] >= rule_dict[old_r]: | |||
| rule_dict.pop(old_r) | |||
| add_nums_dict[add_nums] = r | |||
| else: | |||
| rule_dict.pop(r) | |||
| else: | |||
| add_nums_dict[add_nums] = r | |||
| return list(rule_dict) | |||
def get_rules_from_data(model, abducer, mapping, train_X_true, samples_per_rule, samples_num):
    """Abduce operation rules from randomly sampled equations.

    For each of ``samples_num`` rounds, ``samples_per_rule`` equations are
    drawn (with replacement) until the consistent predictions among them
    yield a rule set from ``abducer.abduce_rules``. Rules seen in at least
    5 rounds are kept and de-duplicated by operand pair.

    Note:
        The inner loop retries indefinitely while no rule set can be
        abduced.

    Returns:
        list of rule strings.
    """
    rules = []
    for _ in range(samples_num):
        while True:
            select_idx = np.random.randint(len(train_X_true), size=samples_per_rule)
            X = [train_X_true[idx] for idx in select_idx]
            original_pred_res = model.predict(X)['cls']
            pred_res = mapping_res(original_pred_res, mapping)

            consistent_pred_res = [
                pred for pred in pred_res if abducer.kb.logic_forward([pred])
            ]
            if len(consistent_pred_res) != 0:
                rule = abducer.abduce_rules(consistent_pred_res)
                if rule is not None:
                    break
        rules.append(rule)

    # Count in how many rounds each individual rule appeared.
    all_rule_dict = {}
    for rule in rules:
        for r in rule:
            all_rule_dict[r] = all_rule_dict.get(r, 0) + 1

    rule_dict = {rule: cnt for rule, cnt in all_rule_dict.items() if cnt >= 5}
    return _remove_duplicate_rule(rule_dict)
| def _get_consist_rule_acc(model, abducer, mapping, rules, X): | |||
| cnt = 0 | |||
| for x in X: | |||
| original_pred_res = model.predict([x])['cls'] | |||
| pred_res = flatten(mapping_res(original_pred_res, mapping)) | |||
| if abducer.kb.consist_rule(pred_res, rules): | |||
| cnt += 1 | |||
| return cnt / len(X) | |||
def train_with_rule(model, abducer, train_data, val_data, select_num=10, min_len=5, max_len=8):
    """Curriculum training over equation lengths ``min_len`` .. ``max_len - 1``.

    Each course uses the equations of length ``equation_len`` and
    ``equation_len + 1``. A course ends once abduction has been accurate
    five times in a row and the rules learned from the data separate valid
    from invalid validation equations; otherwise the previous weights are
    reloaded and the course restarts.

    Args:
        train_data, val_data: indexed as ``data[label][equation_len]`` with
            label 1 for valid and 0 for invalid equations. These containers
            are not modified.
        select_num: number of equations sampled per abduction step.

    Returns:
        ``(model, mapping)``; ``mapping`` is None when no course was run.
    """
    train_X = train_data
    val_X = val_data

    samples_num = 50
    samples_per_rule = 3
    # Bound up-front so the return is valid when range(min_len, max_len) is empty.
    mapping = None

    # Start training / for each length of equations
    for equation_len in range(min_len, max_len):
        INFO("============== equation_len: %d-%d ================" % (equation_len, equation_len + 1))
        # Build fresh lists: the original extended the caller's lists in place,
        # so later readers of train_data[1][equation_len] also saw longer equations.
        train_X_true = list(train_X[1][equation_len]) + list(train_X[1][equation_len + 1])
        val_X_true = list(val_X[1][equation_len]) + list(val_X[1][equation_len + 1])
        val_X_false = list(val_X[0][equation_len]) + list(val_X[0][equation_len + 1])

        condition_cnt = 0
        while True:
            if equation_len == min_len:
                mapping = None

            # Abduce and train NN
            consistent_acc, char_acc, mapping = abduce_and_train(model, abducer, mapping, train_X_true, select_num)
            if consistent_acc == 0:
                continue

            # Test if we can use mlp to evaluate
            if consistent_acc >= 0.9 and char_acc >= 0.9:
                condition_cnt += 1
            else:
                condition_cnt = 0

            # The condition has been satisfied continuously five times
            if condition_cnt >= 5:
                INFO("Now checking if we can go to next course")
                rules = get_rules_from_data(model, abducer, mapping, train_X_true, samples_per_rule, samples_num)
                INFO('Learned rules from data:', rules)

                true_consist_rule_acc = _get_consist_rule_acc(model, abducer, mapping, rules, val_X_true)
                false_consist_rule_acc = _get_consist_rule_acc(model, abducer, mapping, rules, val_X_false)
                INFO('consist_rule_acc is %f, %f\n' % (true_consist_rule_acc, false_consist_rule_acc))

                # decide next course or restart
                if true_consist_rule_acc > 0.95 and false_consist_rule_acc < 0.1:
                    torch.save(model.cls_list[0].model.state_dict(), "./weights/weights_%d.pth" % equation_len)
                    break
                else:
                    if equation_len == min_len:
                        INFO('Final mapping is: ', mapping)
                        model.cls_list[0].model.load_state_dict(torch.load("./weights/pretrain_weights.pth"))
                    else:
                        model.cls_list[0].model.load_state_dict(torch.load("./weights/weights_%d.pth" % (equation_len - 1)))
                    condition_cnt = 0
                    INFO('Reload Model and retrain')

    return model, mapping
def hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8,
             test_min_len=5, test_max_len=26):
    """Abduce rules from training equations and report test consistency.

    A total of 50 rule samples is split evenly across the lengths
    ``min_len`` .. ``max_len`` (the remainder goes to the last length).
    The learned rules are then evaluated on every test length from
    ``test_min_len`` to ``test_max_len`` inclusive, and the results are
    logged.

    Args:
        train_data, test_data: indexed as ``data[label][equation_len]`` with
            label 1 for valid and 0 for invalid equations.
        test_min_len, test_max_len: inclusive range of test equation
            lengths; the defaults reproduce the previously hard-coded 5..26.
    """
    train_X = train_data
    test_X = test_data

    # Calculate how many equations should be selected in each length
    # for each length, there are equation_samples_num[equation_len] rules
    print("Now begin to train final mlp model")
    len_cnt = max_len - min_len + 1
    samples_num = 50
    base, remainder = divmod(samples_num, len_cnt)
    # Indexed by equation length, hence the min_len leading zeros.
    equation_samples_num = [0] * min_len + [base] * len_cnt
    equation_samples_num[-1] += remainder
    assert sum(equation_samples_num) == samples_num

    # Abduce rules
    rules = []
    samples_per_rule = 3
    for equation_len in range(min_len, max_len + 1):
        equation_rules = get_rules_from_data(model, abducer, mapping, train_X[1][equation_len], samples_per_rule, equation_samples_num[equation_len])
        rules.extend(equation_rules)
    rules = list(set(rules))
    INFO('Learned rules from data:', rules)

    for equation_len in range(test_min_len, test_max_len + 1):
        true_consist_rule_acc = _get_consist_rule_acc(model, abducer, mapping, rules, test_X[1][equation_len])
        false_consist_rule_acc = _get_consist_rule_acc(model, abducer, mapping, rules, test_X[0][equation_len])
        INFO('consist_rule_acc of testing length %d equations are %f, %f' % (equation_len, true_consist_rule_acc, false_consist_rule_acc))


if __name__ == "__main__":
    pass
| @@ -1,147 +0,0 @@ | |||
| # coding: utf-8 | |||
| # ================================================================# | |||
| # Copyright (C) 2021 Freecss All rights reserved. | |||
| # | |||
| # File Name :lenet5.py | |||
| # Author :freecss | |||
| # Email :karlfreecss@gmail.com | |||
| # Created Date :2021/03/03 | |||
| # Description : | |||
| # | |||
| # ================================================================# | |||
| import torchvision | |||
| import torch | |||
| from torch import nn | |||
| from torch.nn import functional as F | |||
| from torch.autograd import Variable | |||
| import torchvision.transforms as transforms | |||
| import numpy as np | |||
class LeNet5(nn.Module):
    """Small CNN: three 3x3 convolutions followed by three linear layers."""

    def __init__(self, num_classes=10, image_size=(28, 28)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 3, padding=1)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.conv3 = nn.Conv2d(16, 16, 3)

        # Spatial size left after pool(conv1) -> pool(conv2) -> conv3.
        fmap = (np.array(image_size) // 2 - 2) // 2 - 2
        flat_dim = 16 * fmap[0] * fmap[1]

        self.fc1 = nn.Linear(flat_dim, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Forward pass; returns the raw outputs of the last linear layer."""
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), (2, 2))
        feats = F.relu(self.conv3(pooled2))
        flat = feats.view(-1, self.num_flat_features(feats))
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        return x.size()[1:].numel()
| # class SymbolNet(nn.Module): | |||
| # def __init__(self, num_classes=4, image_size=(28, 28, 1)): | |||
| # super(SymbolNet, self).__init__() | |||
| # self.conv1 = nn.Sequential( | |||
| # nn.Conv2d(1, 32, 3, stride=1, padding=1), | |||
| # nn.ReLU(inplace=True), | |||
| # nn.BatchNorm2d(32), | |||
| # ) | |||
| # self.conv2 = nn.Sequential( | |||
| # nn.Conv2d(32, 64, 3, stride=1, padding=1), | |||
| # nn.ReLU(inplace=True), | |||
| # nn.MaxPool2d(kernel_size=2, stride=2), | |||
| # nn.BatchNorm2d(64), | |||
| # nn.Dropout(0.25), | |||
| # ) | |||
| # num_features = 64 * (image_size[0] // 2) * (image_size[1] // 2) | |||
| # self.fc1 = nn.Sequential( | |||
| # nn.Linear(num_features, 128), nn.ReLU(inplace=True), nn.Dropout(0.5) | |||
| # ) | |||
| # self.fc2 = nn.Sequential(nn.Linear(128, num_classes), nn.Softmax(dim=1)) | |||
| # def forward(self, x): | |||
| # x = self.conv1(x) | |||
| # x = self.conv2(x) | |||
| # x = torch.flatten(x, 1) | |||
| # x = self.fc1(x) | |||
| # x = self.fc2(x) | |||
| # return x | |||
class SymbolNet(nn.Module):
    """Two conv/pool/batch-norm stages followed by three linear layers.

    The last layer applies a softmax over ``num_classes`` outputs.
    """

    def __init__(self, num_classes=4, image_size=(28, 28, 1)):
        super().__init__()
        # NOTE(review): momentum=0.99 / eps=0.001 look like Keras defaults;
        # PyTorch's momentum weights the *new* batch statistic, so this may
        # not be the intended running-average behaviour -- confirm.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 5, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(32, momentum=0.99, eps=0.001),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 5, padding=2, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(64, momentum=0.99, eps=0.001),
        )

        rows = image_size[0] // 4 - 1
        cols = image_size[1] // 4 - 1
        flat_dim = 64 * rows * cols

        self.fc1 = nn.Sequential(nn.Linear(flat_dim, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Sequential(nn.Linear(84, num_classes), nn.Softmax(dim=1))

    def forward(self, x):
        """Return per-class probabilities for a batch of images."""
        feats = self.conv2(self.conv1(x))
        flat = torch.flatten(feats, 1)
        return self.fc3(self.fc2(self.fc1(flat)))
class SymbolNetAutoencoder(nn.Module):
    """SymbolNet followed by two linear layers that expand to image size.

    The output has ``image_size[0] * image_size[1]`` features per example.
    """

    def __init__(self, num_classes=4, image_size=(28, 28, 1)):
        super().__init__()
        self.base_model = SymbolNet(num_classes, image_size)
        self.fc1 = nn.Sequential(nn.Linear(num_classes, 100), nn.ReLU())
        pixel_count = image_size[0] * image_size[1]
        self.fc2 = nn.Sequential(nn.Linear(100, pixel_count), nn.ReLU())

    def forward(self, x):
        """Encode with the base model, then decode through fc1 and fc2."""
        encoded = self.base_model(x)
        return self.fc2(self.fc1(encoded))
class MLP(nn.Module):
    """One-hidden-layer perceptron with a softmax output.

    The hidden width is ``int(sqrt(input_dim))``.
    """

    def __init__(self, input_dim=50, num_classes=2):
        super().__init__()
        assert input_dim > 0
        hidden_dim = int(np.sqrt(input_dim))

        hidden_layer = nn.Linear(input_dim, hidden_dim)
        self.fc1 = nn.Sequential(hidden_layer, nn.ReLU())
        output_layer = nn.Linear(hidden_dim, num_classes)
        self.fc2 = nn.Sequential(output_layer, nn.Softmax(dim=1))

    def forward(self, x):
        """Return per-class probabilities for a batch of feature vectors."""
        return self.fc2(self.fc1(x))
| @@ -1,83 +0,0 @@ | |||
| :- use_module(library(apply)). | |||
| :- use_module(library(lists)). | |||
| % :- use_module(library(tabling)). | |||
| % :- table valid_rules/2, op_rule/2. | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% DCG parser for equations | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% symbols to be mapped | |||
| % digit(D): the admissible digit symbols are 1 and 0. | |||
| digit(1). | |||
| digit(0). | |||
| % digits | |||
| % digits//1 (DCG): consumes a non-empty sequence of symbols, each a digit/1. | |||
| digits([D]) --> [D], { digit(D) }. % empty list [] is not a digit | |||
| digits([D | T]) --> [D], !, digits(T), { digit(D) }. | |||
| % digits/1: succeeds when the list X is itself parsed by digits//1. | |||
| digits(X):- | |||
| phrase(digits(X), X). | |||
| % More integrity constraints 1: | |||
| % These two clauses forbid the first digit from being 0. | |||
| % You may uncomment them to prune the search space | |||
| % length(X, L), | |||
| % (L > 1 -> X \= [0 | _]; true). | |||
| % Equation definition | |||
| % eq_arg//1 (DCG): a non-empty run of symbols, none identical (==) to '+' or '='. | |||
| eq_arg([D]) --> [D], { \+ D == '+', \+ D == '=' }. | |||
| eq_arg([D | T]) --> [D], !, eq_arg(T), { \+ D == '+', \+ D == '=' }. | |||
| % equation//1 (DCG): parses "X + Y = Z" into eq(X, Y, Z). | |||
| equation(eq(X, Y, Z)) --> | |||
| eq_arg(X), [+], eq_arg(Y), [=], eq_arg(Z). | |||
| % More integrity constraints 2: | |||
| % This clause restricts the length of arguments to be sane, | |||
| % You may uncomment them to prune the search space | |||
| % { length(X, LX), length(Y, LY), length(Z, LZ), | |||
| % LZ =< max(LX, LY) + 1, LZ >= max(LX, LY) }. | |||
| % parse_eq(List_of_Terms, Eq): Eq is the eq/3 term parsed from the whole list. | |||
| parse_eq(List_of_Terms, Eq) :- | |||
| phrase(equation(Eq), List_of_Terms). | |||
| %%%%%%%%%%%%%%%%%%%%%% | |||
| %% Bit-wise operation | |||
| %%%%%%%%%%%%%%%%%%%%%% | |||
| % Abductive calculation with given pseudo-labels, abduces pseudo-labels as well as operation rules | |||
| % calc/2: same as calc/3 starting from an empty rule list. | |||
| calc(Rules, Pseudo) :- | |||
| calc([], Rules, Pseudo). | |||
| % calc(Rules0, Rules1, Pseudo): parse Pseudo as an equation and check it | |||
| % digit by digit, extending the rule list Rules0 into Rules1. | |||
| calc(Rules0, Rules1, Pseudo) :- | |||
| parse_eq(Pseudo, eq(X,Y,Z)), | |||
| bitwise_calc(Rules0, Rules1, X, Y, Z). | |||
| % Bit-wise calculation that handles carrying | |||
| % The operands are reversed so that the recursion starts from the last symbol; | |||
| % afterwards X, Y and Z must all be non-empty digit lists. | |||
| bitwise_calc(Rules, Rules1, X, Y, Z) :- | |||
| reverse(X, X1), reverse(Y, Y1), reverse(Z, Z1), | |||
| bitwise_calc_r(Rules, Rules1, X1, Y1, Z1), | |||
| maplist(digits, [X,Y,Z]). | |||
| % Base cases: when one operand is exhausted the result is the other operand. | |||
| bitwise_calc_r(Rs, Rs, [], Y, Y). | |||
| bitwise_calc_r(Rs, Rs, X, [], X). | |||
| % Recursive case: a rule gives the sum of D1 and D2, which is either [D3] | |||
| % (no carry) or [C, D3] (carry C). The carry is first added to the rest of X, | |||
| % then the carried X is added to the rest of Y to give the rest of Z. | |||
| bitwise_calc_r(Rules, Rules1, [D1 | X], [D2 | Y], [D3 | Z]) :- | |||
| abduce_op_rule(my_op([D1],[D2],Sum), Rules, Rules2), | |||
| ((Sum = [D3], Carry = []); (Sum = [C, D3], Carry = [C])), | |||
| bitwise_calc_r(Rules2, Rules3, X, Carry, X_carried), | |||
| bitwise_calc_r(Rules3, Rules1, X_carried, Y, Z). | |||
| %%%%%%%%%%%%%%%%%%%%%%%%% | |||
| % Abduce operation rules | |||
| %%%%%%%%%%%%%%%%%%%%%%%%% | |||
| % Get an existing rule | |||
| abduce_op_rule(R, Rules, Rules) :- | |||
| member(R, Rules). | |||
| % Add a new rule | |||
| abduce_op_rule(R, Rules, [R|Rules]) :- | |||
| op_rule(R), | |||
| valid_rules(Rules, R). | |||
| % Integrity Constraints | |||
| % valid_rules(Rules, R): R may be added to Rules. | |||
| valid_rules([], _). | |||
| % The existing rule has a different operand pair, in either order. | |||
| valid_rules([my_op([X1],[Y1],_)|Rs], my_op([X],[Y],Z)) :- | |||
| op_rule(my_op([X],[Y],Z)), | |||
| [X,Y] \= [X1,Y1], | |||
| [X,Y] \= [Y1,X1], | |||
| valid_rules(Rs, my_op([X],[Y],Z)). | |||
| % The existing rule is the swapped-operand version with the same result Z. | |||
| valid_rules([my_op([Y],[X],Z)|Rs], my_op([X],[Y],Z)) :- | |||
| X \= Y, | |||
| valid_rules(Rs, my_op([X],[Y],Z)). | |||
| % op_rule/1: candidate rules; the result is one digit or a two-digit list. | |||
| op_rule(my_op([X],[Y],[Z])) :- digit(X), digit(Y), digit(Z). | |||
| op_rule(my_op([X],[Y],[Z1,Z2])) :- digit(X), digit(Y), digits([Z1,Z2]). | |||
| @@ -1,4 +0,0 @@ | |||
| Download the Handwritten Equation Decipherment dataset from [NJU Box](https://box.nju.edu.cn/f/391c2d48c32b436cb833/) to this folder and unzip it: | |||
| ``` | |||
| unzip HED.zip | |||
| ``` | |||
| @@ -1,130 +0,0 @@ | |||
| import os | |||
| import cv2 | |||
| import torch | |||
| import torchvision | |||
| import pickle | |||
| import numpy as np | |||
| import random | |||
| from collections import defaultdict | |||
| from torch.utils.data import Dataset | |||
| from torchvision.transforms import transforms | |||
def get_data(img_dataset, train):
    """Split every equation array into a list of per-symbol image arrays.

    Args:
        img_dataset: mapping with the keys ``"train:positive"``,
            ``"train:negative"``, ``"test:positive"`` and
            ``"test:negative"``, each holding equation arrays.
        train: select the ``train:*`` entries when truthy, else ``test:*``.

    Returns:
        ``(X, None, Y)``: ``X[i]`` is the list obtained by splitting the
        i-th equation (cast to float32) along its first axis into
        ``shape[0]`` pieces; ``Y[i]`` is 1 for positive and 0 for negative
        equations. Positives come first.
    """
    split = "train" if train else "test"
    positive = img_dataset[split + ":positive"]
    negative = img_dataset[split + ":negative"]

    X = []
    Y = []
    for equations, label in ((positive, 1), (negative, 0)):
        for equation in equations:
            equation = equation.astype(np.float32)
            X.append(np.vsplit(equation, equation.shape[0]))
            Y.append(label)
    return X, None, Y
def get_pretrain_data(labels, image_size=(28, 28, 1), data_dir="./datasets/hed/mnist_images"):
    """Load the autoencoder pretraining images for the given label folders.

    Args:
        labels: sub-directory names under ``data_dir`` to read.
        image_size: ``(height, width, channels)`` every image is resized to.
        data_dir: root directory of the label folders; the default is the
            previously hard-coded location.

    Returns:
        ``(X, Y)``: ``X`` holds the images after ``ToTensor``; ``Y`` holds
        the same normalised images flattened to 1-D arrays (the
        reconstruction targets).

    Raises:
        ValueError: if a file in a label folder cannot be read as an image.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    X = []
    for label in labels:
        label_path = os.path.join(data_dir, label)
        # Sorted so the sample order does not depend on the file system.
        for img_name in sorted(os.listdir(label_path)):
            img_file = os.path.join(label_path, img_name)
            img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                raise ValueError("Cannot read image: %s" % img_file)
            img = cv2.resize(img, (image_size[1], image_size[0]))
            X.append(np.array(img, dtype=np.float32))

    # Add a channel axis and map pixel values to roughly [-1, 1].
    X = [((img[:, :, np.newaxis] - 127) / 128.0) for img in X]
    Y = [img.copy().reshape(image_size[0] * image_size[1] * image_size[2]) for img in X]
    X = [transform(img) for img in X]
    return X, Y
| # def get_pretrain_data(train_data, image_size=(28, 28, 1)): | |||
| # X = [] | |||
| # for label in [0, 1]: | |||
| # for _, equation_list in train_data[label].items(): | |||
| # for equation in equation_list: | |||
| # X = X + equation | |||
| # X = np.array(X) | |||
| # index = np.array(list(range(len(X)))) | |||
| # np.random.shuffle(index) | |||
| # X = X[index] | |||
| # X = [img for img in X] | |||
| # Y = [img.copy().reshape(image_size[0] * image_size[1] * image_size[2]) for img in X] | |||
| # return X, Y | |||
def divide_equations_by_len(equations, labels):
    """Group equations first by label (1 or 0) and then by their length.

    Returns:
        ``{1: defaultdict(list), 0: defaultdict(list)}`` where
        ``result[label][n]`` lists the equations of length ``n`` carrying
        that label, in input order.
    """
    equations_by_len = {1: defaultdict(list), 0: defaultdict(list)}
    for position, equation in enumerate(equations):
        bucket = equations_by_len[labels[position]]
        bucket[len(equation)].append(equation)
    return equations_by_len
def split_equation(equations_by_len, prop_train, prop_val):
    """
    Split the equations in each length to training and validation data according to the proportion

    Args:
        equations_by_len: ``{1: {length: [...]}, 0: {length: [...]}}``.
        prop_train, prop_val: integer weights of the two parts, e.g. 3 and 1.

    Returns:
        ``(train_equations_by_len, val_equations_by_len)`` with the same
        nesting as the input. The input lists are left untouched.
    """
    train_equations_by_len = {1: dict(), 0: dict()}
    val_equations_by_len = {1: dict(), 0: dict()}

    for label in range(2):
        for equation_len, equations in equations_by_len[label].items():
            # Shuffle a copy so the caller's lists keep their order.
            shuffled = list(equations)
            random.shuffle(shuffled)
            # Multiply before the floor division: ``n // total * prop_train``
            # rounds down too early (7 items at 3:1 gave 3 instead of 5).
            n_train = len(shuffled) * prop_train // (prop_train + prop_val)
            train_equations_by_len[label][equation_len] = shuffled[:n_train]
            val_equations_by_len[label][equation_len] = shuffled[n_train:]

    return train_equations_by_len, val_equations_by_len
def get_hed(dataset="mnist", train=True):
    """Load a HED equation dataset and group its equations by label and length.

    Args:
        dataset: ``"mnist"`` or ``"random"``.
        train: load the training split when truthy, else the test split.

    Returns:
        The nested mapping produced by ``divide_equations_by_len``.

    Raises:
        ValueError: for an unknown ``dataset`` name (message
            ``"Undefined dataset"``).
    """
    dataset_files = {
        "mnist": "./datasets/hed/mnist_equation_data_train_len_26_test_len_26_sys_2_.pk",
        "random": "./datasets/hed/random_equation_data_train_len_26_test_len_26_sys_2_.pk",
    }
    if dataset not in dataset_files:
        raise ValueError("Undefined dataset")

    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(dataset_files[dataset], "rb") as f:
        img_dataset = pickle.load(f)

    X, _, Y = get_data(img_dataset, train)
    equations_by_len = divide_equations_by_len(X, Y)
    return equations_by_len


if __name__ == "__main__":
    get_hed()
| @@ -1,84 +0,0 @@ | |||
| :- ensure_loaded(['BK.pl']). | |||
| :- thread_setconcurrency(_, 8). | |||
| :- use_module(library(thread)). | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% For propositionalisation | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| % eval_inst_feature/2: alias of eval_eq/2. | |||
| eval_inst_feature(Ex, Feature):- | |||
| eval_eq(Ex, Feature). | |||
| %% Evaluate instance given feature | |||
| % Parses Ex and runs bitwise_calc with Feature as the initial rule list; | |||
| % the resulting rule list is ignored and the cut keeps only the first solution. | |||
| eval_eq(Ex, Feature):- | |||
| parse_eq(Ex, eq(X,Y,Z)), | |||
| bitwise_calc(Feature,_,X,Y,Z), !. | |||
| %%%%%%%%%%%%%% | |||
| %% Abduction | |||
| %%%%%%%%%%%%%% | |||
| % Make abduction when given examples that have been interpreted as pseudo-labels | |||
| % abduce/2 starts abduce/3 with an empty rule list. | |||
| abduce(Exs, Delta_C) :- | |||
| abduce(Exs, [], Delta_C). | |||
| % abduce/3 threads the rule list through calc/3 for each example in turn. | |||
| abduce([], Delta_C, Delta_C). | |||
| abduce([E|Exs], Delta_C0, Delta_C1) :- | |||
| calc(Delta_C0, Delta_C2, E), | |||
| abduce(Exs, Delta_C2, Delta_C1). | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% Abduce pseudo-labels only | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| % Succeeds (once) when some rule list explains all examples; the rules are discarded. | |||
| abduce_consistent_insts(Exs):- | |||
| abduce(Exs, _), !. | |||
| % (Experimental) Uncomment to use parallel abduction | |||
| % abduce_consistent_exs_concurrent(Exs), !. | |||
| % logic_forward/2 reports the outcome as X = true or X = false; | |||
| % logic_forward/1 simply succeeds or fails. | |||
| logic_forward(Exs, X) :- abduce_consistent_insts(Exs) -> X = true ; X = false. | |||
| logic_forward(Exs) :- abduce_consistent_insts(Exs). | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% Abduce Delta_C given pseudo-labels | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| % Returns the first rule list Delta_C found by abduce/2 for Exs. | |||
| consistent_inst_feature(Exs, Delta_C):- | |||
| abduce(Exs, Delta_C), !. | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| %% (Experimental) Parallel abduction | |||
| %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |||
| abduce_consistent_exs_concurrent(Exs) :- | |||
| % Split the current data batch into grounding examples and variable examples (which need to be revised) | |||
| split_exs(Exs, Ground_Exs, Var_Exs), | |||
| % Find the simplest Delta_C for grounding examples. | |||
| abduce(Ground_Exs, Ground_Delta_C), !, | |||
| % Extend Ground Delta_C into all possible variations | |||
| extend_op_rule(Ground_Delta_C, Possible_Deltas), | |||
| % Concurrently abduce the variable examples | |||
| maplist(append([abduce2, Var_Exs, Ground_Exs]), [[Possible_Deltas]], Call_List), | |||
| maplist(=.., Goals, Call_List), | |||
| % writeln(Goals), | |||
| first_solution(Var_Exs, Goals, [local(inf)]). | |||
| % split_exs(Exs, Ground, Var): partition Exs into ground and non-ground examples. | |||
| split_exs([],[],[]). | |||
| split_exs([E | Exs], [E | G_Exs], V_Exs):- | |||
| ground(E), !, | |||
| split_exs(Exs, G_Exs, V_Exs). | |||
| split_exs([E | Exs], G_Exs, [E | V_Exs]):- | |||
| split_exs(Exs, G_Exs, V_Exs). | |||
| % NOTE(review): the notebook log in this dump reports "Syntax error: Operator | |||
| % expected" at learn_add.pl:67:9; possibly this table directive on a Prolog | |||
| % build without tabling support -- confirm. | |||
| :- table extend_op_rule/2. | |||
| % extend_op_rule(Rules, Ext): Ext is Rules extended with valid rules until it has 4 entries. | |||
| extend_op_rule(Rules, Rules) :- | |||
| length(Rules, 4). | |||
| extend_op_rule(Rules, Ext) :- | |||
| op_rule(R), | |||
| valid_rules(Rules, R), | |||
| extend_op_rule([R|Rules], Ext). | |||
| % abduction without learning new Delta_C (Because they have been extended!) | |||
| abduce2([], _, _). | |||
| abduce2([E|Exs], Ground_Exs, Delta_C) :- | |||
| % abduce by finding ground examples | |||
| member(E, Ground_Exs), | |||
| abduce2(Exs, Ground_Exs, Delta_C). | |||
| % Otherwise the example must evaluate correctly under the fixed rule list Delta_C. | |||
| abduce2([E|Exs], Ground_Exs, Delta_C) :- | |||
| eval_inst_feature(E, Delta_C), | |||
| abduce2(Exs, Ground_Exs, Delta_C). | |||
| @@ -1,4 +0,0 @@ | |||
| Download the Handwritten Formula Recognition dataset from [google drive](https://drive.google.com/file/d/1G07kw-wK-rqbg_85tuB7FNfA49q8lvoy/view?usp=sharing) to this folder and unzip it: | |||
| ``` | |||
| unzip HWF.zip | |||
| ``` | |||
| @@ -1,50 +0,0 @@ | |||
| import json | |||
| from PIL import Image | |||
| from torchvision.transforms import transforms | |||
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1,))
])


def get_data(file, get_pseudo_label, img_dir='./datasets/hwf/data/Handwritten_Math_Symbols/'):
    """Load handwritten-formula samples described by a JSON file.

    Args:
        file: path of a JSON file holding a list of records with the keys
            ``'img_paths'`` and ``'res'``.
        get_pseudo_label: when truthy, also return the symbol label of every
            image, taken from the first path component of its file name.
        img_dir: directory prefix prepended to every image path; the default
            is the previously hard-coded location.

    Returns:
        ``(X, Z, Y)``: ``X[i]`` is the list of transformed grayscale images
        of sample i, ``Y[i]`` its ``'res'`` value and ``Z[i]`` its symbol
        labels; ``Z`` is None when ``get_pseudo_label`` is falsy.
    """
    X = []
    Y = []
    Z = [] if get_pseudo_label else None

    with open(file) as f:
        data = json.load(f)

    for sample in data:
        imgs = []
        imgs_pseudo_label = []
        for img_path in sample['img_paths']:
            # Close the file handle as soon as the pixels have been converted.
            with Image.open(img_dir + img_path) as raw:
                img = img_transform(raw.convert('L'))
            imgs.append(img)
            if get_pseudo_label:
                imgs_pseudo_label.append(img_path.split('/')[0])
        X.append(imgs)
        if get_pseudo_label:
            Z.append(imgs_pseudo_label)
        Y.append(sample['res'])

    return X, Z, Y
def get_hwf(train = True, get_pseudo_label = False):
    """Load the HWF training split (or the test split when ``train`` is falsy).

    Returns:
        Whatever ``get_data`` returns for the selected JSON file.
    """
    file = './datasets/hwf/data/expr_train.json' if train else './datasets/hwf/data/expr_test.json'
    return get_data(file, get_pseudo_label)
if __name__ == "__main__":
    # get_hwf returns (X, Z, Y); the pseudo-label slot is unused here.
    train_X, _, train_Y = get_hwf(train = True)
    test_X, _, test_Y = get_hwf(train = False)
    print(len(train_X), len(test_X))
    print(len(train_X[0]), train_X[0][0].shape, train_Y[0])
| @@ -1,2 +0,0 @@ | |||
| % pseudo_label(N): N is an integer between 0 and 9 inclusive. | |||
| pseudo_label(N) :- between(0, 9, N). | |||
| % logic_forward([Z1, Z2], Res): Res is the sum of the two pseudo-labels. | |||
| logic_forward([Z1, Z2], Res) :- pseudo_label(Z1), pseudo_label(Z2), Res is Z1+Z2. | |||
| @@ -1,41 +0,0 @@ | |||
| import torch | |||
| import torchvision | |||
| from torch.utils.data import Dataset | |||
| from torchvision.transforms import transforms | |||
def get_data(file, img_dataset, get_pseudo_label):
    """Build addition examples from an index file.

    Every non-blank line of ``file`` holds three whitespace-separated
    integers: two indices into ``img_dataset`` and the target sum.

    Args:
        file: path of the index file.
        img_dataset: indexable collection of ``(image, label)`` pairs.
        get_pseudo_label: when truthy, also return the labels of both images.

    Returns:
        ``(X, Z, Y)``: ``X[i]`` is the pair of images, ``Y[i]`` the target
        and ``Z[i]`` the pair of labels; ``Z`` is None when
        ``get_pseudo_label`` is falsy.
    """
    X = []
    Y = []
    Z = [] if get_pseudo_label else None

    with open(file) as f:
        for line in f:
            # split() tolerates repeated spaces; blank lines are skipped
            # instead of failing on int('').
            fields = line.split()
            if not fields:
                continue
            first, second = int(fields[0]), int(fields[1])
            X.append([img_dataset[first][0], img_dataset[second][0]])
            if get_pseudo_label:
                Z.append([img_dataset[first][1], img_dataset[second][1]])
            Y.append(int(fields[2]))

    return X, Z, Y
def get_mnist_add(train = True, get_pseudo_label = False):
    """Load the MNIST-addition training split (or the test split).

    MNIST is downloaded to ``./datasets/mnist_add/`` when missing, and the
    index file for the chosen split is passed to ``get_data``.

    Returns:
        Whatever ``get_data`` returns for the selected split.
    """
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.1307,), (0.3081, ))
    transform = transforms.Compose([to_tensor, normalize])
    img_dataset = torchvision.datasets.MNIST(root='./datasets/mnist_add/', train=train, download=True, transform=transform)
    file = './datasets/mnist_add/train_data.txt' if train else './datasets/mnist_add/test_data.txt'
    return get_data(file, img_dataset, get_pseudo_label)
if __name__ == "__main__":
    # get_mnist_add returns (X, Z, Y); the pseudo-label slot is unused here.
    train_X, _, train_Y = get_mnist_add(train = True)
    test_X, _, test_Y = get_mnist_add(train = False)
    print(len(train_X), len(test_X))
    print(train_X[0][0].shape, train_X[0][1].shape, train_Y[0])
| @@ -1,199 +0,0 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 4, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import sys\n", | |||
| "\n", | |||
| "sys.path.append(\"../\")\n", | |||
| "\n", | |||
| "import torch.nn as nn\n", | |||
| "import torch\n", | |||
| "\n", | |||
| "from abl.abducer.abducer_base import HED_Abducer\n", | |||
| "from abl.abducer.kb import HED_prolog_KB\n", | |||
| "\n", | |||
| "from abl.utils.plog import logger\n", | |||
| "from abl.models.nn import SymbolNet\n", | |||
| "from abl.models.basic_model import BasicModel\n", | |||
| "from abl.models.wabl_models import WABLBasicModel\n", | |||
| "\n", | |||
| "from datasets.hed.get_hed import get_hed, split_equation\n", | |||
| "from abl import framework_hed" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 5, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize logger\n", | |||
| "recorder = logger()" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Logic Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": 6, | |||
| "metadata": {}, | |||
| "outputs": [ | |||
| { | |||
| "name": "stderr", | |||
| "output_type": "stream", | |||
| "text": [ | |||
| "ERROR: /home/gaoeh/ABL-Package/examples/datasets/hed/learn_add.pl:67:9: Syntax error: Operator expected\n" | |||
| ] | |||
| } | |||
| ], | |||
| "source": [ | |||
| "# Initialize knowledge base and abducer\n", | |||
| "kb = HED_prolog_KB(pseudo_label_list=[1, 0, '+', '='], pl_file='./datasets/hed/learn_add.pl')\n", | |||
| "abducer = HED_Abducer(kb)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Machine Learning Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize the necessary components for the machine learning part\n", | |||
| "cls = SymbolNet(\n", | |||
| " num_classes=len(kb.pseudo_label_list),\n", | |||
| " image_size=(28, 28, 1),\n", | |||
| ")\n", | |||
| "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", | |||
| "criterion = nn.CrossEntropyLoss()\n", | |||
| "optimizer = torch.optim.RMSprop(cls.parameters(), lr=0.001, weight_decay=1e-6)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Pretrain NN classifier\n", | |||
| "framework_hed.hed_pretrain(kb, cls, recorder)" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize BasicModel\n", | |||
| "# The function of BasicModel is to wrap NN models into the form of an sklearn estimator\n", | |||
| "base_model = BasicModel(\n", | |||
| " cls,\n", | |||
| " criterion,\n", | |||
| " optimizer,\n", | |||
| " device,\n", | |||
| " save_interval=1,\n", | |||
| " save_dir=recorder.save_dir,\n", | |||
| " batch_size=32,\n", | |||
| " num_epochs=1,\n", | |||
| " recorder=recorder,\n", | |||
| ")" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Use WABL model to join two parts" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "model = WABLBasicModel(base_model, kb.pseudo_label_list)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Dataset" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "total_train_data = get_hed(train=True)\n", | |||
| "train_data, val_data = split_equation(total_train_data, 3, 1)\n", | |||
| "test_data = get_hed(train=False)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Train and save" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "model, mapping = framework_hed.train_with_rule(model, abducer, train_data, val_data, select_num=10, min_len=5, max_len=8)\n", | |||
| "framework_hed.hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8)\n", | |||
| "\n", | |||
| "recorder.dump()" | |||
| ] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "ABL", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.8.16" | |||
| }, | |||
| "orig_nbformat": 4 | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 2 | |||
| } | |||
| @@ -1,184 +0,0 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import sys\n", | |||
| "\n", | |||
| "sys.path.append(\"../\")\n", | |||
| "\n", | |||
| "import torch.nn as nn\n", | |||
| "import torch\n", | |||
| "\n", | |||
| "from abl.abducer.abducer_base import AbducerBase\n", | |||
| "from abl.abducer.kb import HWF_KB\n", | |||
| "\n", | |||
| "from abl.utils.plog import logger\n", | |||
| "from abl.models.nn import SymbolNet\n", | |||
| "from abl.models.basic_model import BasicModel\n", | |||
| "from abl.models.wabl_models import WABLBasicModel\n", | |||
| "\n", | |||
| "from datasets.hwf.get_hwf import get_hwf\n", | |||
| "from abl import framework_hed" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize logger\n", | |||
| "recorder = logger()" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Logic Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize knowledge base and abducer\n", | |||
| "kb = HWF_KB(GKB_flag=True)\n", | |||
| "abducer = AbducerBase(kb)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Machine Learning Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize the necessary components for the machine learning part\n", | |||
| "cls = SymbolNet(num_classes=len(kb.pseudo_label_list), image_size=(45, 45, 1))\n", | |||
| "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", | |||
| "criterion = nn.CrossEntropyLoss()\n", | |||
| "optimizer = torch.optim.Adam(cls.parameters(), lr=0.001, betas=(0.9, 0.99))" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize BasicModel\n", | |||
| "# The function of BasicModel is to wrap NN models into the form of an sklearn estimator\n", | |||
| "base_model = BasicModel(\n", | |||
| " cls,\n", | |||
| " criterion,\n", | |||
| " optimizer,\n", | |||
| " device,\n", | |||
| " save_interval=1,\n", | |||
| " save_dir=recorder.save_dir,\n", | |||
| " batch_size=32,\n", | |||
| " num_epochs=1,\n", | |||
| " recorder=recorder,\n", | |||
| ")" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Use WABL model to join two parts" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize WABL model\n", | |||
| "# The main function of the WABL model is to serialize data and \n", | |||
| "# provide a unified interface for different machine learning models\n", | |||
| "model = WABLBasicModel(base_model, kb.pseudo_label_list)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Dataset" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Get training and testing data\n", | |||
| "train_data = get_hwf(train=True, get_pseudo_label=True)\n", | |||
| "test_data = get_hwf(train=False, get_pseudo_label=True)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Train and save" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Train model\n", | |||
| "framework_hed.train(\n", | |||
| " model, abducer, train_data, test_data, loop_num=15, sample_num=5000, verbose=1\n", | |||
| ")\n", | |||
| "\n", | |||
| "# Save results\n", | |||
| "recorder.dump()" | |||
| ] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "ABL", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.8.13" | |||
| }, | |||
| "orig_nbformat": 4 | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 2 | |||
| } | |||
| @@ -1,190 +0,0 @@ | |||
| { | |||
| "cells": [ | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "import sys\n", | |||
| "\n", | |||
| "sys.path.append(\"../\")\n", | |||
| "\n", | |||
| "import torch.nn as nn\n", | |||
| "import torch\n", | |||
| "\n", | |||
| "from abl.abducer.abducer_base import AbducerBase\n", | |||
| "from abl.abducer.kb import add_KB\n", | |||
| "\n", | |||
| "from abl.utils.plog import logger\n", | |||
| "from abl.models.nn import LeNet5\n", | |||
| "from abl.models.basic_model import BasicModel\n", | |||
| "from abl.models.wabl_models import WABLBasicModel\n", | |||
| "\n", | |||
| "from datasets.mnist_add.get_mnist_add import get_mnist_add\n", | |||
| "from abl import framework_hed" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize logger\n", | |||
| "recorder = logger()" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Logic Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize knowledge base and abducer\n", | |||
| "kb = add_KB(GKB_flag=True)\n", | |||
| "abducer = AbducerBase(kb, dist_func=\"confidence\")" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Machine Learning Part" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize the necessary components for the machine learning part\n", | |||
| "cls = LeNet5(num_classes=len(kb.pseudo_label_list))\n", | |||
| "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", | |||
| "criterion = nn.CrossEntropyLoss()\n", | |||
| "optimizer = torch.optim.Adam(cls.parameters(), lr=0.001, betas=(0.9, 0.99))" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize BasicModel\n", | |||
| "# The function of BasicModel is to wrap NN models into the form of an sklearn estimator\n", | |||
| "base_model = BasicModel(\n", | |||
| " cls,\n", | |||
| " criterion,\n", | |||
| " optimizer,\n", | |||
| " device,\n", | |||
| " save_interval=1,\n", | |||
| " save_dir=recorder.save_dir,\n", | |||
| " batch_size=32,\n", | |||
| " num_epochs=1,\n", | |||
| " recorder=recorder,\n", | |||
| ")" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Use WABL model to join two parts" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Initialize WABL model\n", | |||
| "# The main function of the WABL model is to serialize data and \n", | |||
| "# provide a unified interface for different machine learning models\n", | |||
| "model = WABLBasicModel(base_model, kb.pseudo_label_list)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Dataset" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Get training and testing data\n", | |||
| "train_X, train_Z, train_Y = get_mnist_add(train=True, get_pseudo_label=True)\n", | |||
| "test_X, test_Z, test_Y = get_mnist_add(train=False, get_pseudo_label=True)" | |||
| ] | |||
| }, | |||
| { | |||
| "attachments": {}, | |||
| "cell_type": "markdown", | |||
| "metadata": {}, | |||
| "source": [ | |||
| "### Train and save" | |||
| ] | |||
| }, | |||
| { | |||
| "cell_type": "code", | |||
| "execution_count": null, | |||
| "metadata": {}, | |||
| "outputs": [], | |||
| "source": [ | |||
| "# Train model\n", | |||
| "framework_hed.train(\n", | |||
| " model,\n", | |||
| " abducer,\n", | |||
| " (train_X, train_Z, train_Y),\n", | |||
| " (test_X, test_Z, test_Y),\n", | |||
| " loop_num=15,\n", | |||
| " sample_num=5000,\n", | |||
| " verbose=1,\n", | |||
| ")\n", | |||
| "\n", | |||
| "# Save results\n", | |||
| "recorder.dump()" | |||
| ] | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "kernelspec": { | |||
| "display_name": "ABL", | |||
| "language": "python", | |||
| "name": "python3" | |||
| }, | |||
| "language_info": { | |||
| "codemirror_mode": { | |||
| "name": "ipython", | |||
| "version": 3 | |||
| }, | |||
| "file_extension": ".py", | |||
| "mimetype": "text/x-python", | |||
| "name": "python", | |||
| "nbconvert_exporter": "python", | |||
| "pygments_lexer": "ipython3", | |||
| "version": "3.8.16" | |||
| }, | |||
| "orig_nbformat": 4 | |||
| }, | |||
| "nbformat": 4, | |||
| "nbformat_minor": 2 | |||
| } | |||
| @@ -1,407 +0,0 @@ | |||
| # coding: utf-8 | |||
| # ================================================================# | |||
| # Copyright (C) 2021 Freecss All rights reserved. | |||
| # | |||
| # File Name :framework.py | |||
| # Author :freecss | |||
| # Email :karlfreecss@gmail.com | |||
| # Created Date :2021/06/07 | |||
| # Description : | |||
| # | |||
| # ================================================================# | |||
| import pickle as pk | |||
| import torch | |||
| import torch.nn as nn | |||
| import numpy as np | |||
| import os | |||
| from utils.plog import INFO, DEBUG, clocker | |||
| from utils.utils import ( | |||
| flatten, | |||
| reform_idx, | |||
| block_sample, | |||
| gen_mappings, | |||
| mapping_res, | |||
| remapping_res, | |||
| extract_feature, | |||
| ) | |||
| from models.nn import MLP, SymbolNetAutoencoder | |||
| from models.basic_model import BasicModel, BasicDataset | |||
| from datasets.hed.get_hed import get_pretrain_data | |||
def result_statistics(pred_Z, Z, Y, logic_forward, char_acc_flag):
    """Compute accuracy statistics for a batch of predicted label sequences.

    Args:
        pred_Z: Iterable of predicted label sequences, one per example.
        Z: Iterable of reference label sequences; only read when
            ``char_acc_flag`` is truthy.
        Y: Sized iterable of expected results, one per example.
        logic_forward: Callable mapping a predicted label sequence to a
            result that is compared with the matching entry of ``Y``.
        char_acc_flag: When truthy, also report the per-label accuracy.

    Returns:
        A dict with key ``"ABL accuracy"`` and, when ``char_acc_flag`` is
        truthy, ``"Character level accuracy"``. An accuracy whose
        denominator is zero (empty input) is reported as ``0.0``.
    """
    result = {}
    if char_acc_flag:
        char_acc_num = 0
        char_num = 0
        for pred_z, z in zip(pred_Z, Z):
            char_num += len(z)
            # Positions missing from a too-short prediction count as wrong.
            char_acc_num += sum(1 for p, t in zip(pred_z, z) if p == t)
        result["Character level accuracy"] = (
            char_acc_num / char_num if char_num else 0.0
        )

    abl_acc_num = sum(
        1 for pred_z, y in zip(pred_Z, Y) if logic_forward(pred_z) == y
    )
    total = len(Y)
    result["ABL accuracy"] = abl_acc_num / total if total else 0.0
    return result
def filter_data(X, abduced_Z):
    """Keep only the examples for which abduction produced labels.

    Args:
        X: Iterable of examples.
        abduced_Z: Iterable of abduced label sequences aligned with ``X``;
            an empty sequence (or ``None``) marks a failed abduction.

    Returns:
        A tuple ``(finetune_X, finetune_Z)`` of two lists holding the
        examples and label sequences whose abduction result is non-empty.
    """
    finetune_X = []
    finetune_Z = []
    for abduced_x, abduced_z in zip(X, abduced_Z):
        # An identity test against a fresh [] literal is always true, so
        # emptiness is checked through the length instead.
        if abduced_z is not None and len(abduced_z) > 0:
            finetune_X.append(abduced_x)
            finetune_Z.append(abduced_z)
    return finetune_X, finetune_Z
def hed_pretrain(cls, image_size=(28, 28, 1)):
    """Fit ``cls`` on a few labelled symbol images and log its accuracy.

    For each of the directories ``"0"``, ``"1"``, ``"10"`` and ``"11"`` under
    ``./datasets/hed/dataset/mnist_images`` the first ten listed images are
    loaded and labelled with the directory's position (0-3). ``cls.fit`` is
    called on those samples, then 50 randomly chosen images per directory
    are passed to ``cls.predict`` and the hit rate is logged.

    Args:
        cls: Object exposing ``fit(X, y)`` and ``predict(X)``.
        image_size: ``image_size[0]`` and ``image_size[1]`` give the target
            height and width of each image.

    Returns:
        A tuple ``(pretrain_data_X, pretrain_data_Y)`` with the features and
        the integer labels used for fitting.
    """
    import cv2
    import random

    def load_feature(image_path):
        # Grayscale read, resize (cv2 takes (width, height)), rescale the
        # pixel values with (v - 127) / 128, then run extract_feature.
        pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        pixels = np.array(
            cv2.resize(pixels, (image_size[1], image_size[0])), np.float32
        )
        pixels = (pixels - 127) / 128.0
        return extract_feature(pixels.reshape((1, image_size[0], image_size[1])))

    INFO("Pretrain Start")
    image_root = "./datasets/hed/dataset/mnist_images"
    label_names = ["0", "1", "10", "11"]

    pretrain_data_X, pretrain_data_Y = [], []
    for class_id, label in enumerate(label_names):
        label_dir = os.path.join(image_root, label)
        file_names = os.listdir(label_dir)
        for position in range(10):
            pretrain_data_X.append(
                load_feature(os.path.join(label_dir, file_names[position]))
            )
            pretrain_data_Y.append(class_id)
    cls.fit(pretrain_data_X, pretrain_data_Y)

    # Sanity check: sample 50 images per class (with replacement) and log
    # the fraction predicted as that class.
    for class_id, label in enumerate(label_names):
        label_dir = os.path.join(image_root, label)
        file_names = os.listdir(label_dir)
        hits = 0
        for _ in range(50):
            feature = load_feature(
                os.path.join(label_dir, random.choice(file_names))
            )
            predict_label = cls.predict([feature])
            if predict_label == class_id:
                hits += 1
        INFO(
            "%d predict accuracy is " % class_id,
            hits / 50,
        )
    return pretrain_data_X, pretrain_data_Y
def _get_char_acc(model, X, consistent_pred_res, mapping):
    """Return the fraction of predicted symbols that match the abduced ones.

    Args:
        model: Object whose ``predict(X)`` returns a mapping with a ``"cls"``
            entry.
        X: Examples passed to ``model.predict``.
        consistent_pred_res: Abduced labels for ``X``; flattened before the
            comparison.
        mapping: Passed to ``mapping_res`` to translate the raw predictions.

    Returns:
        Number of matching positions divided by the number of predicted
        symbols.

    Raises:
        AssertionError: If the flattened predictions and abduced labels
            differ in length.
    """
    original_pred_res = model.predict(X)["cls"]
    pred_res = flatten(mapping_res(original_pred_res, mapping))
    # Flatten the abduced labels once instead of once per compared symbol.
    abduced_res = flatten(consistent_pred_res)
    INFO("Current model's output: ", pred_res)
    INFO("Abduced labels: ", abduced_res)

    assert len(pred_res) == len(abduced_res)
    matches = sum(
        predicted == abduced for predicted, abduced in zip(pred_res, abduced_res)
    )
    return matches / len(pred_res)
def abduce_and_train(model, abducer, mapping, train_X_true, pretrain_data, select_num):
    """Abduce consistent labels for a random batch and train the model on them.

    ``select_num`` equations are drawn (with replacement) from
    ``train_X_true`` and predicted by ``model``. For every candidate mapping
    the abducer revises the predictions; the mapping that makes the most
    equations consistent wins. The model is then trained on the consistent
    equations together with the pretrain samples.

    Args:
        model: Object exposing ``predict(X)["cls"]`` and ``train(X, Z)``.
        abducer: Object exposing ``zoopt_get_solution`` and
            ``kb.address_by_idx``.
        mapping: Mapping used to translate predictions, or ``None`` to try
            every mapping produced by ``gen_mappings``.
        train_X_true: Sized, indexable collection of equations to sample from.
        pretrain_data: Tuple ``(pretrain_data_X, pretrain_data_Y)``; labels
            are integers 0-3, translated to ``0, 1, "+", "="``.
        select_num: Number of equations to sample.

    Returns:
        ``(consistent_acc, char_acc, mapping)``, or ``(0, 0, None)`` when no
        sampled equation could be made consistent.
    """
    select_idx = np.random.randint(len(train_X_true), size=select_num)
    X = [train_X_true[idx] for idx in select_idx]
    original_pred_res = model.predict(X)["cls"]

    if mapping is None:
        mappings = gen_mappings(["+", "=", 0, 1], ["+", "=", 0, 1])
    else:
        mappings = [mapping]

    max_abduce_num = 20
    consistent_idx = []
    consistent_pred_res = []

    for m in mappings:
        pred_res = mapping_res(original_pred_res, m)
        solution = abducer.zoopt_get_solution(
            pred_res, [1] * len(pred_res), max_abduce_num
        )
        all_address_flag = reform_idx(solution, pred_res)

        consistent_idx_tmp = []
        consistent_pred_res_tmp = []
        for idx, pred in enumerate(pred_res):
            # Positions flagged non-zero are the ones the abducer may revise.
            address_idx = [
                i for i, flag in enumerate(all_address_flag[idx]) if flag != 0
            ]
            candidate = abducer.kb.address_by_idx([pred], 1, address_idx, True)
            if len(candidate) > 0:
                consistent_idx_tmp.append(idx)
                consistent_pred_res_tmp.append(candidate[0][0])

        # Keep the mapping that yields strictly more consistent equations.
        if len(consistent_idx_tmp) > len(consistent_idx):
            consistent_idx = consistent_idx_tmp
            consistent_pred_res = consistent_pred_res_tmp
            if len(mappings) > 1:
                mapping = m

    if len(consistent_idx) == 0:
        return 0, 0, None

    if len(mappings) > 1:
        INFO("Final mapping is: ", mapping)

    INFO("Train pool size is:", len(flatten(consistent_pred_res)))
    INFO("Start to use abduced pseudo label to train model...")

    # Mix the pretrain samples in, each wrapped as a one-symbol "equation".
    pretrain_data_X, pretrain_data_Y = pretrain_data
    pretrain_mapping = {0: 0, 1: 1, 2: "+", 3: "="}
    pretrain_data_X = [[sample] for sample in pretrain_data_X]
    pretrain_data_Y = [[pretrain_mapping[label]] for label in pretrain_data_Y]

    consistent_X = [X[idx] for idx in consistent_idx]
    model.train(
        consistent_X + pretrain_data_X,
        remapping_res(consistent_pred_res + pretrain_data_Y, mapping),
    )

    consistent_acc = len(consistent_idx) / select_num
    char_acc = _get_char_acc(model, consistent_X, consistent_pred_res, mapping)
    INFO("consistent_acc is %s, char_acc is %s" % (consistent_acc, char_acc))
    return consistent_acc, char_acc, mapping
def _remove_duplicate_rule(rule_dict):
    """Keep one rule per pair of operands, preferring the higher count.

    Rules are strings such as ``'my_op([1], [0], [1, 0])'``. Two rules clash
    when the text inside their first two bracket pairs is the same. Of two
    clashing rules the one with the larger count survives; on a tie the one
    seen later wins.

    Args:
        rule_dict: Dict mapping rule string to count. Losing rules are
            removed from it in place.

    Returns:
        The list of rule strings remaining in ``rule_dict``.
    """
    kept_by_operands = {}
    for rule in list(rule_dict):
        pieces = rule.split("]")
        # 'my_op([1], [0], [1, 0])' -> first operand '1', second '0' -> '10'
        operands = str(pieces[0].split("[")[1]) + str(pieces[1].split("[")[1])

        if operands not in kept_by_operands:
            kept_by_operands[operands] = rule
            continue

        incumbent = kept_by_operands[operands]
        if rule_dict[rule] >= rule_dict[incumbent]:
            rule_dict.pop(incumbent)
            kept_by_operands[operands] = rule
        else:
            rule_dict.pop(rule)
    return list(rule_dict)
def get_rules_from_data(
    model, abducer, mapping, train_X_true, samples_per_rule, samples_num
):
    """Abduce rules from random batches and keep the frequently seen ones.

    ``samples_num`` rounds are run. Each round repeatedly draws
    ``samples_per_rule`` equations (with replacement), keeps the predictions
    accepted by ``abducer.kb.logic_forward`` and asks
    ``abducer.abduce_rules`` for a rule set, until one is returned. Rules
    that occur in at least five rounds are kept and then de-duplicated with
    ``_remove_duplicate_rule``.

    Args:
        model: Object exposing ``predict(X)["cls"]``.
        abducer: Object exposing ``kb.logic_forward`` and ``abduce_rules``.
        mapping: Passed to ``mapping_res`` to translate the raw predictions.
        train_X_true: Sized, indexable collection of equations to sample from.
        samples_per_rule: Number of equations drawn per attempt.
        samples_num: Number of rounds.

    Returns:
        List of the surviving rule strings.
    """
    rules = []
    for _ in range(samples_num):
        # NOTE: loops until a batch yields a rule; there is no retry limit.
        while True:
            select_idx = np.random.randint(len(train_X_true), size=samples_per_rule)
            X = [train_X_true[idx] for idx in select_idx]
            original_pred_res = model.predict(X)["cls"]
            pred_res = mapping_res(original_pred_res, mapping)

            consistent_pred_res = [
                pred for pred in pred_res if abducer.kb.logic_forward([pred])
            ]
            if len(consistent_pred_res) != 0:
                rule = abducer.abduce_rules(consistent_pred_res)
                if rule is not None:
                    break
        rules.append(rule)

    # Count in how many abduced rule sets each individual rule appears.
    all_rule_dict = {}
    for rule in rules:
        for r in rule:
            all_rule_dict[r] = all_rule_dict.get(r, 0) + 1

    rule_dict = {rule: cnt for rule, cnt in all_rule_dict.items() if cnt >= 5}
    return _remove_duplicate_rule(rule_dict)
def _get_consist_rule_acc(model, abducer, mapping, rules, X):
    """Return the fraction of examples whose prediction satisfies ``rules``.

    Each example is predicted on its own, translated with ``mapping_res``,
    flattened and checked with ``abducer.kb.consist_rule``.

    Args:
        model: Object exposing ``predict(X)["cls"]``.
        abducer: Object exposing ``kb.consist_rule``.
        mapping: Passed to ``mapping_res`` to translate the raw predictions.
        rules: Rules passed to ``consist_rule``.
        X: Sized iterable of examples; must not be empty.

    Returns:
        Number of consistent examples divided by ``len(X)``.
    """

    def satisfies_rules(sample):
        raw_prediction = model.predict([sample])["cls"]
        symbols = flatten(mapping_res(raw_prediction, mapping))
        return abducer.kb.consist_rule(symbols, rules)

    consistent_total = sum(1 for sample in X if satisfies_rules(sample))
    return consistent_total / len(X)
def train_with_rule(
    model,
    abducer,
    train_data,
    val_data,
    pretrain_data,
    select_num=10,
    min_len=5,
    max_len=8,
):
    """Train the model stage by stage on equations of increasing length.

    For every ``equation_len`` in ``range(min_len, max_len)`` the equations
    of length ``equation_len`` and ``equation_len + 1`` are pooled.
    ``abduce_and_train`` is repeated until both of its accuracies reach 0.9
    five times in a row. Rules are then abduced and checked on the
    validation pool. The stage ends when more than 90% of the "true" and
    fewer than 10% of the "false" validation equations satisfy the rules;
    otherwise the base classifier is refit on the pretrain data and the
    stage restarts.

    Args:
        model: Object exposing ``predict``, ``train`` and ``cls_list``.
        abducer: Abducer passed to the helper functions.
        train_data: Indexed as ``train_data[1][length]``; presumably index 1
            holds the valid ("true") equations. The data at index 0 is not
            used here.
        val_data: Indexed as ``val_data[label][length]`` with label 1 used as
            the "true" pool and label 0 as the "false" pool.
        pretrain_data: Tuple ``(pretrain_data_X, pretrain_data_Y)``.
        select_num: Equations sampled per ``abduce_and_train`` call.
        min_len: First equation length of the curriculum.
        max_len: Exclusive upper bound of the stage lengths.

    Returns:
        ``(model, mapping)``; ``mapping`` is ``None`` when the length range
        is empty.
    """
    train_X = train_data
    val_X = val_data

    samples_num = 50
    samples_per_rule = 3
    # Defined up front so the final return is valid for an empty range.
    mapping = None

    # Start training / for each length of equations
    for equation_len in range(min_len, max_len):
        INFO(
            "============== equation_len: %d-%d ================"
            % (equation_len, equation_len + 1)
        )
        # Pool two consecutive lengths into fresh lists so the caller's
        # datasets are not extended in place.
        train_X_true = list(train_X[1][equation_len]) + list(
            train_X[1][equation_len + 1]
        )
        val_X_true = list(val_X[1][equation_len]) + list(val_X[1][equation_len + 1])
        val_X_false = list(val_X[0][equation_len]) + list(val_X[0][equation_len + 1])

        condition_cnt = 0
        while True:
            # In the first stage the symbol mapping is searched on every try.
            if equation_len == min_len:
                mapping = None

            # Abduce and train NN
            consistent_acc, char_acc, mapping = abduce_and_train(
                model, abducer, mapping, train_X_true, pretrain_data, select_num
            )
            if consistent_acc == 0:
                continue

            # Test if we can use mlp to evaluate
            if consistent_acc >= 0.9 and char_acc >= 0.9:
                condition_cnt += 1
            else:
                condition_cnt = 0

            # The condition has been satisfied continuously five times
            if condition_cnt >= 5:
                INFO("Now checking if we can go to next course")
                rules = get_rules_from_data(
                    model, abducer, mapping, train_X_true, samples_per_rule, samples_num
                )
                INFO("Learned rules from data:", rules)

                true_consist_rule_acc = _get_consist_rule_acc(
                    model, abducer, mapping, rules, val_X_true
                )
                false_consist_rule_acc = _get_consist_rule_acc(
                    model, abducer, mapping, rules, val_X_false
                )
                INFO(
                    "consist_rule_acc is %f, %f\n"
                    % (true_consist_rule_acc, false_consist_rule_acc)
                )

                # decide next course or restart
                if true_consist_rule_acc > 0.9 and false_consist_rule_acc < 0.1:
                    break

                # Restart the stage: refit the base classifier on the
                # pretrain data (done the same way for every stage).
                pretrain_data_X, pretrain_data_Y = pretrain_data
                model.cls_list[0].fit(pretrain_data_X, pretrain_data_Y)
                condition_cnt = 0
                INFO("Reload Model and retrain")

    return model, mapping
def hed_test(model, abducer, mapping, train_data, test_data, min_len=5, max_len=8):
    """Abduce the final rule set and log its consistency on the test data.

    A budget of 50 rule-abduction rounds is split evenly over the equation
    lengths ``min_len..max_len`` (the remainder goes to the last length).
    Rules are abduced per length from ``train_data[1][length]`` and merged.
    For every test length in ``range(5, 27)`` the share of
    ``test_data[1][length]`` and ``test_data[0][length]`` equations that
    satisfy the rules is logged.

    Args:
        model: Object exposing ``predict(X)["cls"]``.
        abducer: Abducer passed to the helper functions.
        mapping: Passed on to translate the raw predictions.
        train_data: Indexed as ``train_data[1][length]``.
        test_data: Indexed as ``test_data[label][length]`` for label 0 and 1.
        min_len: Shortest training equation length used for abduction.
        max_len: Longest training equation length used (inclusive).

    Returns:
        None. Results are only logged.
    """
    train_X = train_data
    test_X = test_data

    # Calculate how many rule-abduction rounds each equation length gets;
    # equation_samples_num[equation_len] is the number for that length.
    print("Now begin to train final mlp model")
    len_cnt = max_len - min_len + 1
    samples_num = 50
    per_len, remainder = divmod(samples_num, len_cnt)
    # Lengths below min_len get a zero so the list can be indexed by length.
    equation_samples_num = [0] * min_len + [per_len] * len_cnt
    if remainder:
        equation_samples_num[-1] += remainder
    assert sum(equation_samples_num) == samples_num

    # Abduce rules
    rules = []
    samples_per_rule = 3
    for equation_len in range(min_len, max_len + 1):
        equation_rules = get_rules_from_data(
            model,
            abducer,
            mapping,
            train_X[1][equation_len],
            samples_per_rule,
            equation_samples_num[equation_len],
        )
        rules.extend(equation_rules)
    # De-duplicate while keeping first-seen order, so the rule order does
    # not vary from run to run.
    rules = list(dict.fromkeys(rules))
    INFO("Learned rules from data:", rules)

    for equation_len in range(5, 27):
        true_consist_rule_acc = _get_consist_rule_acc(
            model, abducer, mapping, rules, test_X[1][equation_len]
        )
        false_consist_rule_acc = _get_consist_rule_acc(
            model, abducer, mapping, rules, test_X[0][equation_len]
        )
        INFO(
            "consist_rule_acc of testing length %d equations are %f, %f"
            % (equation_len, true_consist_rule_acc, false_consist_rule_acc)
        )
# Running this module directly does nothing.
if __name__ == "__main__":
    pass