|
- from autogl.datasets import build_dataset_from_name
- from autogl.solver import AutoLinkPredictor
- from autogl.backend import DependentBackend
- import numpy as np
- import scipy.sparse as sp
-
- if DependentBackend.is_pyg():
- from torch_geometric.utils import train_test_split_edges
- from autogl.datasets.utils.conversion._to_pyg_dataset import to_pyg_dataset as convert_dataset
def split_edges(dataset, train, val):
    """Split the edges of every graph in ``dataset`` in place (PyG backend).

    Each entry is replaced by the result of
    ``train_test_split_edges(graph, val, 1 - train - val)``.

    Args:
        dataset: Sized, indexable collection of graphs that supports item
            assignment.
        train: Ratio of edges left for training; only used to derive the
            remaining (test) ratio as ``1 - train - val``.
        val: Ratio of edges passed to ``train_test_split_edges`` as its
            second argument (the validation ratio).

    Returns:
        The same ``dataset`` object, with every entry replaced.
    """
    for position in range(len(dataset)):
        graph = dataset[position]
        remaining = 1 - train - val
        dataset[position] = train_test_split_edges(graph, val, remaining)
    return dataset
- else:
- import dgl
- from autogl.datasets.utils.conversion._to_dgl_dataset import to_dgl_dataset as convert_dataset
def split_train_test(g, train, val):
    """Split a DGL graph into train/val/test edge sets for link prediction.

    Positive edges are the graph's own edges, shuffled with
    ``np.random.permutation`` and cut into test / validation / train parts.
    Negative edges are sampled from node pairs that are not connected in
    ``g`` (self-pairs excluded).

    Args:
        g: DGL graph whose ``edges()`` returns tensors exposing ``.numpy()``.
        train: Fraction of edges used for training.
        val: Fraction of edges used for validation. The test fraction is
            ``1 - train - val``.

    Returns:
        A 7-tuple ``(train_g, train_pos_g, train_neg_g, val_pos_g,
        val_neg_g, test_pos_g, test_neg_g)``. ``train_g`` is ``g`` with the
        validation and test edges removed and self-loops added; the other
        six are graphs over the same node set holding only the respective
        positive or negative edges.

    Raises:
        ValueError: If ``train`` and ``val`` yield a negative split size or
            more held-out edges than the graph has.
    """
    num_nodes = g.number_of_nodes()
    num_edges = g.number_of_edges()
    u, v = g.edges()

    eids = np.random.permutation(np.arange(num_edges))
    test_size = int(num_edges * (1 - train - val))
    val_size = int(num_edges * val)
    # Validate the computed sizes (not the raw ratios) so harmless float
    # noise such as 1 - 0.9 - 0.1 does not trigger a spurious error.
    if test_size < 0 or val_size < 0 or test_size + val_size > num_edges:
        raise ValueError(
            "train and val must be non-negative fractions with "
            "train + val <= 1, got train={!r}, val={!r}".format(train, val)
        )
    held_out = test_size + val_size

    test_ids = eids[:test_size]
    val_ids = eids[test_size:held_out]
    train_ids = eids[held_out:]
    test_pos_u, test_pos_v = u[test_ids], v[test_ids]
    val_pos_u, val_pos_v = u[val_ids], v[val_ids]
    train_pos_u, train_pos_v = u[train_ids], v[train_ids]

    # Find all negative edges and split them for training and testing.
    # The explicit shape keeps the adjacency N x N even when the
    # highest-numbered nodes have no incident edges.
    # NOTE: this materialises a dense N x N array.
    adj = sp.coo_matrix(
        (np.ones(len(u)), (u.numpy(), v.numpy())),
        shape=(num_nodes, num_nodes),
    )
    # A pair is a negative candidate only if no edge connects it. Testing
    # "== 0" (instead of "1 - adj - I != 0") keeps existing self-loops and
    # duplicated edges from being mistaken for negatives.
    neg_mask = adj.toarray() == 0
    np.fill_diagonal(neg_mask, False)
    neg_u, neg_v = np.where(neg_mask)

    # Sampling uses np.random.choice's default (with replacement), so a
    # negative pair may be drawn more than once.
    neg_eids = np.random.choice(len(neg_u), num_edges)
    test_neg_ids = neg_eids[:test_size]
    val_neg_ids = neg_eids[test_size:held_out]
    train_neg_ids = neg_eids[held_out:]
    test_neg_u, test_neg_v = neg_u[test_neg_ids], neg_v[test_neg_ids]
    val_neg_u, val_neg_v = neg_u[val_neg_ids], neg_v[val_neg_ids]
    train_neg_u, train_neg_v = neg_u[train_neg_ids], neg_v[train_neg_ids]

    # The training graph must not contain the held-out edges.
    train_g = dgl.add_self_loop(dgl.remove_edges(g, eids[:held_out]))

    train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=num_nodes)
    train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=num_nodes)

    val_pos_g = dgl.graph((val_pos_u, val_pos_v), num_nodes=num_nodes)
    val_neg_g = dgl.graph((val_neg_u, val_neg_v), num_nodes=num_nodes)

    test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=num_nodes)
    test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=num_nodes)

    return (
        train_g,
        train_pos_g,
        train_neg_g,
        val_pos_g,
        val_neg_g,
        test_pos_g,
        test_neg_g,
    )
-
def split_edges(dataset, train, val):
    """Split the edges of every graph in ``dataset`` in place (DGL backend).

    Each entry is replaced by the tuple returned from
    ``split_train_test(graph, train, val)``.

    Args:
        dataset: Sized, indexable collection of graphs that supports item
            assignment.
        train: Fraction of edges used for training.
        val: Fraction of edges used for validation.

    Returns:
        The same ``dataset`` object, with every entry replaced.
    """
    for position in range(len(dataset)):
        graph = dataset[position]
        dataset[position] = split_train_test(graph, train, val)
    return dataset
-
- from autogl.datasets.utils import split_edges
-
# Load Cora, convert it to the active backend's dataset type, then split its
# edges with train=0.8 and val=0.05.
# NOTE(review): a later import in this file rebinds ``split_edges`` after the
# locally defined versions, so the imported one is what runs here - confirm
# that is intended.
cora = split_edges(convert_dataset(build_dataset_from_name("cora")), 0.8, 0.05)

# Link-prediction solver over three candidate models; hpo_module is None.
solver = AutoLinkPredictor(
    graph_models=("gin", "gat", "gcn"),
    hpo_module=None,
    device="auto",
)

# Train, show the leaderboard, then predict on the same dataset.
solver.fit(cora, evaluation_method=["acc"])
solver.leaderboard.show()

result = solver.predict(cora)
print(result)
|