{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Link Prediction\n", "In this file we will give you a simple example to show how to use AutoGL to do link prediction.\n", "\n", "## Import libraries\n", "First, you should import some libraries and you can set the random seed before you train the model." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from autogl.datasets import build_dataset_from_name\n", "from autogl.solver import AutoLinkPredictor\n", "from autogl.solver.utils import set_seed\n", "import argparse\n", "from autogl.backend import DependentBackend\n", "from autogl.datasets.utils import split_edges\n", "from autogl.module.train.evaluation import Auc\n", "import yaml\n", "import random\n", "import torch\n", "import numpy as np\n", "from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter\n", "set_seed(202106)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Config & Dataset\n", "Then, you can load dataset and graph nas methods from configs" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " NumNodes: 2708\n", " NumEdges: 10556\n", " NumFeats: 1433\n", " NumClasses: 7\n", " NumTrainingSamples: 140\n", " NumValidationSamples: 500\n", " NumTestSamples: 1000\n", "Done loading data from cached files.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:root:Ignore passed dec since enc is a whole model\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[2022-11-09 23:02:49] WARNING (root/MainThread) Ignore passed dec since enc is a whole model\n" ] } ], "source": [ "def parse(args=None):\n", " parser = ArgumentParser(\"auto link prediction\", formatter_class=ArgumentDefaultsHelpFormatter)\n", " parser.add_argument('--config', type=str, default=\"../../configs/lp_gcn_benchmark.yml\")\n", " parser.add_argument('--dataset', choices=['cora', 'citeseer', 'pubmed'], default='cora', type=str)\n", " parser.add_argument(\"--hpo\", type=str, default=\"tpe\", help=\"hpo methods\")\n", " parser.add_argument(\"--max_eval\", type=int, default=50, help=\"max hpo evaluation times\")\n", " parser.add_argument(\"--seed\", type=int, default=0, help=\"random seed\")\n", " parser.add_argument(\"--device\", default=0, type=int, help=\"GPU device\")\n", " args = parser.parse_args(args)\n", " return args\n", "\n", "args = parse('')\n", "\n", "dataset = build_dataset_from_name(args.dataset)\n", "dataset = split_edges(dataset, 0.8, 0.05) # split the edges for dataset\n", "if DependentBackend.is_dgl(): # add self-loop\n", " import dgl\n", " # add self loop to 0\n", " data = list(dataset[0])\n", " data[0] = dgl.add_self_loop(data[0])\n", " dataset = [data]\n", "configs = yaml.load(open(args.config, \"r\").read(), Loader=yaml.FullLoader)\n", "configs[\"hpo\"][\"name\"] = args.hpo\n", "configs[\"hpo\"][\"max_evals\"] = args.max_eval\n", "solver = AutoLinkPredictor.from_config(configs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run " ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:LinkPredictor:Use the default train/val/test ratio in given dataset\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[2022-11-09 23:03:03] INFO (LinkPredictor/MainThread) Use the default train/val/test ratio in given dataset\n", "HPO Search Phase:\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/50 [00:00\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msolver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime_limit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3600\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevaluation_method\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mAuc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0msolver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_leaderboard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mauc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msolver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"auc\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"test auc: {:.4f}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mauc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/code/AutoGL/autogl/solver/classifier/link_predictor.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, dataset, time_limit, inplace, train_split, val_split, evaluation_method, seed)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 363\u001b[0m optimized, _ = self.hpo_module.optimize(\n\u001b[0;32m--> 364\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compose_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime_limit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtime_for_each_model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 365\u001b[0m )\n\u001b[1;32m 366\u001b[0m \u001b[0;31m# to save memory, all the trainer derived will be mapped to cpu\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/code/AutoGL/autogl/module/hpo/advisorbase.py\u001b[0m in \u001b[0;36moptimize\u001b[0;34m(self, trainer, dataset, time_limit, memory_limit)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mnew_parameter_values_json\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_trial\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameter_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mdecoded_json\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial_para\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_decode_para\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_parameter_values_json\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m \u001b[0mcurrent_trainer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mperf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdecoded_json\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 168\u001b[0m \u001b[0mnew_trial\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameter_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrial_para\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0mnew_trial\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Completed\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/code/AutoGL/autogl/module/hpo/advisorbase.py\u001b[0m in \u001b[0;36mfn\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0mcurrent_trainer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mduplicate_from_hyper_parameter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 146\u001b[0;31m \u001b[0mcurrent_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 147\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_higher_better\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcurrent_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_valid_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_higher_better\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/code/AutoGL/autogl/module/train/link_prediction_full.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, dataset, keep_valid_result)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyg_dgl\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'dgl'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 378\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_only_dgl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 379\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkeep_valid_result\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalid_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_predict_only_dgl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/code/AutoGL/autogl/module/train/link_prediction_full.py\u001b[0m in \u001b[0;36m_train_only_dgl\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 314\u001b[0m )\n\u001b[1;32m 315\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 316\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 317\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlr_scheduler_type\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/virtualenvs/py3_hetgnn/lib/python3.6/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/jcai/virtualenvs/py3_hetgnn/lib/python3.6/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 145\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "solver.fit(dataset, time_limit=3600, evaluation_method=[Auc], seed=args.seed)\n", "solver.get_leaderboard().show()\n", "auc = solver.evaluate(metric=\"auc\")\n", "print(\"test auc: {:.4f}\".format(auc))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.6.9 ('py3_hetgnn')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "c71a81e10bf2a5359758b67f38009e83c4204a5be77a74a1c3689ce1bb3351ac" } } }, "nbformat": 4, "nbformat_minor": 2 }