From 5d600b9ced86d377293ba36b489fab3aff4f4026 Mon Sep 17 00:00:00 2001 From: troyyyyy Date: Wed, 17 Jan 2024 18:15:34 +0800 Subject: [PATCH] [MNT] remove lambdalearn converter --- ablkit/data/data_converter.py | 141 ------------ ablkit/learning/model_converter.py | 211 ------------------ .../mnist_add/main_with_model_converter.py | 160 ------------- 3 files changed, 512 deletions(-) delete mode 100644 ablkit/data/data_converter.py delete mode 100644 ablkit/learning/model_converter.py delete mode 100644 examples/mnist_add/main_with_model_converter.py diff --git a/ablkit/data/data_converter.py b/ablkit/data/data_converter.py deleted file mode 100644 index 6841ffe..0000000 --- a/ablkit/data/data_converter.py +++ /dev/null @@ -1,141 +0,0 @@ -from typing import Any, Tuple - -from ablkit.utils import tab_data_to_tuple -from .structures.list_data import ListData -from lambdaLearn.Base.TabularMixin import TabularMixin - - -class DataConverter: - """ - This class provides functionality to convert LambdaLearn data to ABLkit data. - """ - - def __init__(self) -> None: - pass - - def convert_lambdalearn_to_tuple( - self, dataset: TabularMixin, reasoning_result: Any - ) -> Tuple[Tuple, Tuple, Tuple, Tuple]: - """ - Convert a lambdalearn dataset to a tuple of tuples (label_data, train_data, valid_data, test_data), # noqa: E501 - each containing (data, label, reasoning_result). - - Parameters - ---------- - dataset : TabularMixin - The LambdaLearn dataset to be converted. - reasoning_result : Any - The reasoning result of the dataset. - Returns - ------- - Tuple[Tuple, Tuple, Tuple, Tuple] - A tuple of (label_data, train_data, valid_data, test_data), where each element is - a tuple of (data, label, reasoning_result). - """ - - if not isinstance(dataset, TabularMixin): - raise NotImplementedError( - "Only support converting the datasets that are instances of TabularMixin. " - + "Please refer to the documentation and manually convert the dataset into a tuple." - ) - - label_data = tab_data_to_tuple( - dataset.labeled_X, dataset.labeled_y, reasoning_result=reasoning_result - ) - train_data = tab_data_to_tuple( - dataset.unlabeled_X, dataset.unlabeled_y, reasoning_result=reasoning_result - ) - valid_data = tab_data_to_tuple( - dataset.valid_X, dataset.valid_y, reasoning_result=reasoning_result - ) - test_data = tab_data_to_tuple( - dataset.test_X, dataset.test_y, reasoning_result=reasoning_result - ) - - return label_data, train_data, valid_data, test_data - - def convert_lambdalearn_to_listdata( - self, dataset: TabularMixin, reasoning_result: Any - ) -> Tuple[ListData, ListData, ListData, ListData]: - """ - Convert a lambdalearn dataset to a tuple of ListData - (label_data_examples, train_data_examples, valid_data_examples, test_data_examples). - - Parameters - ---------- - dataset : TabularMixin - The LambdaLearn dataset to be converted. - reasoning_result : Any - The reasoning result of the dataset. - Returns - ------- - Tuple[ListData, ListData, ListData, ListData] - A tuple of ListData (label_data_examples, train_data_examples, valid_data_examples, test_data_examples) # noqa: E501 - """ - - if not isinstance(dataset, TabularMixin): - raise NotImplementedError( - "Only support converting the datasets that are instances of TabularMixin. " - + "Please refer to the documentation and manually convert the dataset " - + "into a ListData." - ) - - label_data, train_data, valid_data, test_data = self.convert_lambdalearn_to_tuple( - dataset, reasoning_result - ) - - if label_data is not None: - X, gt_pseudo_label, Y = label_data - label_data_examples = ListData(X=X, gt_pseudo_label=gt_pseudo_label, Y=Y) - if train_data is not None: - X, gt_pseudo_label, Y = train_data - train_data_examples = ListData(X=X, gt_pseudo_label=gt_pseudo_label, Y=Y) - if valid_data is not None: - X, gt_pseudo_label, Y = valid_data - valid_data_examples = ListData(X=X, gt_pseudo_label=gt_pseudo_label, Y=Y) - if test_data is not None: - X, gt_pseudo_label, Y = test_data - test_data_examples = ListData(X=X, gt_pseudo_label=gt_pseudo_label, Y=Y) - - return label_data_examples, train_data_examples, valid_data_examples, test_data_examples - - -if __name__ == "__main__": - from lambdaLearn.Dataset.Tabular.BreastCancer import BreastCancer - - breast_dataset = BreastCancer(labeled_size=0.1, stratified=True, shuffle=True) - dataconverter = DataConverter() - - label_data, train_data, valid_data, test_data = dataconverter.convert_lambdalearn_to_tuple( - breast_dataset, 0 - ) - print( - type(label_data).__name__, - type(train_data).__name__, - type(valid_data).__name__, - type(test_data).__name__, - ) - print(len(label_data)) - print(len(label_data[0]), len(label_data[1]), len(label_data[2])) - print(label_data[0][0], label_data[1][0], label_data[2][0]) - print() - - ( - label_data_examples, - train_data_examples, - valid_data_examples, - test_data_examples, - ) = dataconverter.convert_lambdalearn_to_listdata(breast_dataset, 0) - print( - type(label_data_examples).__name__, - type(train_data_examples).__name__, - type(valid_data_examples).__name__, - type(test_data_examples).__name__, - ) - print( - len(label_data_examples.X), - len(label_data_examples.gt_pseudo_label), - len(label_data_examples.Y), - ) - label_data_example = label_data_examples[0] - print(label_data_example.X, label_data_example.gt_pseudo_label, label_data_example.Y) diff --git a/ablkit/learning/model_converter.py b/ablkit/learning/model_converter.py deleted file mode 100644 index 13b36f1..0000000 --- a/ablkit/learning/model_converter.py +++ /dev/null @@ -1,211 +0,0 @@ -import torch -import copy -from typing import Any, Callable, List, Optional - -from .abl_model import ABLModel -from .basic_nn import BasicNN -from lambdaLearn.Base.DeepModelMixin import DeepModelMixin - - -class ModelConverter: - """ - This class provides functionality to convert LambdaLearn models to ABLkit models. - """ - - def __init__(self) -> None: - pass - - def convert_lambdalearn_to_ablmodel( - self, - lambdalearn_model, - loss_fn: torch.nn.Module, - optimizer_dict: dict, - scheduler_dict: Optional[dict] = None, - device: Optional[torch.device] = None, - batch_size: int = 32, - num_epochs: int = 1, - stop_loss: Optional[float] = 0.0001, - num_workers: int = 0, - save_interval: Optional[int] = None, - save_dir: Optional[str] = None, - train_transform: Callable[..., Any] = None, - test_transform: Callable[..., Any] = None, - collate_fn: Callable[[List[Any]], Any] = None, - ): - """ - Convert a lambdalearn model to an ABLModel. If the lambdalearn model is an instance of - DeepModelMixin, its network will be used as the model of BasicNN. Otherwise, the lambdalearn - model should implement ``fit`` and ``predict`` methods. - - Parameters - ---------- - lambdalearn_model : Union[DeepModelMixin, Any] - The LambdaLearn model to be converted. - loss_fn : torch.nn.Module - The loss function used for training. - optimizer_dict : dict - The dict contains necessary parameters to construct a optimizer used for training. - The optimizer class is specified by the ``optimizer`` key. - scheduler_dict : dict, optional - The dict contains necessary parameters to construct a learning rate scheduler used - for training, which will be called at the end of each run of the ``fit`` method. - The scheduler class is specified by the ``scheduler`` key. It should implement the - ``step`` method. Defaults to None. - device : torch.device, optional - The device on which the model will be trained or used for prediction, - Defaults to torch.device("cpu"). - batch_size : int, optional - The batch size used for training. Defaults to 32. - num_epochs : int, optional - The number of epochs used for training. Defaults to 1. - stop_loss : float, optional - The loss value at which to stop training. Defaults to 0.0001. - num_workers : int - The number of workers used for loading data. Defaults to 0. - save_interval : int, optional - The model will be saved every ``save_interval`` epoch during training. Defaults to None. - save_dir : str, optional - The directory in which to save the model during training. Defaults to None. - train_transform : Callable[..., Any], optional - A function/transform that takes an object and returns a transformed version used - in the `fit` and `train_epoch` methods. Defaults to None. - test_transform : Callable[..., Any], optional - A function/transform that takes an object and returns a transformed version in the - `predict`, `predict_proba` and `score` methods. Defaults to None. - collate_fn : Callable[[List[T]], Any], optional - The function used to collate data. Defaults to None. - - Returns - ------- - ABLModel - The converted ABLModel instance. - """ - if isinstance(lambdalearn_model, DeepModelMixin): - base_model = self.convert_lambdalearn_to_basicnn( - lambdalearn_model, - loss_fn, - optimizer_dict, - scheduler_dict, - device, - batch_size, - num_epochs, - stop_loss, - num_workers, - save_interval, - save_dir, - train_transform, - test_transform, - collate_fn, - ) - return ABLModel(base_model) - - if not (hasattr(lambdalearn_model, "fit") and hasattr(lambdalearn_model, "predict")): - raise NotImplementedError( - "The lambdalearn_model should be an instance of DeepModelMixin, or implement " - + "fit and predict methods." - ) - - return ABLModel(lambdalearn_model) - - def convert_lambdalearn_to_basicnn( - self, - lambdalearn_model: DeepModelMixin, - loss_fn: torch.nn.Module, - optimizer_dict: dict, - scheduler_dict: Optional[dict] = None, - device: Optional[torch.device] = None, - batch_size: int = 32, - num_epochs: int = 1, - stop_loss: Optional[float] = 0.0001, - num_workers: int = 0, - save_interval: Optional[int] = None, - save_dir: Optional[str] = None, - train_transform: Callable[..., Any] = None, - test_transform: Callable[..., Any] = None, - collate_fn: Callable[[List[Any]], Any] = None, - ): - """ - Convert a lambdalearn model to a BasicNN. If the lambdalearn model is an instance of - DeepModelMixin, its network will be used as the model of BasicNN. - - Parameters - ---------- - lambdalearn_model : Union[DeepModelMixin, Any] - The LambdaLearn model to be converted. - loss_fn : torch.nn.Module - The loss function used for training. - optimizer_dict : dict - The dict contains necessary parameters to construct a optimizer used for training. - scheduler_dict : dict, optional - The dict contains necessary parameters to construct a learning rate scheduler used - for training, which will be called at the end of each run of the ``fit`` method. - The scheduler class is specified by the ``scheduler`` key. It should implement the - ``step`` method. Defaults to None. - device : torch.device, optional - The device on which the model will be trained or used for prediction, - Defaults to torch.device("cpu"). - batch_size : int, optional - The batch size used for training. Defaults to 32. - num_epochs : int, optional - The number of epochs used for training. Defaults to 1. - stop_loss : float, optional - The loss value at which to stop training. Defaults to 0.0001. - num_workers : int - The number of workers used for loading data. Defaults to 0. - save_interval : int, optional - The model will be saved every ``save_interval`` epoch during training. Defaults to None. - save_dir : str, optional - The directory in which to save the model during training. Defaults to None. - train_transform : Callable[..., Any], optional - A function/transform that takes an object and returns a transformed version used - in the `fit` and `train_epoch` methods. Defaults to None. - test_transform : Callable[..., Any], optional - A function/transform that takes an object and returns a transformed version in the - `predict`, `predict_proba` and `score` methods. Defaults to None. - collate_fn : Callable[[List[T]], Any], optional - The function used to collate data. Defaults to None. - - Returns - ------- - BasicNN - The converted BasicNN instance. - """ - if isinstance(lambdalearn_model, DeepModelMixin): - if not isinstance(lambdalearn_model.network, torch.nn.Module): - raise NotImplementedError( - "Expected lambdalearn_model.network to be a torch.nn.Module, " - + f"but got {type(lambdalearn_model.network)}" - ) - # Only use the network part and device of the lambdalearn model - network = copy.deepcopy(lambdalearn_model.network) - optimizer_class = optimizer_dict["optimizer"] - optimizer_dict.pop("optimizer") - optimizer = optimizer_class(network.parameters(), **optimizer_dict) - if scheduler_dict is not None: - scheduler_class = scheduler_dict["scheduler"] - scheduler_dict.pop("scheduler") - scheduler = scheduler_class(optimizer, **scheduler_dict) - else: - scheduler = None - device = lambdalearn_model.device if device is None else device - base_model = BasicNN( - model=network, - loss_fn=loss_fn, - optimizer=optimizer, - scheduler=scheduler, - device=device, - batch_size=batch_size, - num_epochs=num_epochs, - stop_loss=stop_loss, - num_workers=num_workers, - save_interval=save_interval, - save_dir=save_dir, - train_transform=train_transform, - test_transform=test_transform, - collate_fn=collate_fn, - ) - return base_model - else: - raise NotImplementedError( - "The lambdalearn_model should be an instance of DeepModelMixin." - ) diff --git a/examples/mnist_add/main_with_model_converter.py b/examples/mnist_add/main_with_model_converter.py deleted file mode 100644 index 9e3b251..0000000 --- a/examples/mnist_add/main_with_model_converter.py +++ /dev/null @@ -1,160 +0,0 @@ -import argparse -import os.path as osp - -from torch import nn -from torch.optim import RMSprop, lr_scheduler - -from lambdaLearn.Algorithm.AbductiveLearning.bridge import SimpleBridge -from lambdaLearn.Algorithm.AbductiveLearning.data.evaluation import ReasoningMetric, SymbolAccuracy -from lambdaLearn.Algorithm.AbductiveLearning.learning import ABLModel -from lambdaLearn.Algorithm.AbductiveLearning.learning.model_converter import ModelConverter -from lambdaLearn.Algorithm.AbductiveLearning.reasoning import GroundKB, KBBase, PrologKB, Reasoner -from lambdaLearn.Algorithm.AbductiveLearning.utils import ABLLogger, print_log -from lambdaLearn.Algorithm.SemiSupervised.Classification.FixMatch import FixMatch - -from datasets import get_dataset -from models.nn import LeNet5 - - -class AddKB(KBBase): - def __init__(self, pseudo_label_list=list(range(10))): - super().__init__(pseudo_label_list) - - def logic_forward(self, nums): - return sum(nums) - - -class AddGroundKB(GroundKB): - def __init__(self, pseudo_label_list=list(range(10)), GKB_len_list=[2]): - super().__init__(pseudo_label_list, GKB_len_list) - - def logic_forward(self, nums): - return sum(nums) - - -def main(): - parser = argparse.ArgumentParser(description="MNIST Addition example") - parser.add_argument( - "--no-cuda", action="store_true", default=False, help="disables CUDA training" - ) - parser.add_argument( - "--epochs", - type=int, - default=1, - help="number of epochs in each learning loop iteration (default : 1)", - ) - parser.add_argument( - "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)" - ) - parser.add_argument("--alpha", type=float, default=0.9, help="alpha in RMSprop (default : 0.9)") - parser.add_argument( - "--batch-size", type=int, default=32, help="base model batch size (default : 32)" - ) - parser.add_argument( - "--loops", type=int, default=2, help="number of loop iterations (default : 2)" - ) - parser.add_argument( - "--segment_size", type=int, default=0.01, help="segment size (default : 0.01)" - ) - parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)") - parser.add_argument( - "--max-revision", - type=int, - default=-1, - help="maximum revision in reasoner (default : -1)", - ) - parser.add_argument( - "--require-more-revision", - type=int, - default=0, - help="require more revision in reasoner (default : 0)", - ) - kb_type = parser.add_mutually_exclusive_group() - kb_type.add_argument( - "--prolog", action="store_true", default=False, help="use PrologKB (default: False)" - ) - kb_type.add_argument( - "--ground", action="store_true", default=False, help="use GroundKB (default: False)" - ) - - args = parser.parse_args() - - # Build logger - print_log("Abductive Learning on the MNIST Addition example.", logger="current") - - # -- Working with Data ------------------------------ - print_log("Working with Data.", logger="current") - train_data = get_dataset(train=True, get_pseudo_label=True) - test_data = get_dataset(train=False, get_pseudo_label=True) - - # -- Building the Learning Part --------------------- - print_log("Building the Learning Part.", logger="current") - - # Build necessary components for BasicNN - model = FixMatch( - network=LeNet5(), - threshold=0.95, - lambda_u=1.0, - mu=7, - T=0.5, - epoch=1, - num_it_epoch=2**20, - num_it_total=2**20, - device="cuda", - ) - - loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2) - optimizer_dict = dict(optimizer=RMSprop, lr=0.0003, alpha=0.9) - scheduler_dict = dict( - scheduler=lr_scheduler.OneCycleLR, max_lr=0.0003, pct_start=0.15, total_steps=200 - ) - - converter = ModelConverter() - base_model = converter.convert_lambdalearn_to_basicnn( - model, loss_fn=loss_fn, optimizer_dict=optimizer_dict, scheduler_dict=scheduler_dict - ) - - # Build ABLModel - model = ABLModel(base_model) - - # -- Building the Reasoning Part -------------------- - print_log("Building the Reasoning Part.", logger="current") - - # Build knowledge base - if args.prolog: - kb = PrologKB(pseudo_label_list=list(range(10)), pl_file="add.pl") - elif args.ground: - kb = AddGroundKB() - else: - kb = AddKB() - - # Create reasoner - reasoner = Reasoner( - kb, max_revision=args.max_revision, require_more_revision=args.require_more_revision - ) - - # -- Building Evaluation Metrics -------------------- - print_log("Building Evaluation Metrics.", logger="current") - metric_list = [SymbolAccuracy(prefix="mnist_add"), ReasoningMetric(kb=kb, prefix="mnist_add")] - - # -- Bridging Learning and Reasoning ---------------- - print_log("Bridge Learning and Reasoning.", logger="current") - bridge = SimpleBridge(model, reasoner, metric_list) - - # Retrieve the directory of the Log file and define the directory for saving the model weights. - log_dir = ABLLogger.get_current_instance().log_dir - weights_dir = osp.join(log_dir, "weights") - - # Train and Test - bridge.train( - train_data, - loops=args.loops, - segment_size=args.segment_size, - save_interval=args.save_interval, - save_dir=weights_dir, - ) - bridge.test(test_data) - - -if __name__ == "__main__": - main()