[MNT] remove lambdalearn converter

2 years ago · 5d600b9ced
--- a/ablkit/data/data_converter.py
+++ b/ablkit/data/data_converter.py
@@ -1,141 +0,0 @@
 from typing import Any, Tuple

 from ablkit.utils import tab_data_to_tuple
 from .structures.list_data import ListData
 from lambdaLearn.Base.TabularMixin import TabularMixin


 class DataConverter:
    """
    This class provides functionality to convert LambdaLearn data to ABLkit data.

    Both converters take a LambdaLearn ``TabularMixin`` dataset and return four splits
    in the fixed order (labeled, unlabeled/train, validation, test).
    """

    def __init__(self) -> None:
        pass

    def convert_lambdalearn_to_tuple(
        self, dataset: TabularMixin, reasoning_result: Any
    ) -> Tuple[Tuple, Tuple, Tuple, Tuple]:
        """
        Convert a lambdalearn dataset to a tuple of tuples
        (label_data, train_data, valid_data, test_data),
        each containing (data, label, reasoning_result).

        Parameters
        ----------
        dataset : TabularMixin
            The LambdaLearn dataset to be converted.
        reasoning_result : Any
            The reasoning result of the dataset. The same value is forwarded to
            ``tab_data_to_tuple`` for every split.

        Returns
        -------
        Tuple[Tuple, Tuple, Tuple, Tuple]
            A tuple of (label_data, train_data, valid_data, test_data), where each element is
            whatever ``tab_data_to_tuple`` returns for that split.

        Raises
        ------
        NotImplementedError
            If ``dataset`` is not an instance of ``TabularMixin``.
        """
        if not isinstance(dataset, TabularMixin):
            raise NotImplementedError(
                "Only support converting the datasets that are instances of TabularMixin. "
                + "Please refer to the documentation and manually convert the dataset into a tuple."
            )

        label_data = tab_data_to_tuple(
            dataset.labeled_X, dataset.labeled_y, reasoning_result=reasoning_result
        )
        # The unlabeled split plays the role of ABLkit's training data.
        train_data = tab_data_to_tuple(
            dataset.unlabeled_X, dataset.unlabeled_y, reasoning_result=reasoning_result
        )
        valid_data = tab_data_to_tuple(
            dataset.valid_X, dataset.valid_y, reasoning_result=reasoning_result
        )
        test_data = tab_data_to_tuple(
            dataset.test_X, dataset.test_y, reasoning_result=reasoning_result
        )

        return label_data, train_data, valid_data, test_data

    def convert_lambdalearn_to_listdata(
        self, dataset: TabularMixin, reasoning_result: Any
    ) -> Tuple[ListData, ListData, ListData, ListData]:
        """
        Convert a lambdalearn dataset to a tuple of ListData
        (label_data_examples, train_data_examples, valid_data_examples, test_data_examples).

        Parameters
        ----------
        dataset : TabularMixin
            The LambdaLearn dataset to be converted.
        reasoning_result : Any
            The reasoning result of the dataset.

        Returns
        -------
        Tuple[ListData, ListData, ListData, ListData]
            A tuple of ListData (label_data_examples, train_data_examples,
            valid_data_examples, test_data_examples). An element is ``None`` when
            ``convert_lambdalearn_to_tuple`` produced ``None`` for that split.

        Raises
        ------
        NotImplementedError
            If ``dataset`` is not an instance of ``TabularMixin``.
        """
        if not isinstance(dataset, TabularMixin):
            raise NotImplementedError(
                "Only support converting the datasets that are instances of TabularMixin. "
                + "Please refer to the documentation and manually convert the dataset "
                + "into a ListData."
            )

        tuple_splits = self.convert_lambdalearn_to_tuple(dataset, reasoning_result)

        def to_listdata(split):
            # A missing split is propagated as None instead of leaving the result
            # name unbound, which used to raise UnboundLocalError at the return.
            if split is None:
                return None
            X, gt_pseudo_label, Y = split
            return ListData(X=X, gt_pseudo_label=gt_pseudo_label, Y=Y)

        label_data_examples, train_data_examples, valid_data_examples, test_data_examples = (
            to_listdata(split) for split in tuple_splits
        )

        return label_data_examples, train_data_examples, valid_data_examples, test_data_examples


 if __name__ == "__main__":
    # Manual demo: convert the LambdaLearn BreastCancer dataset both ways and print
    # a short summary of the labeled split for each representation.
    from lambdaLearn.Dataset.Tabular.BreastCancer import BreastCancer

    breast_dataset = BreastCancer(labeled_size=0.1, stratified=True, shuffle=True)
    dataconverter = DataConverter()

    # --- tuple representation ---
    tuple_splits = dataconverter.convert_lambdalearn_to_tuple(breast_dataset, 0)
    label_data, train_data, valid_data, test_data = tuple_splits
    print(*(type(split).__name__ for split in (label_data, train_data, valid_data, test_data)))
    print(len(label_data))
    # Sizes of the three components of the labeled split, then their first entries.
    print(*(len(label_data[idx]) for idx in range(3)))
    print(*(label_data[idx][0] for idx in range(3)))
    print()

    # --- ListData representation ---
    listdata_splits = dataconverter.convert_lambdalearn_to_listdata(breast_dataset, 0)
    (
        label_data_examples,
        train_data_examples,
        valid_data_examples,
        test_data_examples,
    ) = listdata_splits
    print(
        *(
            type(examples).__name__
            for examples in (
                label_data_examples,
                train_data_examples,
                valid_data_examples,
                test_data_examples,
            )
        )
    )
    x_count = len(label_data_examples.X)
    pseudo_label_count = len(label_data_examples.gt_pseudo_label)
    y_count = len(label_data_examples.Y)
    print(x_count, pseudo_label_count, y_count)

    # Indexing a ListData yields a single example exposing the same three fields.
    label_data_example = label_data_examples[0]
    print(label_data_example.X, label_data_example.gt_pseudo_label, label_data_example.Y)
--- a/ablkit/learning/model_converter.py
+++ b/ablkit/learning/model_converter.py
@@ -1,211 +0,0 @@
 import torch
 import copy
 from typing import Any, Callable, List, Optional

 from .abl_model import ABLModel
 from .basic_nn import BasicNN
 from lambdaLearn.Base.DeepModelMixin import DeepModelMixin


 class ModelConverter:
    """
    This class provides functionality to convert LambdaLearn models to ABLkit models.
    """

    def __init__(self) -> None:
        pass

    def convert_lambdalearn_to_ablmodel(
        self,
        lambdalearn_model,
        loss_fn: torch.nn.Module,
        optimizer_dict: dict,
        scheduler_dict: Optional[dict] = None,
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        num_epochs: int = 1,
        stop_loss: Optional[float] = 0.0001,
        num_workers: int = 0,
        save_interval: Optional[int] = None,
        save_dir: Optional[str] = None,
        train_transform: Optional[Callable[..., Any]] = None,
        test_transform: Optional[Callable[..., Any]] = None,
        collate_fn: Optional[Callable[[List[Any]], Any]] = None,
    ):
        """
        Convert a lambdalearn model to an ABLModel. If the lambdalearn model is an instance of
        DeepModelMixin, its network will be used as the model of BasicNN. Otherwise, the lambdalearn
        model should implement ``fit`` and ``predict`` methods; it is then wrapped directly and
        all training-related arguments below are ignored.

        Parameters
        ----------
        lambdalearn_model : Union[DeepModelMixin, Any]
            The LambdaLearn model to be converted.
        loss_fn : torch.nn.Module
            The loss function used for training.
        optimizer_dict : dict
            The dict contains necessary parameters to construct a optimizer used for training.
            The optimizer class is specified by the ``optimizer`` key. The dict is not modified.
        scheduler_dict : dict, optional
            The dict contains necessary parameters to construct a learning rate scheduler used
            for training, which will be called at the end of each run of the ``fit`` method.
            The scheduler class is specified by the ``scheduler`` key. It should implement the
            ``step`` method. The dict is not modified. Defaults to None.
        device : torch.device, optional
            The device on which the model will be trained or used for prediction.
            Defaults to None, in which case the device of ``lambdalearn_model`` is used.
        batch_size : int, optional
            The batch size used for training. Defaults to 32.
        num_epochs : int, optional
            The number of epochs used for training. Defaults to 1.
        stop_loss : float, optional
            The loss value at which to stop training. Defaults to 0.0001.
        num_workers : int
            The number of workers used for loading data. Defaults to 0.
        save_interval : int, optional
            The model will be saved every ``save_interval`` epoch during training. Defaults to None.
        save_dir : str, optional
            The directory in which to save the model during training. Defaults to None.
        train_transform : Callable[..., Any], optional
            A function/transform that takes an object and returns a transformed version used
            in the `fit` and `train_epoch` methods. Defaults to None.
        test_transform : Callable[..., Any], optional
            A function/transform that takes an object and returns a transformed version in the
            `predict`, `predict_proba` and `score` methods. Defaults to None.
        collate_fn : Callable[[List[T]], Any], optional
            The function used to collate data. Defaults to None.

        Returns
        -------
        ABLModel
            The converted ABLModel instance.

        Raises
        ------
        NotImplementedError
            If ``lambdalearn_model`` is neither a DeepModelMixin nor an object providing
            both ``fit`` and ``predict``.
        """
        if isinstance(lambdalearn_model, DeepModelMixin):
            # Forward by keyword so the two long, parallel signatures cannot drift apart.
            base_model = self.convert_lambdalearn_to_basicnn(
                lambdalearn_model,
                loss_fn=loss_fn,
                optimizer_dict=optimizer_dict,
                scheduler_dict=scheduler_dict,
                device=device,
                batch_size=batch_size,
                num_epochs=num_epochs,
                stop_loss=stop_loss,
                num_workers=num_workers,
                save_interval=save_interval,
                save_dir=save_dir,
                train_transform=train_transform,
                test_transform=test_transform,
                collate_fn=collate_fn,
            )
            return ABLModel(base_model)

        if not (hasattr(lambdalearn_model, "fit") and hasattr(lambdalearn_model, "predict")):
            raise NotImplementedError(
                "The lambdalearn_model should be an instance of DeepModelMixin, or implement "
                + "fit and predict methods."
            )

        return ABLModel(lambdalearn_model)

    def convert_lambdalearn_to_basicnn(
        self,
        lambdalearn_model: DeepModelMixin,
        loss_fn: torch.nn.Module,
        optimizer_dict: dict,
        scheduler_dict: Optional[dict] = None,
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        num_epochs: int = 1,
        stop_loss: Optional[float] = 0.0001,
        num_workers: int = 0,
        save_interval: Optional[int] = None,
        save_dir: Optional[str] = None,
        train_transform: Optional[Callable[..., Any]] = None,
        test_transform: Optional[Callable[..., Any]] = None,
        collate_fn: Optional[Callable[[List[Any]], Any]] = None,
    ):
        """
        Convert a lambdalearn model to a BasicNN. The lambdalearn model must be an instance of
        DeepModelMixin; a deep copy of its network is used as the model of BasicNN.

        Parameters
        ----------
        lambdalearn_model : DeepModelMixin
            The LambdaLearn model to be converted.
        loss_fn : torch.nn.Module
            The loss function used for training.
        optimizer_dict : dict
            The dict contains necessary parameters to construct a optimizer used for training.
            The optimizer class is specified by the ``optimizer`` key; the remaining items are
            passed to it as keyword arguments. The dict is not modified.
        scheduler_dict : dict, optional
            The dict contains necessary parameters to construct a learning rate scheduler used
            for training, which will be called at the end of each run of the ``fit`` method.
            The scheduler class is specified by the ``scheduler`` key. It should implement the
            ``step`` method. The dict is not modified. Defaults to None.
        device : torch.device, optional
            The device on which the model will be trained or used for prediction.
            Defaults to None, in which case ``lambdalearn_model.device`` is used.
        batch_size : int, optional
            The batch size used for training. Defaults to 32.
        num_epochs : int, optional
            The number of epochs used for training. Defaults to 1.
        stop_loss : float, optional
            The loss value at which to stop training. Defaults to 0.0001.
        num_workers : int
            The number of workers used for loading data. Defaults to 0.
        save_interval : int, optional
            The model will be saved every ``save_interval`` epoch during training. Defaults to None.
        save_dir : str, optional
            The directory in which to save the model during training. Defaults to None.
        train_transform : Callable[..., Any], optional
            A function/transform that takes an object and returns a transformed version used
            in the `fit` and `train_epoch` methods. Defaults to None.
        test_transform : Callable[..., Any], optional
            A function/transform that takes an object and returns a transformed version in the
            `predict`, `predict_proba` and `score` methods. Defaults to None.
        collate_fn : Callable[[List[T]], Any], optional
            The function used to collate data. Defaults to None.

        Returns
        -------
        BasicNN
            The converted BasicNN instance.

        Raises
        ------
        NotImplementedError
            If ``lambdalearn_model`` is not a DeepModelMixin, or its ``network`` attribute is
            not a ``torch.nn.Module``.
        KeyError
            If ``optimizer_dict`` lacks the ``optimizer`` key, or ``scheduler_dict`` is given
            without the ``scheduler`` key.
        """
        if not isinstance(lambdalearn_model, DeepModelMixin):
            raise NotImplementedError(
                "The lambdalearn_model should be an instance of DeepModelMixin."
            )
        if not isinstance(lambdalearn_model.network, torch.nn.Module):
            raise NotImplementedError(
                "Expected lambdalearn_model.network to be a torch.nn.Module, "
                + f"but got {type(lambdalearn_model.network)}"
            )

        # Only use the network part and device of the lambdalearn model. The network is
        # deep-copied so the converted model does not share parameters with the source.
        network = copy.deepcopy(lambdalearn_model.network)

        # Pop the class entry from a shallow copy: mutating the caller's dict would make
        # a second conversion with the same config fail with KeyError.
        optimizer_kwargs = dict(optimizer_dict)
        optimizer_class = optimizer_kwargs.pop("optimizer")
        optimizer = optimizer_class(network.parameters(), **optimizer_kwargs)

        scheduler = None
        if scheduler_dict is not None:
            scheduler_kwargs = dict(scheduler_dict)
            scheduler_class = scheduler_kwargs.pop("scheduler")
            scheduler = scheduler_class(optimizer, **scheduler_kwargs)

        if device is None:
            device = lambdalearn_model.device

        return BasicNN(
            model=network,
            loss_fn=loss_fn,
            optimizer=optimizer,
            scheduler=scheduler,
            device=device,
            batch_size=batch_size,
            num_epochs=num_epochs,
            stop_loss=stop_loss,
            num_workers=num_workers,
            save_interval=save_interval,
            save_dir=save_dir,
            train_transform=train_transform,
            test_transform=test_transform,
            collate_fn=collate_fn,
        )
--- a/examples/mnist_add/main_with_model_converter.py
+++ b/examples/mnist_add/main_with_model_converter.py
@@ -1,160 +0,0 @@
 import argparse
 import os.path as osp

 from torch import nn
 from torch.optim import RMSprop, lr_scheduler

 from lambdaLearn.Algorithm.AbductiveLearning.bridge import SimpleBridge
 from lambdaLearn.Algorithm.AbductiveLearning.data.evaluation import ReasoningMetric, SymbolAccuracy
 from lambdaLearn.Algorithm.AbductiveLearning.learning import ABLModel
 from lambdaLearn.Algorithm.AbductiveLearning.learning.model_converter import ModelConverter
 from lambdaLearn.Algorithm.AbductiveLearning.reasoning import GroundKB, KBBase, PrologKB, Reasoner
 from lambdaLearn.Algorithm.AbductiveLearning.utils import ABLLogger, print_log
 from lambdaLearn.Algorithm.SemiSupervised.Classification.FixMatch import FixMatch

 from datasets import get_dataset
 from models.nn import LeNet5


 class AddKB(KBBase):
    """Knowledge base for the MNIST Addition example: the result is the sum of the inputs."""

    def __init__(self, pseudo_label_list=None):
        """
        Parameters
        ----------
        pseudo_label_list : list, optional
            The pseudo labels handed to ``KBBase``. Defaults to ``list(range(10))``.
        """
        # Build the default per instance; a list literal in the signature would be
        # one object shared by every AddKB created without an explicit argument.
        if pseudo_label_list is None:
            pseudo_label_list = list(range(10))
        super().__init__(pseudo_label_list)

    def logic_forward(self, nums):
        """Return the sum of ``nums``."""
        return sum(nums)


 class AddGroundKB(GroundKB):
    """GroundKB variant of the MNIST Addition knowledge base: the result is the sum of the inputs."""

    def __init__(self, pseudo_label_list=None, GKB_len_list=None):
        """
        Parameters
        ----------
        pseudo_label_list : list, optional
            The pseudo labels handed to ``GroundKB``. Defaults to ``list(range(10))``.
        GKB_len_list : list, optional
            Forwarded to ``GroundKB`` as its second argument. Defaults to ``[2]``.
        """
        # Build the defaults per instance; list literals in the signature would be
        # shared by every AddGroundKB created without explicit arguments.
        if pseudo_label_list is None:
            pseudo_label_list = list(range(10))
        if GKB_len_list is None:
            GKB_len_list = [2]
        super().__init__(pseudo_label_list, GKB_len_list)

    def logic_forward(self, nums):
        """Return the sum of ``nums``."""
        return sum(nums)


 def main():
    """
    Run the MNIST Addition example with a base model converted from a LambdaLearn FixMatch model.

    Parses the command-line options, loads the train/test data, converts a FixMatch model
    wrapping LeNet5 into a BasicNN via ``ModelConverter``, builds the knowledge base and
    reasoner selected on the command line, and then trains and tests through ``SimpleBridge``.
    """
    # Local import: the file only imports ``nn`` and ``torch.optim`` names at module level.
    import torch

    def int_or_float(text):
        # segment_size may be an absolute count or a fraction, so accept both spellings.
        # ``type=int`` rejected fractional values such as the documented default 0.01.
        try:
            return int(text)
        except ValueError:
            return float(text)

    parser = argparse.ArgumentParser(description="MNIST Addition example")
    parser.add_argument(
        "--no-cuda", action="store_true", default=False, help="disables CUDA training"
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=1,
        help="number of epochs in each learning loop iteration (default : 1)",
    )
    parser.add_argument(
        "--lr", type=float, default=3e-4, help="base model learning rate (default : 0.0003)"
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="alpha in RMSprop (default : 0.9)")
    parser.add_argument(
        "--batch-size", type=int, default=32, help="base model batch size (default : 32)"
    )
    parser.add_argument(
        "--loops", type=int, default=2, help="number of loop iterations (default : 2)"
    )
    parser.add_argument(
        "--segment_size", type=int_or_float, default=0.01, help="segment size (default : 0.01)"
    )
    parser.add_argument("--save_interval", type=int, default=1, help="save interval (default : 1)")
    parser.add_argument(
        "--max-revision",
        type=int,
        default=-1,
        help="maximum revision in reasoner (default : -1)",
    )
    parser.add_argument(
        "--require-more-revision",
        type=int,
        default=0,
        help="require more revision in reasoner (default : 0)",
    )
    kb_type = parser.add_mutually_exclusive_group()
    kb_type.add_argument(
        "--prolog", action="store_true", default=False, help="use PrologKB (default: False)"
    )
    kb_type.add_argument(
        "--ground", action="store_true", default=False, help="use GroundKB (default: False)"
    )

    args = parser.parse_args()

    # Build logger
    print_log("Abductive Learning on the MNIST Addition example.", logger="current")

    # -- Working with Data ------------------------------
    print_log("Working with Data.", logger="current")
    train_data = get_dataset(train=True, get_pseudo_label=True)
    test_data = get_dataset(train=False, get_pseudo_label=True)

    # -- Building the Learning Part ---------------------
    print_log("Building the Learning Part.", logger="current")

    # Honour --no-cuda (previously ignored) and fall back to CPU when CUDA is unavailable.
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"

    # Build necessary components for BasicNN
    fixmatch_model = FixMatch(
        network=LeNet5(),
        threshold=0.95,
        lambda_u=1.0,
        mu=7,
        T=0.5,
        epoch=1,
        num_it_epoch=2**20,
        num_it_total=2**20,
        device=device,
    )

    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)
    # The CLI defaults equal the values that used to be hard-coded here, so running
    # without flags behaves as before while --lr / --alpha now take effect.
    optimizer_dict = dict(optimizer=RMSprop, lr=args.lr, alpha=args.alpha)
    scheduler_dict = dict(
        scheduler=lr_scheduler.OneCycleLR, max_lr=args.lr, pct_start=0.15, total_steps=200
    )

    converter = ModelConverter()
    base_model = converter.convert_lambdalearn_to_basicnn(
        fixmatch_model,
        loss_fn=loss_fn,
        optimizer_dict=optimizer_dict,
        scheduler_dict=scheduler_dict,
        batch_size=args.batch_size,
        num_epochs=args.epochs,
    )

    # Build ABLModel
    model = ABLModel(base_model)

    # -- Building the Reasoning Part --------------------
    print_log("Building the Reasoning Part.", logger="current")

    # Build knowledge base
    if args.prolog:
        kb = PrologKB(pseudo_label_list=list(range(10)), pl_file="add.pl")
    elif args.ground:
        kb = AddGroundKB()
    else:
        kb = AddKB()

    # Create reasoner
    reasoner = Reasoner(
        kb, max_revision=args.max_revision, require_more_revision=args.require_more_revision
    )

    # -- Building Evaluation Metrics --------------------
    print_log("Building Evaluation Metrics.", logger="current")
    metric_list = [SymbolAccuracy(prefix="mnist_add"), ReasoningMetric(kb=kb, prefix="mnist_add")]

    # -- Bridging Learning and Reasoning ----------------
    print_log("Bridge Learning and Reasoning.", logger="current")
    bridge = SimpleBridge(model, reasoner, metric_list)

    # Retrieve the directory of the Log file and define the directory for saving the model weights.
    log_dir = ABLLogger.get_current_instance().log_dir
    weights_dir = osp.join(log_dir, "weights")

    #  Train and Test
    bridge.train(
        train_data,
        loops=args.loops,
        segment_size=args.segment_size,
        save_interval=args.save_interval,
        save_dir=weights_dir,
    )
    bridge.test(test_data)


 # Run the example only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()