From 099a6a3fdbb9187a2955d22d8ff091892391889a Mon Sep 17 00:00:00 2001
From: Gao Enhao <gaoeh@lamda.nju.edu.cn>
Date: Fri, 7 Apr 2023 14:07:17 +0800
Subject: [PATCH] [MNT] delete basic_model.py and wabl_models.py

---
 abl/learning/basic_model.py | 560 ------------------------------------
 abl/learning/wabl_models.py | 137 ---------
 2 files changed, 697 deletions(-)
 delete mode 100644 abl/learning/basic_model.py
 delete mode 100644 abl/learning/wabl_models.py

diff --git a/abl/learning/basic_model.py b/abl/learning/basic_model.py
deleted file mode 100644
index 00e2d3f..0000000
--- a/abl/learning/basic_model.py
+++ /dev/null
@@ -1,560 +0,0 @@
-# coding: utf-8
-# ================================================================#
-#   Copyright (C) 2020 Freecss All rights reserved.
-#
-#   File Name     ：basic_model.py
-#   Author        ：freecss
-#   Email         ：karlfreecss@gmail.com
-#   Created Date  ：2020/11/21
-#   Description   ：
-#
-# ================================================================#
-
-import sys
-
-sys.path.append("..")
-
-import torch
-import numpy
-from torch.utils.data import Dataset, DataLoader
-
-import os
-from multiprocessing import Pool
-from typing import List, Any, T, Tuple, Optional, Callable
-
-
-class BasicDataset(Dataset):
-    def __init__(self, X: List[Any], Y: List[Any]):
-        """Initialize a basic dataset.
-
-        Parameters
-        ----------
-        X : List[Any]
-            A list of objects representing the input data.
-        Y : List[Any]
-            A list of objects representing the output data.
-        """
-        self.X = X
-        self.Y = Y
-
-    def __len__(self):
-        """Return the length of the dataset.
-
-        Returns
-        -------
-        int
-            The length of the dataset.
-        """
-        return len(self.X)
-
-    def __getitem__(self, index: int) -> Tuple[Any, Any]:
-        """Get an item from the dataset.
-
-        Parameters
-        ----------
-        index : int
-            The index of the item to retrieve.
-
-        Returns
-        -------
-        Tuple[Any, Any]
-            A tuple containing the input and output data at the specified index.
-        """
-        if index >= len(self):
-            raise ValueError("index range error")
-
-        img = self.X[index]
-        label = self.Y[index]
-
-        return (img, label)
-
-
-class XYDataset(Dataset):
-    def __init__(self, X: List[Any], Y: List[int], transform: Callable[..., Any] = None):
-        """
-        Initialize the dataset used for classification task.
-
-        Parameters
-        ----------
-        X : List[Any]
-            The input data.
-        Y : List[int]
-            The target data.
-        transform : Callable[..., Any], optional
-            A function/transform that takes in an object and returns a transformed version. Defaults to None.
-        """
-        self.X = X
-        self.Y = torch.LongTensor(Y)
-
-        self.n_sample = len(X)
-        self.transform = transform
-
-    def __len__(self) -> int:
-        """
-        Return the length of the dataset.
-
-        Returns
-        -------
-        int
-            The length of the dataset.
-        """
-        return len(self.X)
-
-    def __getitem__(self, index: int) -> Tuple[Any, torch.Tensor]:
-        """
-        Get the item at the given index.
-
-        Parameters
-        ----------
-        index : int
-            The index of the item to get.
-
-        Returns
-        -------
-        Tuple[Any, torch.Tensor]
-            A tuple containing the object and its label.
-        """
-        if index >= len(self):
-            raise ValueError("index range error")
-
-        img = self.X[index]
-        if self.transform is not None:
-            img = self.transform(img)
-
-        label = self.Y[index]
-
-        return (img, label)
-
-
-class FakeRecorder:
-    def __init__(self):
-        pass
-
-    def print(self, *x):
-        pass
-
-
-class BasicModel:
-    """
-    Wrap NN models into the form of an sklearn estimator
-
-    Parameters
-    ----------
-    model : torch.nn.Module
-        The PyTorch model to be trained or used for prediction.
-    criterion : torch.nn.Module
-        The loss function used for training.
-    optimizer : torch.nn.Module
-        The optimizer used for training.
-    device : torch.device, optional
-        The device on which the model will be trained or used for prediction, by default torch.decive("cpu").
-    batch_size : int, optional
-        The batch size used for training, by default 1.
-    num_epochs : int, optional
-        The number of epochs used for training, by default 1.
-    stop_loss : Optional[float], optional
-        The loss value at which to stop training, by default 0.01.
-    num_workers : int, optional
-        The number of workers used for loading data, by default 0.
-    save_interval : Optional[int], optional
-        The interval at which to save the model during training, by default None.
-    save_dir : Optional[str], optional
-        The directory in which to save the model during training, by default None.
-    transform : Callable[..., Any], optional
-        A function/transform that takes in an object and returns a transformed version. Defaults to None.
-    collate_fn : Callable[[List[T]], Any], optional
-        The function used to collate data, by default None.
-    recorder : Any, optional
-        The recorder used to record training progress, by default None.
-
-    Attributes
-    ----------
-    model : torch.nn.Module
-        The PyTorch model to be trained or used for prediction.
-    batch_size : int
-        The batch size used for training.
-    num_epochs : int
-        The number of epochs used for training.
-    stop_loss : Optional[float]
-        The loss value at which to stop training.
-    num_workers : int
-        The number of workers used for loading data.
-    criterion : torch.nn.Module
-        The loss function used for training.
-    optimizer : torch.nn.Module
-        The optimizer used for training.
-    transform : Callable[..., Any]
-        The transformation function used for data augmentation.
-    device : torch.device
-        The device on which the model will be trained or used for prediction.
-    recorder : Any
-        The recorder used to record training progress.
-    save_interval : Optional[int]
-        The interval at which to save the model during training.
-    save_dir : Optional[str]
-        The directory in which to save the model during training.
-    collate_fn : Callable[[List[T]], Any]
-        The function used to collate data.
-
-    Methods
-    -------
-    fit(data_loader=None, X=None, y=None)
-        Train the model.
-    train_epoch(data_loader)
-        Train the model for one epoch.
-    predict(data_loader=None, X=None, print_prefix="")
-        Predict the class of the input data.
-    predict_proba(data_loader=None, X=None, print_prefix="")
-        Predict the probability of each class for the input data.
-    val(data_loader=None, X=None, y=None, print_prefix="")
-        Validate the model.
-    score(data_loader=None, X=None, y=None, print_prefix="")
-        Score the model.
-    _data_loader(X, y=None)
-        Generate the data_loader.
-    save(epoch_id, save_dir="")
-        Save the model.
-    load(epoch_id, load_dir="")
-        Load the model.
-    """
-
-    def __init__(
-        self,
-        model: torch.nn.Module,
-        criterion: torch.nn.Module,
-        optimizer: torch.nn.Module,
-        device: torch.device = torch.device("cpu"),
-        batch_size: int = 1,
-        num_epochs: int = 1,
-        stop_loss: Optional[float] = 0.01,
-        num_workers: int = 0,
-        save_interval: Optional[int] = None,
-        save_dir: Optional[str] = None,
-        transform: Callable[..., Any] = None,
-        collate_fn: Callable[[List[T]], Any] = None,
-        recorder=None,
-    ):
-
-        self.model = model.to(device)
-
-        self.batch_size = batch_size
-        self.num_epochs = num_epochs
-        self.stop_loss = stop_loss
-        self.num_workers = num_workers
-
-        self.criterion = criterion
-        self.optimizer = optimizer
-        self.transform = transform
-        self.device = device
-
-        if recorder is None:
-            recorder = FakeRecorder()
-        self.recorder = recorder
-
-        self.save_interval = save_interval
-        self.save_dir = save_dir
-        self.collate_fn = collate_fn
-
-    def _fit(self, data_loader, n_epoch, stop_loss):
-        recorder = self.recorder
-        recorder.print("model fitting")
-
-        min_loss = 1e10
-        for epoch in range(n_epoch):
-            loss_value = self.train_epoch(data_loader)
-            recorder.print(f"{epoch}/{n_epoch} model training loss is {loss_value}")
-            if min_loss < 0 or loss_value < min_loss:
-                min_loss = loss_value
-            if self.save_interval is not None and (epoch + 1) % self.save_interval == 0:
-                if self.save_dir is None:
-                    raise ValueError(
-                        "save_dir should not be None if save_interval is not None"
-                    )
-                self.save(epoch + 1, self.save_dir)
-            if stop_loss is not None and loss_value < stop_loss:
-                break
-        recorder.print("Model fitted, minimal loss is ", min_loss)
-        return loss_value
-
-    def fit(
-        self, data_loader: DataLoader = None, X: List[Any] = None, y: List[int] = None
-    ) -> float:
-        """
-        Train the model.
-
-        Parameters
-        ----------
-        data_loader : DataLoader, optional
-            The data loader used for training, by default None
-        X : List[Any], optional
-            The input data, by default None
-        y : List[int], optional
-            The target data, by default None
-
-        Returns
-        -------
-        float
-            The loss value of the trained model.
-        """
-        if data_loader is None:
-            data_loader = self._data_loader(X, y)
-        return self._fit(data_loader, self.num_epochs, self.stop_loss)
-
-    def train_epoch(self, data_loader: DataLoader):
-        """
-        Train the model for one epoch.
-
-        Parameters
-        ----------
-        data_loader : DataLoader
-            The data loader used for training.
-
-        Returns
-        -------
-        float
-            The loss value of the trained model.
-        """
-        model = self.model
-        criterion = self.criterion
-        optimizer = self.optimizer
-        device = self.device
-
-        model.train()
-
-        total_loss, total_num = 0.0, 0
-        for data, target in data_loader:
-            data, target = data.to(device), target.to(device)
-            out = model(data)
-            loss = criterion(out, target)
-
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
-
-            total_loss += loss.item() * data.size(0)
-            total_num += data.size(0)
-
-        return total_loss / total_num
-
-    def _predict(self, data_loader):
-        model = self.model
-        device = self.device
-
-        model.eval()
-
-        with torch.no_grad():
-            results = []
-            for data, _ in data_loader:
-                data = data.to(device)
-                out = model(data)
-                results.append(out)
-
-        return torch.cat(results, axis=0)
-
-    def predict(
-        self,
-        data_loader: DataLoader = None,
-        X: List[Any] = None,
-        print_prefix: str = "",
-    ) -> numpy.ndarray:
-        """
-        Predict the class of the input data.
-
-        Parameters
-        ----------
-        data_loader : DataLoader, optional
-            The data loader used for prediction, by default None
-        X : List[Any], optional
-            The input data, by default None
-        print_prefix : str, optional
-            The prefix used for printing, by default ""
-
-        Returns
-        -------
-        numpy.ndarray
-            The predicted class of the input data.
-        """
-        recorder = self.recorder
-        recorder.print("Start Predict Class ", print_prefix)
-
-        if data_loader is None:
-            data_loader = self._data_loader(X)
-        return self._predict(data_loader).argmax(axis=1).cpu().numpy()
-
-    def predict_proba(
-        self,
-        data_loader: DataLoader = None,
-        X: List[Any] = None,
-        print_prefix: str = "",
-    ) -> numpy.ndarray:
-        """
-        Predict the probability of each class for the input data.
-
-        Parameters
-        ----------
-        data_loader : DataLoader, optional
-            The data loader used for prediction, by default None
-        X : List[Any], optional
-            The input data, by default None
-        print_prefix : str, optional
-            The prefix used for printing, by default ""
-
-        Returns
-        -------
-        numpy.ndarray
-            The predicted probability of each class for the input data.
-        """
-        recorder = self.recorder
-        recorder.print("Start Predict Probability ", print_prefix)
-
-        if data_loader is None:
-            data_loader = self._data_loader(X)
-        return self._predict(data_loader).softmax(axis=1).cpu().numpy()
-
-    def _score(self, data_loader):
-        model = self.model
-        criterion = self.criterion
-        device = self.device
-
-        model.eval()
-
-        total_correct_num, total_num, total_loss = 0, 0, 0.0
-
-        with torch.no_grad():
-            for data, target in data_loader:
-                data, target = data.to(device), target.to(device)
-
-                out = model(data)
-
-                if len(out.shape) > 1:
-                    correct_num = sum(target == out.argmax(axis=1)).item()
-                else:
-                    correct_num = sum(target == (out > 0.5)).item()
-                loss = criterion(out, target)
-                total_loss += loss.item() * data.size(0)
-
-                total_correct_num += correct_num
-                total_num += data.size(0)
-
-        mean_loss = total_loss / total_num
-        accuracy = total_correct_num / total_num
-
-        return mean_loss, accuracy
-
-    def score(
-        self,
-        data_loader: DataLoader = None,
-        X: List[Any] = None,
-        y: List[int] = None,
-        print_prefix: str = "",
-    ) -> float:
-        """
-        Validate the model.
-
-        Parameters
-        ----------
-        data_loader : DataLoader, optional
-            The data loader used for scoring, by default None
-        X : List[Any], optional
-            The input data, by default None
-        y : List[int], optional
-            The target data, by default None
-        print_prefix : str, optional
-            The prefix used for printing, by default ""
-
-        Returns
-        -------
-        float
-            The accuracy of the model.
-        """
-        recorder = self.recorder
-        recorder.print("Start validation ", print_prefix)
-
-        if data_loader is None:
-            data_loader = self._data_loader(X, y)
-        mean_loss, accuracy = self._score(data_loader)
-        recorder.print(
-            "[%s] mean loss: %f, accuray: %f" % (print_prefix, mean_loss, accuracy)
-        )
-        return accuracy
-
-    def _data_loader(
-        self,
-        X: List[Any],
-        y: List[int] = None,
-    ) -> DataLoader:
-        """
-        Generate data_loader for user provided data.
-
-        Parameters
-        ----------
-        X : List[Any]
-            The input data.
-        y : List[int], optional
-            The target data, by default None
-
-        Returns
-        -------
-        DataLoader
-            The data loader.
-        """
-        collate_fn = self.collate_fn
-        transform = self.transform
-
-        if y is None:
-            y = [0] * len(X)
-        dataset = XYDataset(X, y, transform=transform)
-        sampler = None
-        data_loader = DataLoader(
-            dataset,
-            batch_size=self.batch_size,
-            shuffle=False,
-            sampler=sampler,
-            num_workers=int(self.num_workers),
-            collate_fn=collate_fn,
-        )
-        return data_loader
-
-    def save(self, epoch_id: int, save_dir: str = ""):
-        """
-        Save the model and the optimizer.
-
-        Parameters
-        ----------
-        epoch_id : int
-            The epoch id.
-        save_dir : str, optional
-            The directory to save the model, by default ""
-        """
-        recorder = self.recorder
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
-        recorder.print("Saving model and opter")
-        save_path = os.path.join(save_dir, str(epoch_id) + "_net.pth")
-        torch.save(self.model.state_dict(), save_path)
-
-        save_path = os.path.join(save_dir, str(epoch_id) + "_opt.pth")
-        torch.save(self.optimizer.state_dict(), save_path)
-
-    def load(self, epoch_id: int, load_dir: str = ""):
-        """
-        Load the model and the optimizer.
-
-        Parameters
-        ----------
-        epoch_id : int
-            The epoch id.
-        load_dir : str, optional
-            The directory to load the model, by default ""
-        """
-        recorder = self.recorder
-        recorder.print("Loading model and opter")
-        load_path = os.path.join(load_dir, str(epoch_id) + "_net.pth")
-        self.model.load_state_dict(torch.load(load_path))
-
-        load_path = os.path.join(load_dir, str(epoch_id) + "_opt.pth")
-        self.optimizer.load_state_dict(torch.load(load_path))
-
-
-if __name__ == "__main__":
-    pass
diff --git a/abl/learning/wabl_models.py b/abl/learning/wabl_models.py
deleted file mode 100644
index e986f94..0000000
--- a/abl/learning/wabl_models.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# coding: utf-8
-# ================================================================#
-#   Copyright (C) 2020 Freecss All rights reserved.
-#
-#   File Name     ：models.py
-#   Author        ：freecss
-#   Email         ：karlfreecss@gmail.com
-#   Created Date  ：2020/04/02
-#   Description   ：
-#
-# ================================================================#
-from itertools import chain
-from typing import List, Any
-
-
-def get_part_data(X, i):
-    return list(map(lambda x: x[i], X))
-
-
-def merge_data(X):
-    ret_mark = list(map(lambda x: len(x), X))
-    ret_X = list(chain(*X))
-    return ret_X, ret_mark
-
-
-def reshape_data(Y, marks):
-    begin_mark = 0
-    ret_Y = []
-    for mark in marks:
-        end_mark = begin_mark + mark
-        ret_Y.append(Y[begin_mark:end_mark])
-        begin_mark = end_mark
-    return ret_Y
-
-
-class WABLBasicModel:
-    """
-    Serialize data and provide a unified interface for different machine learning models.
-
-    Parameters
-    ----------
-    base_model : Machine Learning Model
-        The base model to use for training and prediction.
-    pseudo_label_list : List[Any]
-        A list of pseudo labels to use for training.
-
-    Attributes
-    ----------
-    cls_list : List[Any]
-        A list of classifiers.
-    pseudo_label_list : List[Any]
-        A list of pseudo labels to use for training.
-    mapping : dict
-        A dictionary mapping pseudo labels to integers.
-    remapping : dict
-        A dictionary mapping integers to pseudo labels.
-
-    Methods
-    -------
-    predict(X: List[List[Any]]) -> dict
-        Predict the class labels and probabilities for the given data.
-    valid(X: List[List[Any]], Y: List[Any]) -> float
-        Calculate the accuracy score for the given data.
-    train(X: List[List[Any]], Y: List[Any])
-        Train the model on the given data.
-    """
-    def __init__(self, base_model, pseudo_label_list: List[Any]):
-        self.cls_list = []
-        self.cls_list.append(base_model)
-
-        self.pseudo_label_list = pseudo_label_list
-        self.mapping = dict(zip(pseudo_label_list, list(range(len(pseudo_label_list)))))
-        self.remapping = dict(
-            zip(list(range(len(pseudo_label_list))), pseudo_label_list)
-        )
-
-    def predict(self, X: List[List[Any]]) -> dict:
-        """
-        Predict the class labels and probabilities for the given data.
-
-        Parameters
-        ----------
-        X : List[List[Any]]
-            The data to predict on.
-
-        Returns
-        -------
-        dict
-            A dictionary containing the predicted class labels and probabilities.
-        """
-        data_X, marks = merge_data(X)
-        prob = self.cls_list[0].predict_proba(X=data_X)
-        _cls = prob.argmax(axis=1)
-        cls = list(map(lambda x: self.remapping[x], _cls))
-
-        prob = reshape_data(prob, marks)
-        cls = reshape_data(cls, marks)
-
-        return {"cls": cls, "prob": prob}
-
-    def valid(self, X: List[List[Any]], Y: List[Any]) -> float:
-        """
-        Calculate the accuracy for the given data.
-
-        Parameters
-        ----------
-        X : List[List[Any]]
-            The data to calculate the accuracy on.
-        Y : List[Any]
-            The true class labels for the given data.
-
-        Returns
-        -------
-        float
-            The accuracy score for the given data.
-        """
-        data_X, _ = merge_data(X)
-        _data_Y, _ = merge_data(Y)
-        data_Y = list(map(lambda y: self.mapping[y], _data_Y))
-        score = self.cls_list[0].score(X=data_X, y=data_Y)
-        return score
-
-    def train(self, X: List[List[Any]], Y: List[Any]):
-        """
-        Train the model on the given data.
-
-        Parameters
-        ----------
-        X : List[List[Any]]
-            The data to train on.
-        Y : List[Any]
-            The true class labels for the given data.
-        """
-        data_X, _ = merge_data(X)
-        _data_Y, _ = merge_data(Y)
-        data_Y = list(map(lambda y: self.mapping[y], _data_Y))
-        self.cls_list[0].fit(X=data_X, y=data_Y)