|
- """
- Solver base class
-
- Provide some standard solver interface.
- """
-
- from typing import Any, Iterable, Tuple
- from copy import deepcopy
-
- import torch
-
- from ..module.feature import FEATURE_DICT
- from ..module.hpo import HPO_DICT
- from ..module.model import EncoderUniversalRegistry, DecoderUniversalRegistry, ModelUniversalRegistry
- from ..module.nas.algorithm import NAS_ALGO_DICT
- from ..module.nas.estimator import NAS_ESTIMATOR_DICT
- from ..module.nas.space import NAS_SPACE_DICT
- from ..module import BaseFeature, BaseHPOptimizer, BaseTrainer
- from .utils import LeaderBoard
- from ..utils import get_logger
-
# Module-level logger shared by all solvers defined in this module.
LOGGER = get_logger("BaseSolver")
-
-
def _initialize_single_model(model):
    """Instantiate model components described by a config dict.

    The dict may describe an ``encoder``/``decoder`` pair (each a sub-dict with
    a ``name`` key plus constructor kwargs) or a whole model (a top-level
    ``name`` key plus constructor kwargs). ``name`` keys are popped, so the
    dict is consumed in place.

    Returns
    -------
    tuple
        ``(encoder, decoder)`` where either element is ``None`` when the
        corresponding section is absent. For a whole model, the instance is
        returned in the encoder slot.
    """
    encoder = None
    decoder = None
    encoder_config = model.get("encoder")
    if encoder_config is not None or "encoder" in model:
        encoder_config = model["encoder"]
        encoder = EncoderUniversalRegistry.get_encoder(
            encoder_config.pop("name")
        )(**encoder_config)
    if "decoder" in model:
        decoder_config = model["decoder"]
        decoder = DecoderUniversalRegistry.get_decoder(
            decoder_config.pop("name")
        )(**decoder_config)
    if "name" in model:
        # a top-level "name" denotes a whole model; remaining keys are kwargs
        encoder = ModelUniversalRegistry.get_model(model.pop("name"))(**model)
    return (encoder, decoder)
-
def _parse_hp_space(spaces):
    """Materialize ``cutFunc`` entries of a hyper-parameter space in place.

    Each space dict may carry a ``cutFunc`` given as a Python source string;
    such strings are turned into callables. Returns the (mutated) input, or
    ``None`` when ``spaces`` is ``None``.
    """
    if spaces is None:
        return None
    for candidate in spaces:
        cut = candidate.get("cutFunc")
        if isinstance(cut, str):
            # SECURITY NOTE: eval() executes arbitrary code from the config.
            # Only load hp-space configurations from trusted sources.
            candidate["cutFunc"] = eval(cut)
    return spaces
-
def _parse_model_hp(model):
    """Extract (and parse) the hp-space configuration from a model dict.

    Pops ``hp_space`` entries from the dict in place. When an ``encoder``
    section is present, a dict with ``encoder``/``decoder`` parsed spaces is
    produced (``decoder`` is ``None`` when that section is absent); otherwise
    the model-level ``hp_space`` is parsed directly.

    Returns
    -------
    list
        A single-element list holding the parsed configuration.
    """
    assert isinstance(model, dict)
    if "encoder" in model:
        decoder_space = (
            _parse_hp_space(model["decoder"].pop("hp_space", None))
            if "decoder" in model
            else None
        )
        parsed = {
            "encoder": _parse_hp_space(model["encoder"].pop("hp_space", None)),
            "decoder": decoder_space,
        }
    else:
        parsed = _parse_hp_space(model.pop("hp_space", None))
    return [parsed]
-
class BaseSolver:
    r"""
    Base solver class, define some standard solver interfaces.

    Parameters
    ----------
    feature_module: autogl.module.feature.BaseFeatureEngineer or str or None
        The (name of) auto feature engineer used to process the given dataset.
        Disable feature engineer by setting it to ``None``.

    graph_models: list of autogl.module.model.BaseModel or str
        The (name of) models to be optimized as backbone.

    nas_spaces: (list of) `autogl.module.nas.space` or str or None
        The NAS search space(s). Set all three nas arguments to ``None`` to
        disable neural architecture search.

    nas_algorithms: (list of) `autogl.module.nas.algorithm` or str or None
        The NAS search algorithm(s).

    nas_estimators: (list of) `autogl.module.nas.estimator` or str or None
        The NAS performance estimator(s).

    hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None
        The (name of) hpo module used to search for best hyper parameters.
        Disable hpo by setting it to ``None``.

    ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None
        The (name of) ensemble module used to ensemble the multi-models found.
        Disable ensemble by setting it to ``None``.

    max_evals: int (Optional)
        If given, will set the number of evaluation times the hpo module will use.
        Only be effective when hpo_module is of type ``str``. Default ``50``.

    default_trainer: str or list of str (Optional)
        Default trainer class to be used.
        If a single trainer class is given, will set all trainer to default trainer.
        If a list of trainer class is given, will set every model with corresponding
        trainer cls. Default ``None``.

    trainer_hp_space: list of dict (Optional)
        trainer hp space or list of trainer hp spaces configuration.
        If a single trainer hp is given, will specify the hp space of trainer for
        every model. If a list of trainer hp is given, will specify every model
        with corresponding trainer hp space. Default ``None``.

    model_hp_spaces: list of list of dict (Optional)
        model hp space configuration.
        If given, will specify every hp space of every passed model. Default ``None``.

    size: int (Optional)
        The max models ensemble module will use. Default ``4``.

    device: torch.device or str
        The device where model will be running on. If set to ``auto``, will use gpu
        when available. You can also specify the device by directly giving ``gpu``
        or ``cuda:0``, etc. Default ``auto``.
    """

    # pylint: disable=W0102

    def __init__(
        self,
        feature_module,
        graph_models,
        nas_spaces,
        nas_algorithms,
        nas_estimators,
        hpo_module,
        ensemble_module,
        max_evals=50,
        default_trainer=None,
        trainer_hp_space=None,
        model_hp_spaces=None,
        size=4,
        device="auto",
    ):

        # resolve the runtime device; "auto" prefers cuda when available
        if device == "auto":
            self.runtime_device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu"
            )
        elif isinstance(device, torch.device):
            self.runtime_device = device
        elif isinstance(device, str) and (device == "cpu" or device.startswith("cuda")):
            self.runtime_device = torch.device(device)
        else:
            LOGGER.error("Cannot parse device %s", str(device))
            raise ValueError("Cannot parse device {}".format(device))

        # initialize modules
        self.graph_model_list = []
        self.set_graph_models(
            graph_models, default_trainer, trainer_hp_space, model_hp_spaces
        )
        self.set_feature_module(feature_module)
        self.set_hpo_module(hpo_module, max_evals=max_evals)
        self.set_ensemble_module(ensemble_module, size=size)
        self.set_nas_module(nas_algorithms, nas_spaces, nas_estimators)

        # initialize leaderboard
        self.leaderboard = None

        # trained models are cached here, keyed by name
        self.trained_models = {}

    def set_feature_module(
        self,
        feature_module,
        *args,
        **kwargs,
    ) -> "BaseSolver":
        r"""
        Set the feature module of current solver.

        Parameters
        ----------
        feature_module: autogl.module.feature.BaseFeature or str or None
            The (name of) auto feature engineer used to process the given dataset.
            A list of engineers is combined with ``&`` into one pipeline.
            Disable feature engineer by setting it to ``None``.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """
        # load feature engineer module

        def get_feature(feature_engineer):
            # accept an already-constructed engineer, or resolve a name
            # through the feature registry
            if isinstance(feature_engineer, BaseFeature):
                return feature_engineer
            if isinstance(feature_engineer, str):
                if feature_engineer in FEATURE_DICT:
                    return FEATURE_DICT[feature_engineer](*args, **kwargs)
                raise ValueError(
                    "cannot find feature engineer %s." % (feature_engineer)
                )
            raise TypeError(
                # FIX: second literal was not an f-string, so the braces were
                # emitted verbatim in the error message
                f"Cannot parse feature argument {str(feature_engineer)} of"
                f" type {str(type(feature_engineer))}"
            )

        if feature_module is None:
            self.feature_module = None
        elif isinstance(feature_module, (BaseFeature, str)):
            self.feature_module = get_feature(feature_module)
        elif isinstance(feature_module, Iterable):
            # materialize first: a generic iterable need not support slicing
            feature_module = list(feature_module)
            self.feature_module = get_feature(feature_module[0])
            for feature_engineer in feature_module[1:]:
                self.feature_module &= get_feature(feature_engineer)
        else:
            raise ValueError(
                "need feature module to be str or a BaseFeatureEngineer instance, get",
                type(feature_module),
                "instead.",
            )

        return self

    def set_graph_models(
        self,
        graph_models,
        default_trainer=None,
        trainer_hp_space=None,
        model_hp_spaces=None,
    ) -> "BaseSolver":
        r"""
        Set the graph models used in current solver.

        Parameters
        ----------
        graph_models: list of autogl.module.model.BaseModel or list of str
            The (name of) models to be optimized as backbone.

        default_trainer: str or list of str (Optional)
            Default trainer class to be used.
            If a single trainer class is given, will set all trainer to default trainer.
            If a list of trainer class is given, will set every model with corresponding
            trainer cls. Default ``None``.

        trainer_hp_space: list of dict (Optional)
            trainer hp space or list of trainer hp spaces configuration.
            If a single trainer hp is given, will specify the hp space of trainer for
            every model. If a list of trainer hp is given, will specify every model
            with corresponding trainer hp space.
            Default ``None``.

        model_hp_spaces: list of list of dict (Optional)
            model hp space configuration.
            If given, will specify every hp space of every passed model. Default ``None``.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """
        # the raw configuration is only stored here; concrete trainers are
        # built later by the fitting logic of the subclass
        self.gml = graph_models
        self._default_trainer = default_trainer
        self._trainer_hp_space = trainer_hp_space
        self._model_hp_spaces = model_hp_spaces
        return self

    def set_hpo_module(self, hpo_module, *args, **kwargs) -> "BaseSolver":
        r"""
        Set the hpo module used in current solver.

        Parameters
        ----------
        hpo_module: autogl.module.hpo.BaseHPOptimizer or str or None
            The (name of) hpo module used to search for best hyper parameters.
            Disable hpo by setting it to ``None``.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """

        # load hpo module
        if hpo_module is None:
            self.hpo_module = None
        elif isinstance(hpo_module, BaseHPOptimizer):
            self.hpo_module = hpo_module
        elif isinstance(hpo_module, str):
            if hpo_module in HPO_DICT:
                self.hpo_module = HPO_DICT[hpo_module](*args, **kwargs)
            else:
                raise KeyError("cannot find hpo module %s." % (hpo_module))
        else:
            raise ValueError(
                "need hpo module to be str or a BaseHPOtimizer instance, get",
                type(hpo_module),
                "instead.",
            )
        return self

    def set_nas_module(
        self, nas_algorithms=None, nas_spaces=None, nas_estimators=None
    ) -> "BaseSolver":
        """
        Set the neural architecture search module in current solver.

        Parameters
        ----------
        nas_algorithms: (list of) `autogl.module.hpo.nas.BaseNAS`
            The search algorithm of nas. You can pass a list of algorithms
            to enable multiple algorithms search. If list passed, the length of
            `nas_spaces`, `nas_algorithms` and `nas_estimators` should be the same.
            Default `None`.

        nas_spaces: (list of) `autogl.module.hpo.nas.GraphSpace`
            The search space of nas. You can pass a list of space to enable
            multiple space search. If list passed, the length of `nas_spaces`,
            `nas_algorithms` and `nas_estimators` should be the same. If set
            to `None`, will disable the whole nas module.

        nas_estimators: (list of) `autogl.module.hpo.nas.BaseEstimators`
            The nas estimators. You can pass a list of estimators to enable multiple
            estimators search. If list passed, the length of `nas_spaces`, `nas_algorithms`
            and `nas_estimators` should be the same. Default `None`.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """
        if nas_algorithms is None and nas_estimators is None and nas_spaces is None:
            self.nas_algorithms = self.nas_estimators = self.nas_spaces = None
            # FIX: was a bare ``return`` — keep the fluent interface the
            # docstring promises and every other setter honors
            return self
        assert None not in [
            nas_algorithms,
            nas_estimators,
            nas_spaces,
        ], "The algorithms, estimators and spaces should all be set"

        def _as_list(value):
            # FIX: ``str`` is Iterable, so a single component name must not be
            # split into characters by the generic iterable check
            if isinstance(value, str) or not isinstance(value, Iterable):
                return [value]
            return list(value)

        # parse all str elements into registry-constructed instances
        nas_algorithms = [
            algo if not isinstance(algo, str) else NAS_ALGO_DICT[algo]()
            for algo in _as_list(nas_algorithms)
        ]
        nas_spaces = [
            space if not isinstance(space, str) else NAS_SPACE_DICT[space]()
            for space in _as_list(nas_spaces)
        ]
        nas_estimators = [
            estimator
            if not isinstance(estimator, str)
            else NAS_ESTIMATOR_DICT[estimator]()
            for estimator in _as_list(nas_estimators)
        ]

        max_number = max(len(x) for x in (nas_algorithms, nas_spaces, nas_estimators))
        assert all(
            len(x) in (1, max_number)
            for x in (nas_algorithms, nas_spaces, nas_estimators)
        ), "lengths of algorithms/spaces/estimators do not match!"

        def _broadcast(components):
            # FIX: deep-copy the single ELEMENT when broadcasting a singleton.
            # The original copied the whole list, producing a list of lists.
            if len(components) == 1 and max_number > 1:
                return [deepcopy(components[0]) for _ in range(max_number)]
            return components

        self.nas_algorithms = _broadcast(nas_algorithms)
        self.nas_spaces = _broadcast(nas_spaces)
        self.nas_estimators = _broadcast(nas_estimators)

        return self

    def set_ensemble_module(self, ensemble_module, *args, **kwargs) -> "BaseSolver":
        r"""
        Set the ensemble module used in current solver.

        Parameters
        ----------
        ensemble_module: autogl.module.ensemble.BaseEnsembler or str or None
            The (name of) ensemble module used to ensemble the multi-models found.
            Disable ensemble by setting it to ``None``.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """

        raise NotImplementedError()

    def fit(self, *args, **kwargs) -> "BaseSolver":
        r"""
        Fit current solver on given dataset.

        Returns
        -------
        self: autogl.solver.BaseSolver
            A reference of current solver.
        """
        raise NotImplementedError()

    def fit_predict(self, *args, **kwargs) -> Any:
        r"""
        Fit current solver on given dataset and return the predicted value.

        Returns
        -------
        result: Any
            The predicted result
        """
        raise NotImplementedError()

    def predict(self, *args, **kwargs) -> Any:
        r"""
        Predict the node class number.

        Returns
        -------
        result: Any
            The predicted result
        """
        raise NotImplementedError()

    def get_leaderboard(self) -> LeaderBoard:
        r"""
        Get the current leaderboard of this solver.

        Returns
        -------
        lb: autogl.solver.leaderboard
            A leaderboard instance, or ``None`` if the solver was never fit.
        """
        return self.leaderboard

    def get_model_by_name(self, name) -> BaseTrainer:
        r"""
        Find and get the model instance by name.

        Parameters
        ----------
        name: str
            The name of model

        Returns
        -------
        trainer: autogl.module.train.BaseTrainer
            A trainer instance containing the trained models and training status.
        """
        # FIX: insert the missing space before the model name in the message
        assert name in self.trained_models, "cannot find model by name " + name
        return self.trained_models[name]

    def get_model_by_performance(self, index) -> Tuple[BaseTrainer, str]:
        r"""
        Find and get the model instance by performance.

        Parameters
        ----------
        index: int
            The performance index of model (from good to bad). Index from 0.

        Returns
        -------
        trainer: autogl.module.train.BaseTrainer
            A trainer instance containing the trained models and training status.
        name: str
            The name of current trainer.
        """
        name = self.leaderboard.get_best_model(index=index)
        return self.trained_models[name], name

    @classmethod
    def from_config(cls, path_or_dict, filetype="auto") -> "BaseSolver":
        r"""
        Load solver from config file.

        You can use this function to directly load a solver from predefined config dict
        or config file path.

        Parameters
        ----------
        path_or_dict: str or dict
            The path to the config file or the config dictionary object

        filetype: str
            The filetype of the given file if the path is specified.

        Returns
        -------
        solver: autogl.solver.AutoGraphClassifier
            The solver that is created from given file or dictionary.
        """
        raise NotImplementedError()
|