diff --git a/README.md b/README.md index a01c2d7..9b02df8 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ We also demonstrate the detail format of learnware zipfile in [DOC link], and al Users can start an ``Learnware`` workflow according to the following steps: -### Initialize a Learware Market +### Initialize a Learnware Market The ``EasyMarket`` class implements the most basic set of functions in a ``Learnware``. You can use the following code snippet to initialize a basic ``Learnware`` named "demo": diff --git a/docs/_static/img/image_labeled.png b/docs/_static/img/image_labeled.png deleted file mode 100644 index b375f19..0000000 Binary files a/docs/_static/img/image_labeled.png and /dev/null differ diff --git a/docs/_static/img/image_labeled.svg b/docs/_static/img/image_labeled.svg new file mode 100644 index 0000000..0de61b1 --- /dev/null +++ b/docs/_static/img/image_labeled.svg @@ -0,0 +1,1423 @@ + + + + + + + + 2023-12-27T16:40:12.240585 + image/svg+xml + + + Matplotlib v3.8.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_static/img/learnware_market.jpg b/docs/_static/img/learnware_market.jpg deleted file mode 100644 index 1f69769..0000000 Binary files a/docs/_static/img/learnware_market.jpg and /dev/null differ diff --git a/docs/_static/img/learnware_market.svg b/docs/_static/img/learnware_market.svg new file mode 100644 index 0000000..68d0a87 --- /dev/null +++ b/docs/_static/img/learnware_market.svg @@ -0,0 +1 @@ +Task CTask 1Task 2“ab”Spam?0011Model 1Model 2spamemailemailab>0.8ab0.8spamemailour>1.2our1.2Model CspecificationspecificationLearnwareCLearnware2Learnware1specificationLearnware MarketSubmitting StageDeploying StageDevelopersStandard learnware formatRequire-mentReturnSearchSubmitApply returned models directlyAdapt modelson user data(optional) \ No newline at end of file diff --git a/docs/_static/img/text_labeled.svg b/docs/_static/img/text_labeled.svg new file mode 100644 index 0000000..669247e --- /dev/null +++ b/docs/_static/img/text_labeled.svg @@ -0,0 +1,1358 @@ + + + + + + + + 2023-12-27T16:34:04.155470 + image/svg+xml + + + Matplotlib v3.8.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/_static/img/text_labeled_curves.png b/docs/_static/img/text_labeled_curves.png deleted file mode 100644 index bfb9d7e..0000000 Binary files a/docs/_static/img/text_labeled_curves.png and /dev/null differ diff --git a/docs/advanced/anchor.rst b/docs/advanced/anchor.rst index 75fdcc1..db39bba 100644 --- a/docs/advanced/anchor.rst +++ b/docs/advanced/anchor.rst @@ -4,15 +4,15 @@ Anchor learnware Anchor learnwares are a small fraction of representative learnwares that helps locate user's requirements through user feedback. The learnware market can choose or generate several learnwares as anchor learnwares corresponding to the specification island. If the user does not have sufficient training data for constructing an RKME requirement, the learnware market can send several anchor learnwares to the user. By feeding her own data to these anchor learnwares, some information such as (precision, recall) or other performance indicators, can be generated and returned to the market. These information could help the market identify potentially helpful models, e.g., by identifying models that are far from anchors exhibiting poor performance whereas close to anchors exhibiting relatively better performance in the specification island. -To fulfill the anchor learnware method, you need to implement the following functions in ``anchor.py``: +To fulfill the anchor learnware method, you need to implement the following functions in ``learnware/market/anchor/``: -- First, you should design how the market chooses or generates anchor learnwares. This can be realized by selecting prototype models through functional space clustering, and more interesting designs can be explored. The function ``AnchoredMarket.update_anchor_learnware_list`` is reserved for it. The functions ``AnchoredMarket._update_anchor_learnware`` and ``AnchoredMarket._delete_anchor_learnware`` have been completed as auxiliary. +- First, you should design how the market chooses or generates anchor learnwares. This can be realized by selecting prototype models through functional space clustering, and more interesting designs can be explored. The function ``AnchoredOrganizer.update_anchor_learnware_list`` is reserved for it. The functions ``AnchoredOrganizer._update_anchor_learnware`` and ``AnchoredOrganizer._delete_anchor_learnware`` have been completed as auxiliary. -- Second, when a user comes with no RKME(or other statistical) specifications, the market should choose several anchor learnwares and send them to the user. This process is done by ``AnchoredMarket.search_anchor_learnware``, and the chosen anchors are stored in ``AnchoredUserInfo`` by ``AnchoredUserInfo.add_anchor_learnware``. +- Second, when a user comes with no RKME(or other statistical) specifications, the market should choose several anchor learnwares and send them to the user. This process is done by ``AnchoredSearcher.search_anchor_learnware``, and the chosen anchors are stored in ``AnchoredUserInfo`` by ``AnchoredUserInfo.add_anchor_learnware_ids``. - Third, the market should specify which performance indicator should the user return. By feeding the user's data to these anchor learnwares, the returned information is calculated and stored in ``AnchoredUserInfo`` by ``AnchoredUserInfo.update_stat_info``. -- Fourth, according to the returned information from the user, the market should identify the helpful learnwares for the user. This process is done in ``AnchoredMarket.search_learnware``. +- Fourth, according to the returned information from the user, the market should identify the helpful learnwares for the user. This process is done in ``AnchoredSearcher.search_learnware``. diff --git a/docs/advanced/evolve.rst b/docs/advanced/evolve.rst index 85201fa..4847c7b 100644 --- a/docs/advanced/evolve.rst +++ b/docs/advanced/evolve.rst @@ -1,5 +1,5 @@ ============================== -Specification Evolvement +Evolvable Specification ============================== The specification is the core of the learnware paradigm. @@ -8,16 +8,16 @@ As the number of learnwares in the market increases, the knowledge held in the l This growth makes it possible for specification evolvement, enabling the market to generate new specifications for each learnware that more accurately characterize the properties of each model and its relationships with others. As a result, the learnware market can more effectively identify learnwares beneficial for user tasks. -To achieve the evolvement of specifications, you need to implement the class ``EvolvedMarket`` in the following way: +To achieve evolvable specifications, you need to implement the class ``EvolvedOrganizer`` in ``learnware/market/evolve/``: -- First, design a method for the learnware market to generate new statistical specifications for learnwares and implement the function ``EvolvedMarket.generate_new_stat_specification``. -- Second, use the function ``EvolvedMarket.generate_new_stat_specification`` to implement the function ``EvolvedMarket.evolve_learnware_list``, which enables learnwares to evolve by assigning new statistical specifications. +- First, design a method for the learnware market to generate new statistical specifications for learnwares and implement the function ``EvolvedOrganizer.generate_new_stat_specification``. +- Second, use the function ``EvolvedOrganizer.generate_new_stat_specification`` to implement the function ``EvolvedOrganizer.evolve_learnware_list``, which enables learnwares to evolve by assigning new statistical specifications. When implementing the anchor design, it is essential to develop an appropriate evolvement method for anchor learnwares based on the specific anchor selection method. In the anchor design, the learnware market sends anchor learnware to users, who then provide statistical information about the anchor learnwares on their tasks to the market. Based on this statistical feedback from users, the market can more accurately characterize anchor learnwares and continuously evolve them. -To realize specification evolvement, including anchor learnwares, you need to additionally implement the class ``EvolvedAnchoredMarket`` in the following way: +To realize evolvable specifications, including anchor learnwares, you need to additionally implement the class ``EvolvedAnchoredOrganizer`` in ``learnware/market/evolve_anchor/``: -- First, based on the specific anchor selection method, design an appropriate evolvement method for anchor learnwares and implement the function ``EvolvedAnchoredMarket.evolve_anchor_learnware_list``. -- Second, utilize the statistical feedback from users to implement the function ``EvolvedAnchoredMarket.evolve_anchor_learnware_by_user``, which enables anchor learnwares to evolve continually as users interact with the learnware market. \ No newline at end of file +- First, based on the specific anchor selection method, design an appropriate evolvement method for anchor learnwares and implement the function ``EvolvedAnchoredOrganizer.evolve_anchor_learnware_list``. +- Second, utilize the statistical feedback from users to implement the function ``EvolvedAnchoredOrganizer.evolve_anchor_learnware_by_user``, which enables anchor learnwares to evolve continually as users interact with the learnware market. \ No newline at end of file diff --git a/docs/components/market.rst b/docs/components/market.rst index 258732a..5756263 100644 --- a/docs/components/market.rst +++ b/docs/components/market.rst @@ -26,7 +26,7 @@ Current Checkers The ``Learnware`` package provide two different implementation of ``market`` where both of them share the same ``checker`` list. So we first introduce the details of ``checker``\ s. -The ``checker``s check a learnware object in different aspects, including environment configuration (``CondaChecker``), semantic specifications (``EasySemanticChecker``), and statistical specifications (``EasyStatChecker``). The ``__call__`` method of each checker is designed to be invoked as a function to conduct the respective checks on the learnware and return the outcomes. It defines three types of learnwares: ``INVALID_LEARNWARE`` denotes the learnware does not pass the check, ``NONUSABLE_LEARNWARE`` denotes the learnware pass the check but cannot make prediction, ``USABLE_LEARWARE`` denotes the leanrware pass the check and can make prediction. Currently, we have three ``checker``\ s, which are described below. +The ``checker``s check a learnware object in different aspects, including environment configuration (``CondaChecker``), semantic specifications (``EasySemanticChecker``), and statistical specifications (``EasyStatChecker``). The ``__call__`` method of each checker is designed to be invoked as a function to conduct the respective checks on the learnware and return the outcomes. It defines three types of learnwares: ``INVALID_LEARNWARE`` denotes the learnware does not pass the check, ``NONUSABLE_LEARNWARE`` denotes the learnware pass the check but cannot make prediction, ``USABLE_LEARNWARE`` denotes the leanrware pass the check and can make prediction. Currently, we have three ``checker``\ s, which are described below. ``CondaChecker`` diff --git a/docs/components/model.rst b/docs/components/model.rst index ba4e48b..76952ea 100644 --- a/docs/components/model.rst +++ b/docs/components/model.rst @@ -3,15 +3,55 @@ Model ================================ +A learnware is a well-performed trained model with a specification, where the model is an indispensable component of the learnware. + + +In this section, we will first introduce the ``BaseModel``, which defines the standard format for models in the learnware package. +Following that, we will introduce the ``ModelContainer``, which implements model deployment in conda virtual environments and Docker containers. + BaseModel ====================================== +The ``BaseModel`` class is a fundamental component of the learnware package and serves as a standard interface for defining machine learning models. +This class is created to make it easier for users to submit learnwares to the market. +It helps ensure that submitted models follow a clear set of rules and requirements. + +The model in a learnware should inherit the ``BaseModel`` class. +Here's a more detailed explanation of key components: + +- ``input_shape``: Specify the shape of the input features your model expects. +- ``output_shape``: Define the shape of the output predictions generated by your model. +- ``predict``: Implement the predict method to make predictions using your model. +- ``fit`` (optional): Use the fit method for training a model with input data and labels. +- ``finetune`` (optional): Utilize the finetune method for further adjusting pre-existing models sourced from the market. + +By adhering to these standards, the compatibility and quality of submitted learnwares in the market are ensured. + ModelContainer ====================================== -CondaContainer +The ``ModelContainer`` class is an essential component of the learnware package, designed to facilitate the management, deployment, and execution of machine learning models within a containerized environment. +It inherits from the ``BaseModel`` class and extends its functionality to encapsulate model deployment and execution. + +ModelCondaContainer +--------------------- + +The ``ModelCondaContainer`` class is an extension of the ``ModelContainer`` class within the learnware package. +Its primary purpose is to enable the management, deployment, and execution of machine learning models in a containerized environment, with a specific focus on using Conda virtual environments. +This class inherits functionality from ``ModelContainer`` while providing additional capabilities related to Conda-based model execution. + +Specifically, the ``ModelCondaContainer`` supports the automatic creation of new Conda virtual environments based on the ``requirements.txt`` file (for pip installation) or ``environment.yaml`` file (for Conda installation) included within the learnware itself. +It also installs the environment dependencies of the learnware, enabling it to run. + +ModelDockerContainer --------------------- +The ``ModelDockerContainer`` class is a specialized extension of the ``ModelContainer`` class within the learnware package. +It is designed to manage, deploy, and execute machine learning models within a containerized environment, specifically using Docker containers. +This class inherits functionality from ``ModelContainer`` and enhances it with features related to Docker-based model execution. + +Compared to ``ModelCondaContainer``, ``ModelDockerContainer`` confines the model's execution within a Docker container. +It installs the learnware's virtual environment inside the Docker container, isolating the learnware's execution from the host machine, thus enhancing the security of the learnware. -DockerContainer ---------------------- \ No newline at end of file +Similar to the ``ModelCondaContainer`` class, the ``ModelDockerContainer`` class also supports both types of environment dependency files for learnware: ``requirements.txt`` for pip-based installation and ``environment.yaml`` for conda-based installation. +It automates the creation of Docker containers and the installation of learnware's environment dependencies within the container, enabling the learnware to run. \ No newline at end of file diff --git a/docs/components/spec.rst b/docs/components/spec.rst index 84e235c..de85b94 100644 --- a/docs/components/spec.rst +++ b/docs/components/spec.rst @@ -86,17 +86,18 @@ By randomly sampling a subset of the dataset, we can construct Image Specificati .. code-block:: python - import torchvision - from torch.utils.data import DataLoader - from learnware.specification import generate_rkme_image_spec + import torchvision + from torch.utils.data import DataLoader + from learnware.specification import generate_rkme_image_spec - cifar10 = torchvision.datasets.CIFAR10( - root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor()) - X, _ = next(iter(DataLoader(cifar10, batch_size=len(cifar10)))) + cifar10 = torchvision.datasets.CIFAR10( + root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor() + ) + X, _ = next(iter(DataLoader(cifar10, batch_size=len(cifar10)))) - spec = generate_rkme_image_spec(X, sample_size=5000) - spec.save("cifar10.json") + spec = generate_rkme_image_spec(X, sample_size=5000) + spec.save("cifar10.json") Privacy Protection ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,6 +114,7 @@ The RBF not only exposes the real data (plotted in the corresponding position in Text Specification -------------------------- +Different from tabular data, each text input is a string of different length, so we should first transform them to equal-length arrays. Sentence embedding is used here to complete this transformation. We choose the model ``paraphrase-multilingual-MiniLM-L12-v2``, a lightweight multilingual embedding model. Then, we calculate the RKME specification on the embedding, just like we do with tabular data. Besides, we use the package ``langdetect`` to detect and store the language of the text inputs for further search. We hope to search for the learnware which supports the language of the user task. System Specification ====================================== diff --git a/docs/index.rst b/docs/index.rst index 33441a3..1137040 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,14 +51,14 @@ Document Structure :caption: ADVANCED TOPICS: Anchor Learnware - Specification Evolvement + Evolvable Specification .. toctree:: :maxdepth: 3 :caption: REFERENCES: API - BeimingWu System + Beimingwu System FAQ .. toctree:: diff --git a/docs/references/beiming.rst b/docs/references/beiming.rst deleted file mode 100644 index 3fce4d3..0000000 --- a/docs/references/beiming.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. _beiming: -==================== -BeimingWu System -==================== - -`Clik here for beiming system `_ \ No newline at end of file diff --git a/docs/references/beimingwu.rst b/docs/references/beimingwu.rst new file mode 100644 index 0000000..c616289 --- /dev/null +++ b/docs/references/beimingwu.rst @@ -0,0 +1,30 @@ +.. _beimingwu: +==================== +Beimingwu System +==================== + +`Beimingwu System `_ is based on the learnware paradigm, which systematically implements the entire process of learnware from submission to deployment, helping users effectively search and reuse learnwares without the need to build machine learning models from scratch. + +The ``learnware`` package is the cornerstone of the Beimingwu system, functioning as its core engine. +It offers a comprehensive suite of central APIs that encompass a wide range of functionalities, including the submission, verification, organization, search, and deployment of learnware. +This integration ensures a streamlined and efficient process, facilitating seamless interactions within the system. + +Core Features in the Beimingwu System +======================================= + +Beimingwu systematically implements the core process of the learnware paradigm for the first time: + +- ``Submitting Stage``: The system includes multiple detection mechanisms to ensure the quality of uploaded learnwares. Additionally, the system trains a heterogeneous engine based on existing learnware specifications in the system to merge different specification islands and assign new specifications to learnwares. With more learnwares are submitted, the heterogeneous engine will continue to update, achieving continuous iteration of learnware specifications and building a more precise specification world. +- ``Deploying Stage``: After users upload task requirements, the system automatically selects whether to recommend a single learnware or multiple learnware combinations and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse tools. + +In addition, the Beimingwu system also has the following features: + +- ``Learnware Specification Generation``: The Beimingwu system provides specification generation interfaces in the learnware package, supporting various data types (tables, images, and text) for efficient local generation. +- ``Learnware Quality Inspection``: The Beimingwu system includes multiple detection mechanisms to ensure the quality of each learnware in the system. +- ``Diverse Learnware Search``: The Beimingwu system supports both semantic specifications and statistical specifications searches, covering data types such as tables, images, and text. In addition, for table-based tasks, the system also supports the search for heterogeneous table learnwares. +- ``Local Learnware Deployment``: The Beimingwu system provides interfaces for learnware deployment and learnware reuse in the learnware package, facilitating users' convenient and secure learnware deployment. +- ``Data Privacy Protection``: The Beimingwu system operations, including learnware upload, search, and deployment, do not require users to upload local data. All relevant statistical specifications are generated locally by users, ensuring data privacy. +- ``Fully Open Source``: The Beimingwu system's source code is completely open-source, including the learnware package and frontend/backend code. The learnware package is highly extensible, making it easy to integrate new specification designs, learnware system designs, and learnware reuse methods in the future. + +Beimingwu is the first system-level implementation of the learnware paradigm. +This pioneering venture is just the beginning, with vast opportunities for enhancement and growth in the related technological fields still ahead. \ No newline at end of file diff --git a/docs/start/exp.rst b/docs/start/exp.rst index 58e251c..eaa19e2 100644 --- a/docs/start/exp.rst +++ b/docs/start/exp.rst @@ -49,20 +49,20 @@ Homogeneous Tabular Dataset For homogeneous experiments, the 55 stores in the Corporacion dataset act as 55 users, each applying one feature engineering method, and using the test data from their respective store as user data. These users can then search for homogeneous learnwares in the market with the same feature spaces as their tasks. -The Mean Squared Error (MSE) of search and reuse is presented in the table below: +The Mean Squared Error (MSE) of search and reuse across all users is presented in the table below: +-----------------------------------+---------------------+ | Setting | MSE | +===================================+=====================+ -| Mean in Market (Single) | 0.331 ± 0.040 | +| Mean in Market (Single) | 0.331 | +-----------------------------------+---------------------+ -| Best in Market (Single) | 0.151 ± 0.046 | +| Best in Market (Single) | 0.151 | +-----------------------------------+---------------------+ -| Top-1 Reuse (Single) | 0.280 ± 0.090 | +| Top-1 Reuse (Single) | 0.280 | +-----------------------------------+---------------------+ -| Job Selector Reuse (Multiple) | 0.274 ± 0.064 | +| Job Selector Reuse (Multiple) | 0.274 | +-----------------------------------+---------------------+ -| Average Ensemble Reuse (Multiple) | 0.267 ± 0.051 | +| Average Ensemble Reuse (Multiple) | 0.267 | +-----------------------------------+---------------------+ @@ -96,15 +96,15 @@ The average MSE performance across 41 users are as follows: +-----------------------------------+---------------------+ | Setting | MSE | +===================================+=====================+ -| Mean in Market (Single) | 1.459 ± 1.066 | +| Mean in Market (Single) | 1.459 | +-----------------------------------+---------------------+ -| Best in Market (Single) | 1.226 ± 1.032 | +| Best in Market (Single) | 1.226 | +-----------------------------------+---------------------+ -| Top-1 Reuse (Single) | 1.407 ± 1.061 | +| Top-1 Reuse (Single) | 1.407 | +-----------------------------------+---------------------+ -| Average Ensemble Reuse (Multiple) | 1.312 ± 1.099 | +| Average Ensemble Reuse (Multiple) | 1.312 | +-----------------------------------+---------------------+ -| User model with 50 labeled data | 1.267 ± 1.055 | +| User model with 50 labeled data | 1.267 | +-----------------------------------+---------------------+ From the results, it is noticeable that the learnware market still perform quite well even when users lack labeled data, @@ -127,6 +127,34 @@ The average results across 10 users are depicted in the figure below: We can observe that heterogeneous learnwares are beneficial when there's a limited amount of the user's labeled training data available, aiding in better alignment with the user's specific task. This underscores the potential of learnwares to be applied to tasks beyond their original purpose. +Image Data Experiment +========================= + +For the CIFAR-10 dataset, we sampled the training set unevenly by category and constructed unbalanced training datasets for the 50 learnwares that contained only some of the categories. This makes it unlikely that there exists any learnware in the learnware market that can accurately handle all categories of data; only the learnware whose training data is closest to the data distribution of the target task is likely to perform well on the target task. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with a non-zero probability of sampling on only 4 categories, and the sampling ratio is 0.4: 0.4: 0.1: 0.1. Ultimately, the training set for each learnware contains 12,000 samples covering the data of 4 categories in CIFAR-10. + +We constructed 50 target tasks using data from the test set of CIFAR-10. Similar to constructing the training set for the learnwares, in order to allow for some variation between tasks, we sampled the test set unevenly. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with non-zero sampling probability on 6 categories, and the sampling ratio is 0.3: 0.3: 0.1: 0.1: 0.1: 0.1. Ultimately, each target task contains 3000 samples covering the data of 6 categories in CIFAR-10. + +With this experimental setup, we evaluated the performance of RKME Image using 1 - Accuracy as the loss. + ++-----------------------------------+---------------------+ +| Setting | Accuracy | ++===================================+=====================+ +| Mean in Market (Single) | 0.655 | ++-----------------------------------+---------------------+ +| Best in Market (Single) | 0.304 | ++-----------------------------------+---------------------+ +| Top-1 Reuse (Single) | 0.406 | ++-----------------------------------+---------------------+ +| Job Selector Reuse (Multiple) | 0.406 | ++-----------------------------------+---------------------+ +| Average Ensemble Reuse (Multiple) | 0.310 | ++-----------------------------------+---------------------+ + +In some specific settings, the user will have a small number of labelled samples. In such settings, learning the weight of selected learnwares on a limited number of labelled samples can result in a better performance than training directly on a limited number of labelled samples. + +.. image:: ../_static/img/image_labeled.png + :align: center + Text Data Experiment ========================== @@ -152,70 +180,50 @@ Results * ``unlabeled_text_example``: -The accuracy of search and reuse is presented in the table below: +The table below presents the mean accuracy of search and reuse across all users: +-----------------------------------+---------------------+ | Setting | Accuracy | +===================================+=====================+ -| Mean in Market (Single) | 0.507 ± 0.030 | +| Mean in Market (Single) | 0.507 | +-----------------------------------+---------------------+ -| Best in Market (Single) | 0.859 ± 0.051 | +| Best in Market (Single) | 0.859 | +-----------------------------------+---------------------+ -| Top-1 Reuse (Single) | 0.846 ± 0.054 | +| Top-1 Reuse (Single) | 0.846 | +-----------------------------------+---------------------+ -| Job Selector Reuse (Multiple) | 0.845 ± 0.053 | +| Job Selector Reuse (Multiple) | 0.845 | +-----------------------------------+---------------------+ -| Average Ensemble Reuse (Multiple) | 0.862 ± 0.051 | +| Average Ensemble Reuse (Multiple) | 0.862 | +-----------------------------------+---------------------+ * ``labeled_text_example``: We present the change curves in classification error rates for both the user's self-trained model and the multiple learnware reuse(EnsemblePrune), showcasing their performance on the user's test data as the user's training data increases. The average results across 10 users are depicted below: -.. image:: ../_static/img/text_labeled_curves.png +.. image:: ../_static/img/text_labeled.svg :align: center - :alt: Text Limited Labeled Data + :alt: Results on Text Experimental Scenario From the figure above, it is evident that when the user's own training data is limited, the performance of multiple learnware reuse surpasses that of the user's own model. As the user's training data grows, it is expected that the user's model will eventually outperform the learnware reuse. This underscores the value of reusing learnware to significantly conserve training data and achieve superior performance when user training data is limited. -Image Data Experiment -========================= - -For the CIFAR-10 dataset, we sampled the training set unevenly by category and constructed unbalanced training datasets for the 50 learnwares that contained only some of the categories. This makes it unlikely that there exists any learnware in the learnware market that can accurately handle all categories of data; only the learnware whose training data is closest to the data distribution of the target task is likely to perform well on the target task. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with a non-zero probability of sampling on only 4 categories, and the sampling ratio is 0.4: 0.4: 0.1: 0.1. Ultimately, the training set for each learnware contains 12,000 samples covering the data of 4 categories in CIFAR-10. - -We constructed 50 target tasks using data from the test set of CIFAR-10. Similar to constructing the training set for the learnwares, in order to allow for some variation between tasks, we sampled the test set unevenly. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with non-zero sampling probability on 6 categories, and the sampling ratio is 0.3: 0.3: 0.1: 0.1: 0.1: 0.1. Ultimately, each target task contains 3000 samples covering the data of 6 categories in CIFAR-10. - -With this experimental setup, we evaluated the performance of RKME Image using 1 - Accuracy as the loss. - -+-----------------------------------+---------------------+ -| Setting | Accuracy | -+===================================+=====================+ -| Mean in Market (Single) | 0.655 ± 0.021 | -+-----------------------------------+---------------------+ -| Best in Market (Single) | 0.304 ± 0.046 | -+-----------------------------------+---------------------+ -| Top-1 Reuse (Single) | 0.406 ± 0.128 | -+-----------------------------------+---------------------+ -| Job Selector Reuse (Multiple) | 0.406 ± 0.128 | -+-----------------------------------+---------------------+ -| Average Ensemble Reuse (Multiple) | 0.310 ± 0.112 | -+-----------------------------------+---------------------+ - -In some specific settings, the user will have a small number of labelled samples. In such settings, learning the weight of selected learnwares on a limited number of labelled samples can result in a better performance than training directly on a limited number of labelled samples. - -.. image:: ../_static/img/image_labeled.png - :align: center - Get Start Examples ========================= Examples for `Tabular, Text` and `Image` data sets are available at `Learnware Examples `_. You can run { main.py } directly to reproduce related experiments. We utilize the `fire` module to construct our experiments. -Tabular Examples +Text Examples ------------------ You can execute the experiment with the following commands: * `python main.py unlabeled_text_example`: Executes the unlabeled_text_example experiment; the results will be printed in the terminal. * `python main.py labeled_text_example`: Executes the labeled_text_example experiment; result curves will be automatically saved in the `figs` directory. + +Image Examples +------------------ +You can execute the experiment with the following commands: + +.. code-block:: bash + + python workflow.py image_example \ No newline at end of file diff --git a/docs/start/intro.rst b/docs/start/intro.rst index 3463750..fa153d2 100644 --- a/docs/start/intro.rst +++ b/docs/start/intro.rst @@ -3,61 +3,33 @@ Introduction ================ -``Learnware`` is a model sharing platform, which give a basic implementation of the learnware paradigm. A learnware is a well-performed trained machine learning model with a specification that enables it to be adequately identified to reuse according to the requirement of future users who may know nothing about the learnware in advance. The learnware paradigm can solve entangled problems in the current machine learning paradigm, like continual learning and catastrophic forgetting. It also reduces resources for training a well-performed model. +The ``learnware`` package provides a fundamental implementation of the central concepts and procedures for the learnware paradigm, which is a new paradigm aimed at enabling users to reuse existed well-trained models to solve their AI tasks instead of starting from scratch. +Moreover, the package's well-structured design ensures high scalability and allows for the effortless integration of various new features and techniques in the future. -Motivation -================= +In addition, the ``learnware`` package serves as the engine for the `Beimingwu System `_ and can be effectively employed for conducting experiments related to learnware. -.. image:: ../_static/img/learnware_paradigm.jpg - :align: center +What is Learnware? +==================== -Machine learning, especially the prevailing big model paradigm, has achieved great success in natural language processing and computer vision applications. However, it still faces challenges such as the requirement of a large amount of labeled training data, difficulty in adapting to changing environments, and catastrophic forgetting when refining trained models incrementally. These big models, while useful in their targeted tasks, often fail to address the above issues and struggle to generalize beyond their specific purposes. - -To better address the entangled issues in machine learning, we should consider the following aspects: - -+------------------------------------------------------------------------------------+ -| Aspect | -+====================================================================================+ -| 1. Investigate techniques that address multiple challenges simultaneously, | -| recognizing that these issues are often intertwined in real-world applications. | -+------------------------------------------------------------------------------------+ -| 2. Explore paradigms like learnware, which offers the possibility of | -| systematically reusing small models for tasks beyond their original purposes, | -| reducing the need for users to build models from scratch. | -+------------------------------------------------------------------------------------+ -| 3. Develop solutions that enable ordinary users to create well-performing models | -| without requiring proficient training skills. | -+------------------------------------------------------------------------------------+ -| 4. Address data privacy and proprietary concerns to facilitate experience | -| sharing among different users while respecting confidentiality. | -+------------------------------------------------------------------------------------+ -| 5. Adapt to the constraints of big data applications, where it may be | -| unaffordable or infeasible to hold all data for multiple passes of scanning. | -+------------------------------------------------------------------------------------+ -| 6. Consider the environmental impact of training large models, as their carbon | -| emissions pose a threat to our environment. | -+------------------------------------------------------------------------------------+ - -By considering these factors, we can develop a more comprehensive framework for tackling the complex challenges in machine learning, moving beyond the limitations of the big model paradigm, called Learnware. - - - -Framework -======================= - -.. image:: ../_static/img/learnware_market.jpg - :align: center +A learnware consists of high-performance machine learning models and specifications that characterize the models, i.e., "Learnware = Model + Specification." -The learnware paradigm introduces the concept of a well-performed, trained machine learning model with a specification that allows future users, who have no prior knowledge of the learnware, to reuse it based on their requirements. +The learnware specification consists of "semantic specification" and "statistical specification": -Developers or owners of trained machine learning models can submit their models to a learnware market. If accepted, the market assigns a specification to the model and accommodates it. The learnware market could host thousands or millions of well-performed models from different developers, for various tasks, using diverse data, and optimizing different objectives. +- ``Semantic Specification``: Describe the type and functionality of the model through text. +- ``Statistical Specification``: Characterize the statistical information contained in the model using various machine learning techniques. -Instead of building a model from scratch, users can submit their requirements to the learnware market, which then identifies and deploys helpful learnware(s) based on the specifications. Users can apply the learnware directly, adapt it using their data, or exploit it in other ways to improve their model. This process is more efficient and less expensive than building a model from scratch. +Learnware specifications describe the model's capabilities, enabling the model to be identified and reused by future users who may know nothing about the learnware in advance. +Why do we need Learnware? +============================ -Benefits of the Learnware Paradigm -============================================== +The Benefits of Learnware Paradigm +------------------------------------- + +Machine learning has achieved great success in many fields but still faces various challenges, such as the need for extensive training data and advanced training techniques, the difficulty of continuous learning, the risk of catastrophic forgetting, and the leakage of data privacy. + +Although there are many efforts focusing on one of these issues separately, they are entangled, and solving one problem may exacerbate others. The learnware paradigm aimss to address many of these challenges through a unified framework. +-----------------------+-----------------------------------------------------------------------------------------------+ | Benefit | Description | @@ -83,11 +55,25 @@ Benefits of the Learnware Paradigm | | large models and the carbon footprint. | +-----------------------+-----------------------------------------------------------------------------------------------+ -Challenges and Future Work -============================================== +How to Solve Future Tasks with Learnware Paradigm? +---------------------------------------------------- + +.. image:: ../_static/img/learnware_paradigm.jpg + :align: center + +Instead of building a model from scratch, users can submit their requirements to the learnware market, which then identifies and deploys helpful learnware(s) based on the specifications. Users can apply the learnware directly, adapt it using their data, or exploit it in other ways to improve their models. This process is more efficient and less expensive than building a model from scratch. -Although the learnware proposal shows promise, much work remains to make it a reality. The next sections will present some of the progress made so far. +Procedure of Learnware Paradigm +================================== +- ``Submitting Stage``: Developers voluntarily submit various learnwares to the learnware market, and the system conducts quality checks and further organization of these learnwares. +- ``Deploying Stage``: When users submit task requirements, the learnware market automatically selects whether to recommend a single learnware or a combination of multiple learnwares and provides efficient deployment methods. Whether it's a single learnware or a combination of multiple learnwares, the system offers convenient learnware reuse interfaces. + +.. image:: ../_static/img/learnware_market.svg + :align: center +Learnware Package Design +========================== +TBD by xiaodong. diff --git a/docs/workflows/client.rst b/docs/workflows/client.rst index b76fb8e..69114ad 100644 --- a/docs/workflows/client.rst +++ b/docs/workflows/client.rst @@ -19,7 +19,7 @@ How to Use Client ============================ -Initialize a Learware Client +Initialize a Learnware Client ------------------------------- diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/dataset_image_workflow/README.md b/examples/dataset_image_workflow/README.md new file mode 100644 index 0000000..c1f6ff9 --- /dev/null +++ b/examples/dataset_image_workflow/README.md @@ -0,0 +1,48 @@ +# Image Dataset Workflow Example + +## Introduction + +We conducted experiments on the widely used image benchmark dataset: [``CIFAR-10``](https://www.cs.toronto.edu/~kriz/cifar.html). +The ``CIFAR-10`` dataset consists of 60000 32x32 color images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. + +In the submitting stage, we sampled the training set non-uniformly by category, and constructed unbalanced training datasets for the 50 learnwares that contained only part of the categories randomly. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with positive sampling probability on only 4 categories, and a sampling ratio of 0.4: 0.4: 0.1: 0.1. The training set for each learnware contains 12,500 samples covering data from the 4 categories in CIFAR-10. + +In the deploying stage, we constructed 100 user tasks using the CIFAR-10 test set data. Similar to constructing the training set, the probability of each category being sampled obeys a random multinomial distribution, with positive sampling probabilities on only 6 categories, with a sampling ratio of 0.3: 0.3: 0.1: 0.1: 0.1: 0.1. Each user task contains 3,000 samples covering the data of 6 categories in CIFAR-10. + +Our example ``image_example`` shows the performance in two different scenarios: + +**Unlabelled Sample Scenario**: This scenario is designed to evaluate performance when users possess only testing data, searching and reusing learnware available in the market. + +**Labelled Sample Scenario**: This scenario aims to assess performance when users have both testing and limited training data, searching and reusing learnware directly from the market instead of training a model from scratch. This helps determine the amount of training data saved for the user. + +## Run the code + +Run the following command to start the ``image_example``. + +```bash +python workflow.py image_example +``` + +## Results + +With the experimental setup above, we evaluated the performance of RKME Image by calculating the mean accuracy across all users. + +### Unlabelled Sample Scenario + +| Metric | Value | +|--------------------------------------|---------------------| +| Mean in Market (Single) | 0.346 | +| Best in Market (Single) | 0.688 | +| Top-1 Reuse (Single) | 0.534 | +| Job Selector Reuse (Multiple) | 0.534 | +| Average Ensemble Reuse (Multiple) | 0.676 | + +### Labelled Sample Scenario + +In some specific settings, the user will have a small number of labeled samples. In such settings, learning the weight of selected learnwares on a limited number of labeled samples can result in a better performance than training directly on a limited number of labeled samples. + +
+ Results on Image Experimental Scenario +
+ +Note that in labelled sample scenario, the labelled samples are repeatedly sampled 3 to 10 times, in order to reduce the estimation error in accuracy due to random sampling. \ No newline at end of file diff --git a/examples/dataset_image_workflow/config.py b/examples/dataset_image_workflow/config.py new file mode 100644 index 0000000..ea199a5 --- /dev/null +++ b/examples/dataset_image_workflow/config.py @@ -0,0 +1,62 @@ +from learnware.tests.benchmarks import BenchmarkConfig + + +image_benchmark_config = BenchmarkConfig( + name="CIFAR-10", + user_num=100, + learnware_ids=[ + "00002207", + "00002208", + "00002209", + "00002210", + "00002211", + "00002212", + "00002213", + "00002214", + "00002215", + "00002216", + "00002217", + "00002218", + "00002219", + "00002220", + "00002221", + "00002222", + "00002223", + "00002224", + "00002225", + "00002226", + "00002227", + "00002228", + "00002229", + "00002230", + "00002231", + "00002232", + "00002233", + "00002234", + "00002235", + "00002236", + "00002237", + "00002238", + "00002239", + "00002240", + "00002241", + "00002242", + "00002243", + "00002244", + "00002245", + "00002246", + "00002247", + "00002248", + "00002249", + "00002250", + "00002251", + "00002252", + "00002253", + "00002254", + "00002255", + "00002256", + ], + test_data_path="CIFAR-10/test_data.zip", + train_data_path="CIFAR-10/train_data.zip", + extra_info_path="CIFAR-10/extra_info.zip", +) diff --git a/examples/dataset_image_workflow/example_files/example_init.py b/examples/dataset_image_workflow/example_files/example_init.py deleted file mode 100644 index b318ee8..0000000 --- a/examples/dataset_image_workflow/example_files/example_init.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import joblib -import numpy as np -from learnware.model import BaseModel -from .model import ConvModel -import torch - - -class Model(BaseModel): - def __init__(self): - super().__init__(input_shape=(3, 32, 32), output_shape=(10,)) - dir_path = os.path.dirname(os.path.abspath(__file__)) - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.model = ConvModel(channel=3, n_random_features=10).to(self.device) - self.model.load_state_dict(torch.load(os.path.join(dir_path, "conv_model.pth"))) - self.model.eval() - - def fit(self, X: np.ndarray, y: np.ndarray): - pass - - def predict(self, X: np.ndarray) -> np.ndarray: - X = torch.Tensor(X).to(self.device) - return self.model(X) - - def finetune(self, X: np.ndarray, y: np.ndarray): - pass diff --git a/examples/dataset_image_workflow/example_files/example_yaml.yaml b/examples/dataset_image_workflow/example_files/example_yaml.yaml deleted file mode 100644 index 9aaf820..0000000 --- a/examples/dataset_image_workflow/example_files/example_yaml.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: - class_name: Model - kwargs: {} -stat_specifications: - - module_path: learnware.specification - class_name: RKMEImageSpecification - file_name: rkme.json - kwargs: {} \ No newline at end of file diff --git a/examples/dataset_image_workflow/example_files/model.py b/examples/dataset_image_workflow/example_files/model.py deleted file mode 100644 index 3281416..0000000 --- a/examples/dataset_image_workflow/example_files/model.py +++ /dev/null @@ -1,183 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np - - -class Linear(nn.Module): - def __init__(self, input_feature=256, num_classes=10): - super().__init__() - self.linear_1 = nn.Linear(input_feature, 128) - self.dropout_1 = nn.Dropout(p=0.5) - self.linear_2 = nn.Linear(128, 128) - self.dropout_2 = nn.Dropout(p=0.5) - self.linear_3 = nn.Linear(128, num_classes) - - def forward(self, x): - out1 = F.relu(self.dropout_1(self.linear_1(x))) - out2 = F.relu(self.dropout_2(self.linear_2(out1))) - out = self.linear_3(out2) - return out - - -class OriginModel(nn.Module): - def __init__(self, last_layer_feature=256): - super().__init__() - self.linear_1 = nn.Linear(last_layer_feature, 128) - self.linear_2 = nn.Linear(128, 128) - self.linear_3 = nn.Linear(128, 10) - - def forward(self, x): - out = F.relu(self.linear_1(x)) - out = F.relu(self.linear_2(out)) - out = self.linear_3(out) - return out - - -class ConvModel(nn.Module): - def __init__( - self, - channel, - n_random_features, - net_width=64, - net_depth=3, - net_act="relu", - net_norm="batchnorm", - net_pooling="avgpooling", - im_size=(32, 32), - ): - super().__init__() - # print('Building Conv Model') - self.features, shape_feat = self._make_layers( - channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size - ) - num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2] - self.classifier = GaussianLinear(num_feat, n_random_features) - - def forward(self, x): - out = self.features(x) - out = out.reshape(out.size(0), -1) - out = self.classifier(out) - return out - - def _get_activation(self, net_act): - if net_act == "sigmoid": - return nn.Sigmoid() - elif net_act == "relu": - return nn.ReLU(inplace=True) - elif net_act == "leakyrelu": - return nn.LeakyReLU(negative_slope=0.01) - elif net_act == "gelu": - return nn.SiLU() - else: - exit("unknown activation function: %s" % net_act) - - def _get_pooling(self, net_pooling): - if net_pooling == "maxpooling": - return nn.MaxPool2d(kernel_size=2, stride=2) - elif net_pooling == "avgpooling": - return nn.AvgPool2d(kernel_size=2, stride=2) - elif net_pooling == "none": - return None - else: - exit("unknown net_pooling: %s" % net_pooling) - - def _get_normlayer(self, net_norm, shape_feat): - # shape_feat = (c*h*w) - if net_norm == "batchnorm": - return nn.BatchNorm2d(shape_feat[0], affine=True) - elif net_norm == "layernorm": - return nn.LayerNorm(shape_feat, elementwise_affine=True) - elif net_norm == "instancenorm": - return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) - elif net_norm == "groupnorm": - return nn.GroupNorm(4, shape_feat[0], affine=True) - elif net_norm == "none": - return None - else: - exit("unknown net_norm: %s" % net_norm) - - def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): - layers = [] - in_channels = channel - # if im_size[0] == 28: - # im_size = (32, 32) - shape_feat = [in_channels, im_size[0], im_size[1]] - for d in range(net_depth): - # print(shape_feat) - layers += [Conv2d_gaussian(in_channels, net_width, kernel_size=3, padding=1)] - # layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding='same')] - shape_feat[0] = net_width - if net_norm != "none": - layers += [self._get_normlayer(net_norm, shape_feat)] - layers += [self._get_activation(net_act)] - in_channels = net_width - if net_pooling != "none": - layers += [self._get_pooling(net_pooling)] - shape_feat[1] //= 2 - shape_feat[2] //= 2 - - return nn.Sequential(*layers), shape_feat - - -class Conv2d_gaussian(torch.nn.Conv2d): - def reset_parameters(self) -> None: - # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with - # uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size) - # For more details see: https://github.com/pytorch/pytorch/issues/15314#issuecomment-477448573 - # torch.nn.init.kaiming_normal_(self.weight, a= math.sqrt(5)) - # W has shape out, in, h, w - torch.nn.init.normal_( - self.weight, 0, np.sqrt(2) / np.sqrt(self.weight.shape[1] * self.weight.shape[2] * self.weight.shape[3]) - ) - if self.bias is not None: - fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) - # print(fan_in) - if fan_in != 0: - # bound = 0 * 1 / math.sqrt(fan_in) - # torch.nn.init.uniform_(self.bias, -bound, bound) - # torch.nn.init.uniform_(self.bias, -bound, bound) - torch.nn.init.normal_(self.bias, 0, 0.1) - - -class GaussianLinear(torch.nn.Module): - __constants__ = ["in_features", "out_features"] - in_features: int - out_features: int - weight: torch.Tensor - - def __init__( - self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None, funny=False - ) -> None: - factory_kwargs = {"device": device, "dtype": dtype} - super(GaussianLinear, self).__init__() - self.funny = funny - self.in_features = in_features - self.out_features = out_features - self.weight = torch.nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs)) - if bias: - self.bias = torch.nn.Parameter(torch.empty(out_features, **factory_kwargs)) - else: - self.register_parameter("bias", None) - self.reset_parameters() - - def reset_parameters(self) -> None: - # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with - # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see - # https://github.com/pytorch/pytorch/issues/57109 - # torch.nn.init.kaiming_normal_(self.weight, a=1 * np.sqrt(5)) - torch.nn.init.normal_(self.weight, 0, np.sqrt(2) / np.sqrt(self.in_features)) - # torch.nn.init.normal_(self.weight, 0, 3/np.sqrt(self.in_features)) - if self.bias is not None: - fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) - bound = 1 / np.sqrt(fan_in) if fan_in > 0 else 0 - # torch.nn.init.uniform_(self.bias, -bound, bound) - torch.nn.init.normal_(self.bias, 0, 0.1) - - def forward(self, input: torch.Tensor) -> torch.Tensor: - return torch.nn.functional.linear(input, self.weight, self.bias) - - def extra_repr(self) -> str: - return "in_features={}, out_features={}, bias={}".format( - self.in_features, self.out_features, self.bias is not None - ) diff --git a/examples/dataset_image_workflow/get_data.py b/examples/dataset_image_workflow/get_data.py deleted file mode 100644 index c3af534..0000000 --- a/examples/dataset_image_workflow/get_data.py +++ /dev/null @@ -1,283 +0,0 @@ -import torch -from torchvision import datasets, transforms -import torch.nn.functional as F -from scipy.ndimage.interpolation import rotate as scipyrotate - -import numpy as np - - -def get_fashion_mnist(data_root="./data", output_channels=1, image_size=28): - ds_train = datasets.FashionMNIST( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.FashionMNIST( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = X_train[:, None, :, :].float() - X_test = X_test[:, None, :, :].float() - - if output_channels > 1: - X_train = torch.cat([X_train for i in range(output_channels)], 1) - X_test = torch.cat([X_test for i in range(output_channels)], 1) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_mnist(data_root="./data/", output_channels=1, image_size=28): - ds_train = datasets.MNIST( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = [] - - for x, _ in ds_train: - X_train.append(x) - X_train = torch.stack(X_train) - - y_train = ds_train.targets - ds_test = datasets.MNIST( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = [] - - for x, _ in ds_test: - X_test.append(x) - X_test = torch.stack(X_test) - - y_test = ds_test.targets - - if output_channels > 1: - X_train = torch.cat([X_train for i in range(output_channels)], 1) - X_test = torch.cat([X_test for i in range(output_channels)], 1) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_cifar10(data_root="./data/", output_channels=3, image_size=32): - ds_train = datasets.CIFAR10( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.CIFAR10( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_svhn(output_channels=1, image_size=32): - ds_train = datasets.SVHN( - "./data/", - split="train", - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.labels - ds_test = datasets.SVHN( - "./data/", - split="test", - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.labels - - X_train = torch.Tensor(X_train) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(X_test) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_cifar100(data_root="./data/", output_channels=3, image_size=32): - ds_train = datasets.CIFAR100( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.CIFAR100( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_zca_matrix(X, reg_coef=0.1): - X_flat = X.reshape(X.shape[0], -1) - cov = (X_flat.T @ X_flat) / X_flat.shape[0] - reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] - u, s, _ = torch.svd(cov.cuda() + reg_amount * torch.eye(cov.shape[0]).cuda()) - inv_sqrt_zca_eigs = s ** (-0.5) - whitening_transform = torch.einsum("ij,j,kj->ik", u, inv_sqrt_zca_eigs, u) - - return whitening_transform.cpu() - - -def layernorm_data(X): - X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True) - X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True)) - - return X_processed - - -def transform_data(X, whitening_transform): - if len(whitening_transform.shape) == 2: - X_flat = X.reshape(X.shape[0], -1) - X_flat = X_flat @ whitening_transform - return X_flat.view(*X.shape) - else: - X_flat = X.reshape(X.shape[0], -1) - X_flat = torch.einsum("nd, ndi->ni", X_flat, whitening_transform) - return X_flat.view(*X.shape) - - -def scale_to_zero_one(X): - mins = torch.min(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) - maxes = torch.max(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) - return (X - mins) / (maxes - mins) - - -def augment(images, dc_aug_param, device): - # This can be sped up in the future. - - if dc_aug_param != None and dc_aug_param["strategy"] != "none": - scale = dc_aug_param["scale"] - crop = dc_aug_param["crop"] - rotate = dc_aug_param["rotate"] - noise = dc_aug_param["noise"] - strategy = dc_aug_param["strategy"] - - shape = images.shape - mean = [] - for c in range(shape[1]): - mean.append(float(torch.mean(images[:, c]))) - - def cropfun(i): - im_ = torch.zeros(shape[1], shape[2] + crop * 2, shape[3] + crop * 2, dtype=torch.float, device=device) - for c in range(shape[1]): - im_[c] = mean[c] - im_[:, crop : crop + shape[2], crop : crop + shape[3]] = images[i] - r, c = np.random.permutation(crop * 2)[0], np.random.permutation(crop * 2)[0] - images[i] = im_[:, r : r + shape[2], c : c + shape[3]] - - def scalefun(i): - h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) - w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) - tmp = F.interpolate( - images[i : i + 1], - [h, w], - )[0] - mhw = max(h, w, shape[2], shape[3]) - im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device) - r = int((mhw - h) / 2) - c = int((mhw - w) / 2) - im_[:, r : r + h, c : c + w] = tmp - r = int((mhw - shape[2]) / 2) - c = int((mhw - shape[3]) / 2) - images[i] = im_[:, r : r + shape[2], c : c + shape[3]] - - def rotatefun(i): - im_ = scipyrotate( - images[i].cpu().data.numpy(), - angle=np.random.randint(-rotate, rotate), - axes=(-2, -1), - cval=np.mean(mean), - ) - r = int((im_.shape[-2] - shape[-2]) / 2) - c = int((im_.shape[-1] - shape[-1]) / 2) - images[i] = torch.tensor(im_[:, r : r + shape[-2], c : c + shape[-1]], dtype=torch.float, device=device) - - def noisefun(i): - images[i] = images[i] + noise * torch.randn(shape[1:], dtype=torch.float, device=device) - - augs = strategy.split("_") - - for i in range(shape[0]): - choice = np.random.permutation(augs)[0] # randomly implement one augmentation - if choice == "crop": - cropfun(i) - elif choice == "scale": - scalefun(i) - elif choice == "rotate": - rotatefun(i) - elif choice == "noise": - noisefun(i) - - return images diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py deleted file mode 100644 index 0512c31..0000000 --- a/examples/dataset_image_workflow/main.py +++ /dev/null @@ -1,221 +0,0 @@ -import numpy as np -import torch -from tqdm import tqdm - -from get_data import * -import os -import random - -from learnware.specification import RKMEImageSpecification -from learnware.reuse.averaging import AveragingReuser -from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware -import time - -from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.market.easy import database_ops -from learnware.learnware import Learnware -import learnware.specification as specification -from learnware.logger import get_module_logger - -from shutil import copyfile, rmtree -import zipfile - -logger = get_module_logger("image_test", level="INFO") -origin_data_root = "./data/origin_data" -processed_data_root = "./data/processed_data" -tmp_dir = "./data/tmp" -learnware_pool_dir = "./data/learnware_pool" -dataset = "cifar10" -n_uploaders = 30 -n_users = 20 -n_classes = 10 -data_root = os.path.join(origin_data_root, dataset) -data_save_root = os.path.join(processed_data_root, dataset) -user_save_root = os.path.join(data_save_root, "user") -uploader_save_root = os.path.join(data_save_root, "uploader") -model_save_root = os.path.join(data_save_root, "uploader_model") -os.makedirs(data_root, exist_ok=True) -os.makedirs(user_save_root, exist_ok=True) -os.makedirs(uploader_save_root, exist_ok=True) -os.makedirs(model_save_root, exist_ok=True) - - -semantic_specs = [ - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - "Output": {"Dimension": 10}, - "License": {"Values": ["MIT"], "Type": "Class"}, - } -] - -user_semantic = { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "", "Type": "String"}, - "License": {"Values": ["MIT"], "Type": "Class"}, -} - - -def prepare_data(): - if dataset == "cifar10": - X_train, y_train, X_test, y_test = get_cifar10(data_root) - elif dataset == "mnist": - X_train, y_train, X_test, y_test = get_mnist(data_root) - else: - return - generate_uploader(X_train, y_train, n_uploaders=n_uploaders, data_save_root=uploader_save_root) - generate_user(X_test, y_test, n_users=n_users, data_save_root=user_save_root) - - -def prepare_model(): - dataloader = ImageDataLoader(data_save_root, train=True) - for i in range(n_uploaders): - logger.info("Train on uploader: %d" % (i)) - X, y = dataloader.get_idx_data(i) - model = train(X, y, out_classes=n_classes) - model_save_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) - torch.save(model.state_dict(), model_save_path) - logger.info("Model saved to '%s'" % (model_save_path)) - - -def prepare_learnware(data_path, model_path, init_file_path, yaml_path, save_root, zip_name): - os.makedirs(save_root, exist_ok=True) - tmp_spec_path = os.path.join(save_root, "rkme.json") - tmp_model_path = os.path.join(save_root, "conv_model.pth") - tmp_yaml_path = os.path.join(save_root, "learnware.yaml") - tmp_init_path = os.path.join(save_root, "__init__.py") - tmp_model_file_path = os.path.join(save_root, "model.py") - mmodel_file_path = "./example_files/model.py" - - # Computing the specification from the whole dataset is too costly. - X = np.load(data_path) - indices = np.random.choice(len(X), size=2000, replace=False) - X_sampled = X[indices] - - st = time.time() - user_spec = RKMEImageSpecification(cuda_idx=0) - user_spec.generate_stat_spec_from_data(X=X_sampled) - ed = time.time() - logger.info("Stat spec generated in %.3f s" % (ed - st)) - user_spec.save(tmp_spec_path) - copyfile(model_path, tmp_model_path) - copyfile(yaml_path, tmp_yaml_path) - copyfile(init_file_path, tmp_init_path) - copyfile(mmodel_file_path, tmp_model_file_path) - zip_file_name = os.path.join(learnware_pool_dir, "%s.zip" % (zip_name)) - with zipfile.ZipFile(zip_file_name, "w", compression=zipfile.ZIP_DEFLATED) as zip_obj: - zip_obj.write(tmp_spec_path, "rkme.json") - zip_obj.write(tmp_model_path, "conv_model.pth") - zip_obj.write(tmp_yaml_path, "learnware.yaml") - zip_obj.write(tmp_init_path, "__init__.py") - zip_obj.write(tmp_model_file_path, "model.py") - rmtree(save_root) - logger.info("New Learnware Saved to %s" % (zip_file_name)) - return zip_file_name - - -def prepare_market(): - image_market = instantiate_learnware_market(market_id="cifar10", name="easy", rebuild=True) - try: - rmtree(learnware_pool_dir) - except: - pass - os.makedirs(learnware_pool_dir, exist_ok=True) - for i in tqdm(range(n_uploaders), total=n_uploaders, desc="Preparing..."): - data_path = os.path.join(uploader_save_root, "uploader_%d_X.npy" % (i)) - model_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) - init_file_path = "./example_files/example_init.py" - yaml_file_path = "./example_files/example_yaml.yaml" - new_learnware_path = prepare_learnware( - data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) - ) - semantic_spec = semantic_specs[0] - semantic_spec["Name"]["Values"] = "learnware_%d" % (i) - semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) - image_market.add_learnware(new_learnware_path, semantic_spec) - - logger.info("Total Item: %d" % (len(image_market))) - curr_inds = image_market._get_ids() - logger.info("Available ids: " + str(curr_inds)) - - -def test_search(gamma=0.1, load_market=True): - if load_market: - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - else: - prepare_market() - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - logger.info("Number of items in the market: %d" % len(image_market)) - - select_list = [] - avg_list = [] - improve_list = [] - job_selector_score_list = [] - ensemble_score_list = [] - for i in tqdm(range(n_users), total=n_users, desc="Searching..."): - user_data_path = os.path.join(user_save_root, "user_%d_X.npy" % (i)) - user_label_path = os.path.join(user_save_root, "user_%d_y.npy" % (i)) - user_data = np.load(user_data_path) - user_label = np.load(user_label_path) - user_stat_spec = RKMEImageSpecification(cuda_idx=0) - user_stat_spec.generate_stat_spec_from_data(X=user_data, resize=False) - user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_stat_spec}) - logger.info("Searching Market for user: %d" % i) - search_result = image_market.search_learnware(user_info) - single_result = search_result.get_single_results() - acc_list = [] - for idx, single_item in enumerate(single_result[:5]): - pred_y = single_item.learnware.predict(user_data) - acc = eval_prediction(pred_y, user_label) - acc_list.append(acc) - logger.info( - "Search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" - % (idx, single_item.score, single_item.learnware.id, acc) - ) - - # test reuse (job selector) - # reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list, herding_num=100) - # reuse_predict = reuse_baseline.predict(user_data=user_data) - # reuse_score = eval_prediction(reuse_predict, user_label) - # job_selector_score_list.append(reuse_score) - # print(f"mixture reuse loss: {reuse_score}") - - # test reuse (ensemble) - single_learnware_list = [single_item.learnware for single_item in single_result] - reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list[:3], mode="vote_by_prob") - ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) - ensemble_score = eval_prediction(ensemble_predict_y, user_label) - ensemble_score_list.append(ensemble_score) - print(f"reuse accuracy (vote_by_prob): {ensemble_score}\n") - - select_list.append(acc_list[0]) - avg_list.append(np.mean(acc_list)) - improve_list.append((acc_list[0] - np.mean(acc_list)) / np.mean(acc_list)) - - logger.info( - "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f" - % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) - ) - logger.info( - "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) - ) - - -if __name__ == "__main__": - logger.info("=" * 40) - logger.info(f"n_uploaders:\t{n_uploaders}") - logger.info(f"n_users:\t{n_users}") - logger.info("=" * 40) - - prepare_data() - prepare_model() - test_search(load_market=False) diff --git a/examples/dataset_image_workflow/model.py b/examples/dataset_image_workflow/model.py new file mode 100644 index 0000000..c1415c7 --- /dev/null +++ b/examples/dataset_image_workflow/model.py @@ -0,0 +1,82 @@ +from torch import nn + + +class ConvModel(nn.Module): + def __init__( + self, + channel, + n_random_features, + net_width=64, + net_depth=3, + net_act="relu", + net_norm="batchnorm", + net_pooling="avgpooling", + im_size=(32, 32), + ): + super().__init__() + self.features, shape_feat = self._make_layers( + channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size + ) + num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2] + self.classifier = nn.Linear(num_feat, n_random_features) + + def forward(self, x): + out = self.features(x) + out = out.reshape(out.size(0), -1) + out = self.classifier(out) + return out + + def _get_activation(self, net_act): + if net_act == "sigmoid": + return nn.Sigmoid() + elif net_act == "relu": + return nn.ReLU(inplace=True) + elif net_act == "leakyrelu": + return nn.LeakyReLU(negative_slope=0.01) + elif net_act == "gelu": + return nn.SiLU() + else: + raise Exception("unknown activation function: %s" % net_act) + + def _get_pooling(self, net_pooling): + if net_pooling == "maxpooling": + return nn.MaxPool2d(kernel_size=2, stride=2) + elif net_pooling == "avgpooling": + return nn.AvgPool2d(kernel_size=2, stride=2) + elif net_pooling == "none": + return None + else: + raise Exception("unknown net_pooling: %s" % net_pooling) + + def _get_normlayer(self, net_norm, shape_feat): + if net_norm == "batchnorm": + return nn.BatchNorm2d(shape_feat[0], affine=True) + elif net_norm == "layernorm": + return nn.LayerNorm(shape_feat, elementwise_affine=True) + elif net_norm == "instancenorm": + return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) + elif net_norm == "groupnorm": + return nn.GroupNorm(4, shape_feat[0], affine=True) + elif net_norm == "none": + return None + else: + raise Exception("unknown net_norm: %s" % net_norm) + + def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): + layers = [] + in_channels = channel + shape_feat = [in_channels, im_size[0], im_size[1]] + for d in range(net_depth): + layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding="same")] + + shape_feat[0] = net_width + if net_norm != "none": + layers += [self._get_normlayer(net_norm, shape_feat)] + layers += [self._get_activation(net_act)] + in_channels = net_width + if net_pooling != "none": + layers += [self._get_pooling(net_pooling)] + shape_feat[1] //= 2 + shape_feat[2] //= 2 + + return nn.Sequential(*layers), shape_feat diff --git a/examples/dataset_image_workflow/utils.py b/examples/dataset_image_workflow/utils.py index b7f5056..1c9625d 100644 --- a/examples/dataset_image_workflow/utils.py +++ b/examples/dataset_image_workflow/utils.py @@ -1,174 +1,94 @@ -import os +import torch import numpy as np -import random -import math +from torch import optim, nn +from torch.utils.data import DataLoader, Dataset + +from learnware.utils import choose_device + + +@torch.no_grad() +def evaluate(model, evaluate_set: Dataset, device=None, distribution=True): + device = choose_device(0) if device is None else device + + if isinstance(model, nn.Module): + model.eval() + mapping = lambda m, x: m(x) + else: + mapping = lambda m, x: m.predict(x) + + criterion = nn.CrossEntropyLoss(reduction="sum") + total, correct, loss = 0, 0, torch.as_tensor(0.0, dtype=torch.float32, device=device) + dataloader = DataLoader(evaluate_set, batch_size=1024, shuffle=True) + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + out = mapping(model, X) + if not torch.is_tensor(out): + out = torch.from_numpy(out).to(device) + + if distribution: + loss += criterion(out, y) + _, predicted = torch.max(out.data, 1) + else: + predicted = out -import torch -import torch.nn as nn -import torch.optim as optim + total += y.size(0) + correct += (predicted == y).sum().item() -from example_files.model import ConvModel + acc = correct / total * 100 + loss = loss / total + if isinstance(model, nn.Module): + model.train() -class ImageDataLoader: - def __init__(self, data_root, train: bool = True): - self.data_root = data_root - self.train = train + return loss.item(), acc - def get_idx_data(self, idx=0): - if self.train: - X_path = os.path.join(self.data_root, "uploader", "uploader_%d_X.npy" % (idx)) - y_path = os.path.join(self.data_root, "uploader", "uploader_%d_y.npy" % (idx)) - if not (os.path.exists(X_path) and os.path.exists(y_path)): - raise Exception("Index Error") - X = np.load(X_path) - y = np.load(y_path) - else: - X_path = os.path.join(self.data_root, "user", "user_%d_X.npy" % (idx)) - y_path = os.path.join(self.data_root, "user", "user_%d_y.npy" % (idx)) - if not (os.path.exists(X_path) and os.path.exists(y_path)): - raise Exception("Index Error") - X = np.load(X_path) - y = np.load(y_path) - return X, y - - -def generate_uploader(data_x, data_y, n_uploaders=50, data_save_root=None): - if data_save_root is None: - return - os.makedirs(data_save_root, exist_ok=True) - for i in range(n_uploaders): - random_class_num = random.randint(6, 10) - cls_indx = list(range(10)) - random.shuffle(cls_indx) - selected_cls_indx = cls_indx[:random_class_num] - rest_cls_indx = cls_indx[random_class_num:] - selected_data_indx = [] - for cls in selected_cls_indx: - data_indx = list(torch.where(data_y == cls)[0]) - # print(type(data_indx)) - random.shuffle(data_indx) - data_num = random.randint(800, 2000) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - for cls in rest_cls_indx: - flag = random.randint(0, 1) - if flag == 0: - continue - data_indx = list(torch.where(data_y == cls)[0]) - random.shuffle(data_indx) - data_num = random.randint(20, 80) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - selected_X = data_x[selected_data_indx].numpy() - selected_y = data_y[selected_data_indx].numpy() - print(selected_X.dtype, selected_y.dtype) - print(selected_X.shape, selected_y.shape) - X_save_dir = os.path.join(data_save_root, "uploader_%d_X.npy" % (i)) - y_save_dir = os.path.join(data_save_root, "uploader_%d_y.npy" % (i)) - np.save(X_save_dir, selected_X) - np.save(y_save_dir, selected_y) - print("Saving to %s" % (X_save_dir)) - - -def generate_user(data_x, data_y, n_users=50, data_save_root=None): - if data_save_root is None: - return - os.makedirs(data_save_root, exist_ok=True) - for i in range(n_users): - random_class_num = random.randint(3, 6) - cls_indx = list(range(10)) - random.shuffle(cls_indx) - selected_cls_indx = cls_indx[:random_class_num] - selected_data_indx = [] - for cls in selected_cls_indx: - data_indx = list(torch.where(data_y == cls)[0]) - # print(type(data_indx)) - random.shuffle(data_indx) - data_num = random.randint(150, 350) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - # print('Total Index:', len(selected_data_indx)) - selected_X = data_x[selected_data_indx].numpy() - selected_y = data_y[selected_data_indx].numpy() - print(selected_X.shape, selected_y.shape) - X_save_dir = os.path.join(data_save_root, "user_%d_X.npy" % (i)) - y_save_dir = os.path.join(data_save_root, "user_%d_y.npy" % (i)) - np.save(X_save_dir, selected_X) - np.save(y_save_dir, selected_y) - print("Saving to %s" % (X_save_dir)) - - -# Train Uploaders' models -def train(X, y, out_classes, epochs=35, batch_size=128): - print(X.shape, y.shape) - input_feature = X.shape[1] - data_size = X.shape[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model = ConvModel(channel=input_feature, n_random_features=out_classes).to(device) - model.train() - # Adam optimizer with learning rate 1e-3 - # optimizer = optim.Adam(model.parameters(), lr=1e-3) +def train_model( + model: nn.Module, + train_set: Dataset, + valid_set: Dataset, + save_path: str, + epochs=35, + batch_size=128, + device=None, + verbose=True, +): + device = choose_device(0) if device is None else device - # SGD optimizer with learning rate 1e-2 + model.train() optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - - # mean-squared error loss criterion = nn.CrossEntropyLoss() + dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) + best_loss = 100000 for epoch in range(epochs): running_loss = [] - indx = list(range(data_size)) - random.shuffle(indx) - curr_X = X[indx] - curr_y = y[indx] - for i in range(math.floor(data_size / batch_size)): - inputs, annos = curr_X[i * batch_size : (i + 1) * batch_size], curr_y[i * batch_size : (i + 1) * batch_size] - inputs = torch.from_numpy(inputs).to(device) - annos = torch.from_numpy(annos).to(device) - # print(inputs.dtype, annos.dtype) - out = model(inputs) + model.train() + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device=device), y.to(device=device) optimizer.zero_grad() - loss = criterion(out, annos) + out = model(X) + loss = criterion(out, y) loss.backward() optimizer.step() running_loss.append(loss.item()) - # print('Epoch: %d, Average Loss: %.3f'%(epoch+1, np.mean(running_loss))) - # Train Accuracy - acc = test(X, y, model) - model.train() - return model - - -def test(test_X, test_y, model, batch_size=128): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model.eval() - total, correct = 0, 0 - data_size = test_X.shape[0] - for i in range(math.ceil(data_size / batch_size)): - inputs, annos = test_X[i * batch_size : (i + 1) * batch_size], test_y[i * batch_size : (i + 1) * batch_size] - inputs = torch.Tensor(inputs).to(device) - annos = torch.Tensor(annos).to(device) - out = model(inputs) - _, predicted = torch.max(out.data, 1) - total += annos.size(0) - correct += (predicted == annos).sum().item() - acc = correct / total * 100 - print("Accuracy: %.2f" % (acc)) - return acc - - -def eval_prediction(pred_y, target_y): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if not isinstance(pred_y, np.ndarray): - pred_y = pred_y.detach().cpu().numpy() - predicted = np.argmax(pred_y, 1) - # print(predicted) - # annos = torch.from_numpy(target_y).to(device) - annos = target_y - total = annos.shape[0] - correct = (predicted == annos).sum().item() - criterion = nn.CrossEntropyLoss() - return correct / total + valid_loss, valid_acc = evaluate(model, valid_set, device=device) + train_loss, train_acc = evaluate(model, train_set, device=device) + if valid_loss < best_loss: + best_loss = valid_loss + + torch.save(model.state_dict(), save_path) + if verbose: + print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch + 1, valid_acc, valid_loss)) + if valid_acc > 99.0: + if verbose: + print("Early Stopping at 99% !") + break + + if verbose and (epoch + 1) % 5 == 0: + print( + "Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}".format( + epoch + 1, np.mean(running_loss), train_acc, valid_loss + ) + ) diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py new file mode 100644 index 0000000..dbe293a --- /dev/null +++ b/examples/dataset_image_workflow/workflow.py @@ -0,0 +1,264 @@ +import os +import fire +import time +import torch +import pickle +import random +import tempfile +import numpy as np +import matplotlib.pyplot as plt +from torch.utils.data import TensorDataset + +from learnware.utils import choose_device +from learnware.client import LearnwareClient +from learnware.logger import get_module_logger +from learnware.specification import generate_stat_spec +from learnware.tests.benchmarks import LearnwareBenchmark +from learnware.market import instantiate_learnware_market, BaseUserInfo +from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser +from model import ConvModel +from utils import train_model, evaluate +from config import image_benchmark_config + +logger = get_module_logger("image_workflow", level="INFO") + + +class ImageDatasetWorkflow: + def _plot_labeled_peformance_curves(self, all_user_curves_data): + plt.figure(figsize=(10, 6)) + plt.xticks(range(len(self.n_labeled_list)), self.n_labeled_list) + + styles = [ + {"color": "navy", "linestyle": "-", "marker": "o"}, + {"color": "magenta", "linestyle": "-.", "marker": "d"}, + ] + labels = ["User Model", "Multiple Learnware Reuse (EnsemblePrune)"] + + user_array, pruning_array = all_user_curves_data + for array, style, label in zip([user_array, pruning_array], styles, labels): + mean_curve = np.array([item[0] for item in array]) + std_curve = np.array([item[1] for item in array]) + plt.plot(mean_curve, **style, label=label) + plt.fill_between( + range(len(mean_curve)), + mean_curve - std_curve, + mean_curve + std_curve, + color=style["color"], + alpha=0.2, + ) + + plt.xlabel("Amout of Labeled User Data", fontsize=14) + plt.ylabel("1 - Accuracy", fontsize=14) + plt.title(f"Results on Image Experimental Scenario", fontsize=16) + plt.legend(fontsize=14) + plt.tight_layout() + plt.savefig(os.path.join(self.fig_path, "image_labeled_curves.svg"), bbox_inches="tight", dpi=700) + + def _prepare_market(self, rebuild=False): + client = LearnwareClient() + self.image_benchmark = LearnwareBenchmark().get_benchmark(image_benchmark_config) + self.image_market = instantiate_learnware_market(market_id=self.image_benchmark.name, rebuild=rebuild) + self.user_semantic = client.get_semantic_specification(self.image_benchmark.learnware_ids[0]) + self.user_semantic["Name"]["Values"] = "" + + if len(self.image_market) == 0 or rebuild == True: + for learnware_id in self.image_benchmark.learnware_ids: + with tempfile.TemporaryDirectory(prefix="image_benchmark_") as tempdir: + zip_path = os.path.join(tempdir, f"{learnware_id}.zip") + for i in range(20): + try: + semantic_spec = client.get_semantic_specification(learnware_id) + client.download_learnware(learnware_id, zip_path) + self.image_market.add_learnware(zip_path, semantic_spec) + break + except: + time.sleep(1) + continue + + logger.info("Total Item: %d" % (len(self.image_market))) + + def image_example(self, rebuild=False, skip_test=False): + np.random.seed(1) + random.seed(1) + self.n_labeled_list = [100, 200, 500, 1000, 2000, 4000] + self.repeated_list = [10, 10, 10, 3, 3, 3] + device = choose_device(0) + + self.root_path = os.path.dirname(os.path.abspath(__file__)) + self.fig_path = os.path.join(self.root_path, "figs") + self.curve_path = os.path.join(self.root_path, "curves") + self.model_path = os.path.join(self.root_path, "models") + os.makedirs(self.fig_path, exist_ok=True) + os.makedirs(self.curve_path, exist_ok=True) + os.makedirs(self.model_path, exist_ok=True) + + select_list = [] + avg_list = [] + best_list = [] + improve_list = [] + job_selector_score_list = [] + ensemble_score_list = [] + + if not skip_test: + self._prepare_market(rebuild) + all_learnwares = self.image_market.get_learnwares() + + for i in range(image_benchmark_config.user_num): + test_x, test_y = self.image_benchmark.get_test_data(user_ids=i) + train_x, train_y = self.image_benchmark.get_train_data(user_ids=i) + + test_x = torch.from_numpy(test_x) + test_y = torch.from_numpy(test_y) + test_dataset = TensorDataset(test_x, test_y) + + user_stat_spec = generate_stat_spec(type="image", X=test_x, whitening=False) + user_info = BaseUserInfo( + semantic_spec=self.user_semantic, stat_info={user_stat_spec.type: user_stat_spec} + ) + logger.info("Searching Market for user: %d" % (i)) + + search_result = self.image_market.search_learnware(user_info) + single_result = search_result.get_single_results() + multiple_result = search_result.get_multiple_results() + + print(f"search result of user{i}:") + print( + f"single model num: {len(single_result)}, max_score: {single_result[0].score}, min_score: {single_result[-1].score}" + ) + + acc_list = [] + for idx in range(len(all_learnwares)): + learnware = all_learnwares[idx] + loss, acc = evaluate(learnware, test_dataset) + acc_list.append(acc) + + learnware = single_result[0].learnware + best_loss, best_acc = evaluate(learnware, test_dataset) + best_list.append(np.max(acc_list)) + select_list.append(best_acc) + avg_list.append(np.mean(acc_list)) + improve_list.append((best_acc - np.mean(acc_list)) / np.mean(acc_list)) + print(f"market mean accuracy: {np.mean(acc_list)}, market best accuracy: {np.max(acc_list)}") + print( + f"Top1-score: {single_result[0].score}, learnware_id: {single_result[0].learnware.id}, acc: {best_acc}" + ) + + if len(multiple_result) > 0: + mixture_id = " ".join([learnware.id for learnware in multiple_result[0].learnwares]) + print(f"mixture_score: {multiple_result[0].score}, mixture_learnware: {mixture_id}") + mixture_learnware_list = multiple_result[0].learnwares + else: + mixture_learnware_list = [single_result[0].learnware] + + # test reuse (job selector) + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) + job_loss, job_acc = evaluate(reuse_job_selector, test_dataset) + job_selector_score_list.append(job_acc) + print(f"mixture reuse accuracy (job selector): {job_acc}") + + # test reuse (ensemble) + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote_by_prob") + ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, test_dataset) + ensemble_score_list.append(ensemble_acc) + print(f"mixture reuse accuracy (ensemble): {ensemble_acc}\n") + + user_model_score_mat = [] + pruning_score_mat = [] + single_score_mat = [] + + for n_label, repeated in zip(self.n_labeled_list, self.repeated_list): + user_model_score_list, reuse_pruning_score_list = [], [] + if n_label > len(train_x): + n_label = len(train_x) + for _ in range(repeated): + x_train, y_train = zip(*random.sample(list(zip(train_x, train_y)), k=n_label)) + x_train = np.array(list(x_train)) + y_train = np.array(list(y_train)) + + x_train = torch.from_numpy(x_train) + y_train = torch.from_numpy(y_train) + sampled_dataset = TensorDataset(x_train, y_train) + + mode_save_path = os.path.abspath(os.path.join(self.model_path, "model.pth")) + model = ConvModel( + channel=x_train.shape[1], im_size=(x_train.shape[2], x_train.shape[3]), n_random_features=10 + ).to(device) + train_model( + model, + sampled_dataset, + sampled_dataset, + mode_save_path, + epochs=35, + batch_size=128, + device=device, + verbose=False, + ) + model.load_state_dict(torch.load(mode_save_path)) + _, user_model_acc = evaluate(model, test_dataset, distribution=True) + user_model_score_list.append(user_model_acc) + + reuse_pruning = EnsemblePruningReuser( + learnware_list=mixture_learnware_list, mode="classification" + ) + reuse_pruning.fit(x_train, y_train) + _, pruning_acc = evaluate(reuse_pruning, test_dataset, distribution=False) + reuse_pruning_score_list.append(pruning_acc) + + single_score_mat.append([best_acc] * repeated) + user_model_score_mat.append(user_model_score_list) + pruning_score_mat.append(reuse_pruning_score_list) + print( + f"user_label_num: {n_label}, user_acc: {np.mean(user_model_score_mat[-1])}, pruning_acc: {np.mean(pruning_score_mat[-1])}" + ) + + logger.info(f"Saving Curves for User_{i}") + user_curves_data = (single_score_mat, user_model_score_mat, pruning_score_mat) + with open(os.path.join(self.curve_path, f"curve{str(i)}.pkl"), "wb") as f: + pickle.dump(user_curves_data, f) + + logger.info( + "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f, Best performance: %.3f +/- %.3f" + % ( + np.mean(select_list), + np.std(select_list), + np.mean(avg_list), + np.std(avg_list), + np.mean(best_list), + np.std(best_list), + ) + ) + logger.info("Average performance improvement: %.3f" % (np.mean(improve_list))) + logger.info( + "Average Job Selector Reuse Performance: %.3f +/- %.3f" + % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) + ) + logger.info( + "Averaging Ensemble Reuse Performance: %.3f +/- %.3f" + % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) + ) + + pruning_curves_data, user_model_curves_data = [], [] + total_user_model_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + total_pruning_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + for user_idx in range(image_benchmark_config.user_num): + with open(os.path.join(self.curve_path, f"curve{str(user_idx)}.pkl"), "rb") as f: + user_curves_data = pickle.load(f) + (single_score_mat, user_model_score_mat, pruning_score_mat) = user_curves_data + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] += 1 - np.array(user_model_score_mat[i]) / 100 + total_pruning_score_mat[i] += 1 - np.array(pruning_score_mat[i]) / 100 + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] /= image_benchmark_config.user_num + total_pruning_score_mat[i] /= image_benchmark_config.user_num + user_model_curves_data.append( + (np.mean(total_user_model_score_mat[i]), np.std(total_user_model_score_mat[i])) + ) + pruning_curves_data.append((np.mean(total_pruning_score_mat[i]), np.std(total_pruning_score_mat[i]))) + + self._plot_labeled_peformance_curves([user_model_curves_data, pruning_curves_data]) + + +if __name__ == "__main__": + fire.Fire(ImageDatasetWorkflow) diff --git a/examples/dataset_text_workflow/README.md b/examples/dataset_text_workflow/README.md index 4ac6b33..1b73787 100644 --- a/examples/dataset_text_workflow/README.md +++ b/examples/dataset_text_workflow/README.md @@ -37,15 +37,15 @@ python workflow.py labeled_text_example ### ``unlabeled_text_example``: -The accuracy of search and reuse is presented in the table below: +The table below presents the mean accuracy of search and reuse across all users: | Setting | Accuracy | |---------------------------------------|---------------------| -| Mean in Market (Single) | 0.507 ± 0.030 | -| Best in Market (Single) | 0.859 ± 0.051 | -| Top-1 Reuse (Single) | 0.846 ± 0.054 | -| Job Selector Reuse (Multiple) | 0.845 ± 0.053 | -| Average Ensemble Reuse (Multiple) | 0.862 ± 0.051 | +| Mean in Market (Single) | 0.507 | +| Best in Market (Single) | 0.859 | +| Top-1 Reuse (Single) | 0.846 | +| Job Selector Reuse (Multiple) | 0.845 | +| Average Ensemble Reuse (Multiple) | 0.862 | ### ``labeled_text_example``: @@ -53,7 +53,7 @@ The accuracy of search and reuse is presented in the table below: We present the change curves in classification error rates for both the user's self-trained model and the multiple learnware reuse(EnsemblePrune), showcasing their performance on the user's test data as the user's training data increases. The average results across 10 users are depicted below:
- Text Limited Labeled Data + Results on Text Experimental Scenario
From the figure above, it is evident that when the user's own training data is limited, the performance of multiple learnware reuse surpasses that of the user's own model. As the user's training data grows, it is expected that the user's model will eventually outperform the learnware reuse. This underscores the value of reusing learnware to significantly conserve training data and achieve superior performance when user training data is limited. diff --git a/examples/dataset_text_workflow/workflow.py b/examples/dataset_text_workflow/workflow.py index dfbacda..5a03a2f 100644 --- a/examples/dataset_text_workflow/workflow.py +++ b/examples/dataset_text_workflow/workflow.py @@ -49,25 +49,25 @@ class TextDatasetWorkflow: ] labels = ["User Model", "Multiple Learnware Reuse (EnsemblePrune)"] - user_mat, pruning_mat = all_user_curves_data - user_mat, pruning_mat = np.array(user_mat), np.array(pruning_mat) - for mat, style, label in zip([user_mat, pruning_mat], styles, labels): - mean_curve, std_curve = 1 - np.mean(mat, axis=0), np.std(mat, axis=0) + user_array, pruning_array = all_user_curves_data + for array, style, label in zip([user_array, pruning_array], styles, labels): + mean_curve = np.array([item[0] for item in array]) + std_curve = np.array([item[1] for item in array]) plt.plot(mean_curve, **style, label=label) plt.fill_between( range(len(mean_curve)), - mean_curve - 0.5 * std_curve, - mean_curve + 0.5 * std_curve, + mean_curve - std_curve, + mean_curve + std_curve, color=style["color"], alpha=0.2, ) - plt.xlabel("Labeled Data Size") - plt.ylabel("1 - Accuracy") - plt.title(f"Text Limited Labeled Data") - plt.legend() + plt.xlabel("Amout of Labeled User Data", fontsize=14) + plt.ylabel("1 - Accuracy", fontsize=14) + plt.title(f"Results on Text Experimental Scenario", fontsize=16) + plt.legend(fontsize=14) plt.tight_layout() - plt.savefig(os.path.join(self.fig_path, "text_labeled_curves.png"), bbox_inches="tight", dpi=700) + plt.savefig(os.path.join(self.fig_path, "text_labeled_curves.svg"), bbox_inches="tight", dpi=700) def _prepare_market(self, rebuild=False): client = LearnwareClient() @@ -103,7 +103,7 @@ class TextDatasetWorkflow: ensemble_score_list = [] all_learnwares = self.text_market.get_learnwares() - for i in range(self.text_benchmark.user_num): + for i in range(text_benchmark_config.user_num): user_data, user_label = self.text_benchmark.get_test_data(user_ids=i) user_stat_spec = RKMETextSpecification() @@ -183,19 +183,19 @@ class TextDatasetWorkflow: % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) ) - def labeled_text_example(self, rebuild=False, train_flag=True): + def labeled_text_example(self, rebuild=False, skip_test=False): self.n_labeled_list = [100, 200, 500, 1000, 2000, 4000] self.repeated_list = [10, 10, 10, 3, 3, 3] self.root_path = os.path.dirname(os.path.abspath(__file__)) self.fig_path = os.path.join(self.root_path, "figs") self.curve_path = os.path.join(self.root_path, "curves") - if train_flag: + if not skip_test: self._prepare_market(rebuild) os.makedirs(self.fig_path, exist_ok=True) os.makedirs(self.curve_path, exist_ok=True) - for i in range(self.text_benchmark.user_num): + for i in range(text_benchmark_config.user_num): user_model_score_mat = [] pruning_score_mat = [] single_score_mat = [] @@ -230,7 +230,6 @@ class TextDatasetWorkflow: mixture_learnware_list = multiple_result[0].learnwares else: mixture_learnware_list = [single_result[0].learnware] - print(len(train_x)) for n_label, repeated in zip(self.n_labeled_list, self.repeated_list): user_model_score_list, reuse_pruning_score_list = [], [] @@ -257,7 +256,9 @@ class TextDatasetWorkflow: single_score_mat.append([best_acc] * repeated) user_model_score_mat.append(user_model_score_list) pruning_score_mat.append(reuse_pruning_score_list) - print(n_label, np.mean(user_model_score_mat[-1]), np.mean(pruning_score_mat[-1])) + print( + f"user_label_num: {n_label}, user_acc: {np.mean(user_model_score_mat[-1])}, pruning_acc: {np.mean(pruning_score_mat[-1])}" + ) logger.info(f"Saving Curves for User_{i}") user_curves_data = (single_score_mat, user_model_score_mat, pruning_score_mat) @@ -265,19 +266,25 @@ class TextDatasetWorkflow: pickle.dump(user_curves_data, f) pruning_curves_data, user_model_curves_data = [], [] - for i in range(self.text_benchmark.user_num): - with open(os.path.join(self.curve_path, f"curve{str(i)}.pkl"), "rb") as f: + total_user_model_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + total_pruning_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + for user_idx in range(text_benchmark_config.user_num): + with open(os.path.join(self.curve_path, f"curve{str(user_idx)}.pkl"), "rb") as f: user_curves_data = pickle.load(f) (single_score_mat, user_model_score_mat, pruning_score_mat) = user_curves_data - for i in range(len(single_score_mat)): - user_model_score_mat[i] = np.mean(user_model_score_mat[i]) - pruning_score_mat[i] = np.mean(pruning_score_mat[i]) - if len(user_model_score_mat) < 6: - for i in range(6 - len(user_model_score_mat)): - user_model_score_mat.append(user_model_score_mat[-1]) - pruning_score_mat.append(pruning_score_mat[-1]) - user_model_curves_data.append(user_model_score_mat[:6]) - pruning_curves_data.append(pruning_score_mat[:6]) + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] += 1 - np.array(user_model_score_mat[i]) + total_pruning_score_mat[i] += 1 - np.array(pruning_score_mat[i]) + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] /= text_benchmark_config.user_num + total_pruning_score_mat[i] /= text_benchmark_config.user_num + user_model_curves_data.append( + (np.mean(total_user_model_score_mat[i]), np.std(total_user_model_score_mat[i])) + ) + pruning_curves_data.append((np.mean(total_pruning_score_mat[i]), np.std(total_pruning_score_mat[i]))) + self._plot_labeled_peformance_curves([user_model_curves_data, pruning_curves_data]) diff --git a/learnware/client/utils.py b/learnware/client/utils.py index cfb12c2..4cd9e0e 100644 --- a/learnware/client/utils.py +++ b/learnware/client/utils.py @@ -23,14 +23,15 @@ def system_execute(args, timeout=None, env=None, stdout=subprocess.DEVNULL, stde errmsg = err.stderr.decode() logger.warning(f"System Execute Error: {errmsg}") raise err - + return com_process def remove_enviroment(conda_env): system_execute(args=["conda", "env", "remove", "-n", f"{conda_env}"]) -def install_environment(learnware_dirpath, conda_env): + +def install_environment(learnware_dirpath, conda_env, conda_prefix=None): """Install environment of a learnware Parameters @@ -39,12 +40,21 @@ def install_environment(learnware_dirpath, conda_env): Path of the learnware folder conda_env : str a new conda environment will be created with the given name; + conda_prefix: str + install env in a specific location, not default env path; Raises ------ Exception Lack of the environment configuration file. """ + if conda_prefix is not None: + args_location = ["--prefix", conda_prefix] + conda_env = conda_prefix + else: + args_location = ["--name", conda_env] + pass + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: logger.info(f"learnware_dir namelist: {os.listdir(learnware_dirpath)}") if "environment.yaml" in os.listdir(learnware_dirpath): @@ -54,7 +64,7 @@ def install_environment(learnware_dirpath, conda_env): filter_nonexist_conda_packages_file(yaml_file=yaml_path, output_yaml_file=yaml_path_filter) # create environment logger.info(f"create conda env [{conda_env}] according to .yaml file") - system_execute(args=["conda", "env", "create", "--name", f"{conda_env}", "--file", f"{yaml_path_filter}"]) + system_execute(args=["conda", "env", "create"] + args_location + ["--file", f"{yaml_path_filter}"]) elif "requirements.txt" in os.listdir(learnware_dirpath): requirements_path: str = os.path.join(learnware_dirpath, "requirements.txt") @@ -62,14 +72,15 @@ def install_environment(learnware_dirpath, conda_env): logger.info(f"checking the available pip packages for {conda_env}") filter_nonexist_pip_packages_file(requirements_file=requirements_path, output_file=requirements_path_filter) logger.info(f"create empty conda env [{conda_env}]") - system_execute(args=["conda", "create", "-y", "--name", f"{conda_env}", "python=3.8"]) + system_execute(args=["conda", "create", "-y"] + args_location + ["python=3.8"]) logger.info(f"install pip requirements for conda env [{conda_env}]") system_execute( args=[ "conda", "run", - "-n", - f"{conda_env}", + ] + + args_location + + [ "--no-capture-output", "python", "-m", @@ -87,8 +98,9 @@ def install_environment(learnware_dirpath, conda_env): args=[ "conda", "run", - "-n", - f"{conda_env}", + ] + + args_location + + [ "--no-capture-output", "python", "-m", diff --git a/learnware/market/base.py b/learnware/market/base.py index 754ecde..3b53798 100644 --- a/learnware/market/base.py +++ b/learnware/market/base.py @@ -411,7 +411,7 @@ class BaseOrganizer: ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of targer learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns @@ -429,7 +429,7 @@ class BaseOrganizer: ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of targer learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns @@ -504,7 +504,7 @@ class BaseSearcher: class BaseChecker: INVALID_LEARNWARE = -1 NONUSABLE_LEARNWARE = 0 - USABLE_LEARWARE = 1 + USABLE_LEARNWARE = 1 def reset(self, **kwargs): pass diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index 57ff554..3b22c9b 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -217,4 +217,4 @@ class EasyStatChecker(BaseChecker): logger.warning(message) return self.INVALID_LEARNWARE, message - return self.USABLE_LEARWARE, "EasyStatChecker Success" + return self.USABLE_LEARNWARE, "EasyStatChecker Success" diff --git a/learnware/market/easy/organizer.py b/learnware/market/easy/organizer.py index c2d054f..ffc6428 100644 --- a/learnware/market/easy/organizer.py +++ b/learnware/market/easy/organizer.py @@ -232,7 +232,7 @@ class EasyOrganizer(BaseOrganizer): ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of target learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns @@ -264,7 +264,7 @@ class EasyOrganizer(BaseOrganizer): ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of targer learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns @@ -296,7 +296,7 @@ class EasyOrganizer(BaseOrganizer): ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of targer learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns @@ -339,11 +339,11 @@ class EasyOrganizer(BaseOrganizer): """ if check_status is None: filtered_ids = list(self.use_flags.keys()) - elif check_status in [BaseChecker.NONUSABLE_LEARNWARE, BaseChecker.USABLE_LEARWARE]: + elif check_status in [BaseChecker.NONUSABLE_LEARNWARE, BaseChecker.USABLE_LEARNWARE]: filtered_ids = [key for key, value in self.use_flags.items() if value == check_status] else: logger.warning( - f"check_status must be in [{BaseChecker.NONUSABLE_LEARNWARE}, {BaseChecker.USABLE_LEARWARE}]!" + f"check_status must be in [{BaseChecker.NONUSABLE_LEARNWARE}, {BaseChecker.USABLE_LEARNWARE}]!" ) return None diff --git a/learnware/market/heterogeneous/organizer/__init__.py b/learnware/market/heterogeneous/organizer/__init__.py index 8ff9e1e..50bce60 100644 --- a/learnware/market/heterogeneous/organizer/__init__.py +++ b/learnware/market/heterogeneous/organizer/__init__.py @@ -37,7 +37,7 @@ class HeteroMapTableOrganizer(EasyOrganizer): logger.info(f"Reload market mapping from checkpoint {self.market_mapping_path}") self.market_mapping = HeteroMap.load(checkpoint=self.market_mapping_path) if not rebuild: - usable_ids = self.get_learnware_ids(check_status=BaseChecker.USABLE_LEARWARE) + usable_ids = self.get_learnware_ids(check_status=BaseChecker.USABLE_LEARNWARE) hetero_ids = self._get_hetero_learnware_ids(usable_ids) for hetero_id in hetero_ids: self._reload_learnware_hetero_spec(hetero_id) @@ -93,14 +93,14 @@ class HeteroMapTableOrganizer(EasyOrganizer): zip_path, semantic_spec, check_status, learnware_id ) - if learnwere_status == BaseChecker.USABLE_LEARWARE and len(self._get_hetero_learnware_ids(learnware_id)): - self._update_learware_hetero_spec(learnware_id) + if learnwere_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(learnware_id)): + self._update_learnware_hetero_spec(learnware_id) if self.auto_update: self.count_down -= 1 if self.count_down == 0: training_learnware_ids = self._get_hetero_learnware_ids( - self.get_learnware_ids(check_status=BaseChecker.USABLE_LEARWARE) + self.get_learnware_ids(check_status=BaseChecker.USABLE_LEARNWARE) ) training_learnwares = self.get_learnware_by_ids(training_learnware_ids) logger.info(f"Verified leanwares for training: {training_learnware_ids}") @@ -111,7 +111,7 @@ class HeteroMapTableOrganizer(EasyOrganizer): f"Market mapping train completed. Now update HeteroMapTableSpecification for {training_learnware_ids}" ) self.market_mapping = updated_market_mapping - self._update_learware_hetero_spec(training_learnware_ids) + self._update_learnware_hetero_spec(training_learnware_ids) self.count_down = self.auto_update_limit @@ -165,9 +165,9 @@ class HeteroMapTableOrganizer(EasyOrganizer): """ old_semantic_spec = self.learnware_list[id].get_specification().get_semantic_spec() final_status = super(HeteroMapTableOrganizer, self).update_learnware(id, zip_path, semantic_spec, check_status) - if final_status == BaseChecker.USABLE_LEARWARE and len(self._get_hetero_learnware_ids(id)): + if final_status == BaseChecker.USABLE_LEARNWARE and len(self._get_hetero_learnware_ids(id)): if zip_path is not None or old_semantic_spec.get("Input", {}) != semantic_spec.get("Input", {}): - self._update_learware_hetero_spec(id) + self._update_learnware_hetero_spec(id) return final_status def _reload_learnware_hetero_spec(self, learnware_id): @@ -178,7 +178,7 @@ class HeteroMapTableOrganizer(EasyOrganizer): hetero_spec.load(hetero_spec_path) self.learnware_list[learnware_id].update_stat_spec(hetero_spec.type, hetero_spec) else: - self._update_learware_hetero_spec(learnware_id) + self._update_learnware_hetero_spec(learnware_id) logger.info(f"Reload HeteroMapTableSpecification for hetero spec {learnware_id} succeed!") except Exception as err: logger.error(f"Reload HeteroMapTableSpecification for hetero spec {learnware_id} failed! due to {err}.") @@ -196,14 +196,14 @@ class HeteroMapTableOrganizer(EasyOrganizer): if len(self._get_hetero_learnware_ids(learnware_id)): self._reload_learnware_hetero_spec(learnware_id) - def _update_learware_hetero_spec(self, ids: Union[str, List[str]]): + def _update_learnware_hetero_spec(self, ids: Union[str, List[str]]): """Update learnware by ids, attempting to generate HeteroMapTableSpecification for them. Parameters ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of target learware + str: id of target learnware List[str]: A list of ids of target learnwares """ if isinstance(ids, str): @@ -231,7 +231,7 @@ class HeteroMapTableOrganizer(EasyOrganizer): ---------- ids : Union[str, List[str]] Give a id or a list of ids - str: id of target learware + str: id of target learnware List[str]: A list of ids of target learnwares Returns diff --git a/learnware/market/heterogeneous/organizer/hetero_map/__init__.py b/learnware/market/heterogeneous/organizer/hetero_map/__init__.py index 6defcff..273cac9 100644 --- a/learnware/market/heterogeneous/organizer/hetero_map/__init__.py +++ b/learnware/market/heterogeneous/organizer/hetero_map/__init__.py @@ -288,6 +288,9 @@ class HeteroMap(nn.Module): # go through transformers, get the first cls embedding encoder_output = self.encoder(**outputs) # bs, seqlen+1, hidden_dim output_features = encoder_output[:, 0, :] + + del inputs, outputs, encoder_output + torch.cuda.empty_cache() return output_features @@ -317,6 +320,8 @@ class HeteroMap(nn.Module): with torch.no_grad(): output_features = self._extract_features(bs_x_test).detach().cpu().numpy() output_feas_list.append(output_features) + del output_features + torch.cuda.empty_cache() all_output_features = np.concatenate(output_feas_list, 0) return all_output_features diff --git a/learnware/reuse/ensemble_pruning.py b/learnware/reuse/ensemble_pruning.py index bd28ab4..d182c9f 100644 --- a/learnware/reuse/ensemble_pruning.py +++ b/learnware/reuse/ensemble_pruning.py @@ -149,7 +149,9 @@ class EnsemblePruningReuser(BaseReuser): import geatpy as ea except ModuleNotFoundError: raise ModuleNotFoundError(f"EnsemblePruningReuser is not available because 'geatpy' is not installed! Please install it manually (only support python_version<3.11).") - + + if torch.is_tensor(v_true): + v_true = v_true.detach().cpu().numpy() model_num = v_predict.shape[1] diff --git a/learnware/reuse/job_selector.py b/learnware/reuse/job_selector.py index 1825132..355849e 100644 --- a/learnware/reuse/job_selector.py +++ b/learnware/reuse/job_selector.py @@ -59,8 +59,11 @@ class JobSelectorReuser(BaseReuser): for idx in range(len(self.learnware_list)): data_idx_list = np.where(select_result == idx)[0] if len(data_idx_list) > 0: - # pred_y = self.learnware_list[idx].predict(raw_user_data[data_idx_list]) - pred_y = self.learnware_list[idx].predict([raw_user_data[i] for i in data_idx_list]) + if isinstance(raw_user_data, list): + pred_y = self.learnware_list[idx].predict([raw_user_data[i] for i in data_idx_list]) + else: + pred_y = self.learnware_list[idx].predict(raw_user_data[data_idx_list]) + if isinstance(pred_y, torch.Tensor): pred_y = pred_y.detach().cpu().numpy() # elif isinstance(pred_y, tf.Tensor): @@ -89,6 +92,9 @@ class JobSelectorReuser(BaseReuser): user_data : np.ndarray User's raw data. """ + if torch.is_tensor(user_data): + user_data = user_data.detach().cpu().numpy() + if len(self.learnware_list) == 1: # user_data_num = user_data.shape[0] user_data_num = len(user_data) @@ -118,9 +124,9 @@ class JobSelectorReuser(BaseReuser): task_spec = learnware_rkme_spec_list[i] if self.use_herding: task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) - herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() + herding_X_i = task_spec.herding(task_herding_num) else: - herding_X_i = task_spec.z.detach().cpu().numpy() + herding_X_i = task_spec.get_z() task_herding_num = herding_X_i.shape[0] task_val_num = task_herding_num // 5 @@ -172,7 +178,7 @@ class JobSelectorReuser(BaseReuser): user_data : np.ndarray Raw user data. task_rkme_list : List[RKMETableSpecification] - The list of learwares' rkmes whose mixture approximates the user's rkme + The list of learnwares' rkmes whose mixture approximates the user's rkme task_rkme_matrix : np.ndarray Inner product matrix calculated from task_rkme_list. """ @@ -223,8 +229,10 @@ class JobSelectorReuser(BaseReuser): try: from lightgbm import LGBMClassifier, early_stopping except ModuleNotFoundError: - raise ModuleNotFoundError(f"JobSelectorReuser is not available because 'lightgbm' is not installed! Please install it manually.") - + raise ModuleNotFoundError( + f"JobSelectorReuser is not available because 'lightgbm' is not installed! Please install it manually." + ) + score_best = -1 learning_rate = [0.01] max_depth = [66] diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 27a4b84..024ad84 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -365,7 +365,7 @@ class RKMEImageSpecification(RegularStatSpecification): indices = torch.multinomial(self.beta, T, replacement=True) mock = self.z[indices] + torch.randn_like(self.z[indices]) * 0.01 - return mock.numpy() + return mock.detach().cpu().numpy() def _sampling_candidates(self, N: int) -> np.ndarray: raise NotImplementedError() diff --git a/learnware/specification/regular/table/rkme.py b/learnware/specification/regular/table/rkme.py index c00445a..a7482ab 100644 --- a/learnware/specification/regular/table/rkme.py +++ b/learnware/specification/regular/table/rkme.py @@ -412,7 +412,7 @@ class RKMETableSpecification(RegularStatSpecification): S_shape = tuple([S.shape[0]] + list(Z_shape)[1:]) S = S.reshape(S_shape) - return S + return S.detach().cpu().numpy() def save(self, filepath: str): """Save the computed RKME specification to a specified path in JSON format. @@ -458,7 +458,9 @@ class RKMETableSpecification(RegularStatSpecification): for d in self.get_states(): if d in rkme_load.keys(): if d == "type" and rkme_load[d] != self.type: - raise TypeError(f"The type of loaded RKME ({rkme_load[d]}) is different from the expected type ({self.type})!") + raise TypeError( + f"The type of loaded RKME ({rkme_load[d]}) is different from the expected type ({self.type})!" + ) setattr(self, d, rkme_load[d])