Browse Source

[MNT] merge from main

tags/v0.3.2
Gene 2 years ago
parent
commit
a4e5150fcc
16 changed files with 390 additions and 363 deletions
  1. +2
    -1
      examples/dataset_image_workflow/main.py
  2. +2
    -1
      examples/dataset_m5_workflow/main.py
  3. +2
    -1
      examples/dataset_pfs_workflow/main.py
  4. +1
    -3
      examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
  5. +1
    -1
      examples/workflow_by_code/main.py
  6. +1
    -3
      learnware/client/learnware_client.py
  7. +2
    -4
      learnware/learnware/__init__.py
  8. +0
    -34
      learnware/learnware/base.py
  9. +3
    -0
      learnware/reuse/__init__.py
  10. +72
    -0
      learnware/reuse/averaging.py
  11. +41
    -0
      learnware/reuse/base.py
  12. +1
    -312
      learnware/reuse/ensemble_pruning.py
  13. +259
    -0
      learnware/reuse/job_selector.py
  14. +1
    -1
      tests/test_learnware_client/test_load_conda.py
  15. +1
    -1
      tests/test_learnware_client/test_reuse.py
  16. +1
    -1
      tests/test_workflow/test_workflow.py

+ 2
- 1
examples/dataset_image_workflow/main.py View File

@@ -4,7 +4,8 @@ from get_data import *
import os
import random
from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction
from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser
from learnware.learnware import Learnware
from learnware.reuse import JobSelectorReuser, AveragingReuser
import time

from learnware.market import EasyMarket, BaseUserInfo


+ 2
- 1
examples/dataset_m5_workflow/main.py View File

@@ -9,7 +9,8 @@ from shutil import copyfile, rmtree
import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.market import database_ops
from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser
from learnware.learnware import Learnware
from learnware.reuse import JobSelectorReuser, AveragingReuser
import learnware.specification as specification
from m5 import DataLoader
from learnware.logger import get_module_logger


+ 2
- 1
examples/dataset_pfs_workflow/main.py View File

@@ -9,7 +9,8 @@ from shutil import copyfile, rmtree
import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.market import database_ops
from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser
from learnware.learnware import Learnware
from learnware.reuse import JobSelectorReuser, AveragingReuser
import learnware.specification as specification
from pfs import Dataloader
from learnware.logger import get_module_logger


+ 1
- 3
examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py View File

@@ -85,9 +85,7 @@ def get_split_errs(algo):
split = train_xs.shape[0] - proportion_list[tmp]
model.fit(
train_xs[
split:,
],
train_xs[split:,],
train_ys[split:],
eval_set=[(val_xs, val_ys)],
early_stopping_rounds=50,


+ 1
- 1
examples/workflow_by_code/main.py View File

@@ -11,7 +11,7 @@ from shutil import copyfile, rmtree

import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.learnware import JobSelectorReuser, AveragingReuser
from learnware.reuse import JobSelectorReuser, AveragingReuser
import learnware.specification as specification
from learnware.utils import get_module_by_module_path



+ 1
- 3
learnware/client/learnware_client.py View File

@@ -7,19 +7,17 @@ import zipfile
import hashlib
import requests
import tempfile
import numpy as np
from enum import Enum
from tqdm import tqdm
from typing import Union, List

from ..config import C
from .. import learnware
from . import package_utils
from .container import LearnwaresContainer
from ..market.easy import EasyMarket
from ..logger import get_module_logger
from ..specification import Specification
from ..learnware import BaseReuser, Learnware, get_learnware_from_dirpath
from ..learnware import get_learnware_from_dirpath
from ..test import get_semantic_specification

CHUNK_SIZE = 1024 * 1024


+ 2
- 4
learnware/learnware/__init__.py View File

@@ -1,10 +1,9 @@
import os
import copy

from .base import Learnware, BaseReuser
from .reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser
from .base import Learnware

from .utils import get_stat_spec_from_config, get_model_from_config
from .utils import get_stat_spec_from_config
from ..specification import Specification
from ..utils import read_yaml_to_dict
from ..logger import get_module_logger
@@ -73,7 +72,6 @@ def get_learnware_from_dirpath(id: str, semantic_spec: dict, learnware_dirpath:
learnware_spec.update_stat_spec(**{stat_spac_name: stat_spec_inst})

learnware_spec.update_semantic_spec(copy.deepcopy(semantic_spec))
# learnware_model = get_model_from_config(learnware_config["model"])

except Exception as e:
logger.warning(f"Load Learnware {id} failed! Due to {repr(e)}")


+ 0
- 34
learnware/learnware/base.py View File

@@ -75,37 +75,3 @@ class Learnware:
def update(self):
# Empty Interface.
raise NotImplementedError("'update' Method is NOT Implemented.")


class BaseReuser:
"""Providing the interfaces to reuse the learnwares which is searched by learnware"""

def __init__(self, learnware_list: List[Learnware] = None):
"""The initializaiton method for base reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list to reuse and make predictions
"""
self.learnware_list = learnware_list

def reset(self, **kwargs):
for _k, _v in kwargs.items():
if hasattr(_k):
setattr(_k, _v)

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Give the final prediction for user data with reused learnware

Parameters
----------
user_data : np.ndarray
User's labeled raw data.

Returns
-------
np.ndarray
The final prediction for user data with reused learnware
"""
raise NotImplementedError("The predict method is not implemented!")

+ 3
- 0
learnware/reuse/__init__.py View File

@@ -0,0 +1,3 @@
from .ensemble_pruning import EnsemblePruningReuser
from .averaging import AveragingReuser
from .job_selector import JobSelectorReuser

+ 72
- 0
learnware/reuse/averaging.py View File

@@ -0,0 +1,72 @@
import torch
import numpy as np
from typing import List
from scipy.special import softmax


from learnware.learnware import Learnware
from .base import BaseReuser
from ..logger import get_module_logger

logger = get_module_logger("avaraging_reuser")


class AveragingReuser(BaseReuser):
"""Baseline Multiple Learnware Reuser using Ensemble Method"""

def __init__(self, learnware_list: List[Learnware] = None, mode: str = "mean"):
"""The initialization method for averaging ensemble reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list
mode : str
- "mean": average the output of all learnwares for regression task (learnware output is a real number)
- "vote_by_label": vote by labels for classification task, learnware output belongs to the set {0, 1, ..., class_num}
- "vote_by_prob": vote by probabilities for classification task, learnware output is a logits vector, denoting the probability of each class
"""
super(AveragingReuser, self).__init__(learnware_list)
if mode not in ["mean", "vote_by_label", "vote_by_prob"]:
raise ValueError(f"Mode must be one of ['mean', 'vote_by_label', 'vote_by_prob'], but got {mode}")
self.mode = mode

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Prediction for user data using baseline ensemble method

Parameters
----------
user_data : np.ndarray
Raw user data.

Returns
-------
np.ndarray
Prediction given by ensemble method
"""
preds = []
for learnware in self.learnware_list:
pred_y = learnware.predict(user_data)
if isinstance(pred_y, torch.Tensor):
pred_y = pred_y.detach().cpu().numpy()
if not isinstance(pred_y, np.ndarray):
raise TypeError(f"Model output must be np.ndarray or torch.Tensor")

if len(pred_y.shape) == 1:
pred_y = pred_y.reshape(-1, 1)
else:
if self.mode == "vote_by_label":
if pred_y.shape[1] > 1:
pred_y = pred_y.argmax(axis=1).reshape(-1, 1)
elif self.mode == "vote_by_prob":
pred_y = softmax(pred_y, axis=-1)
preds.append(pred_y)

if self.mode == "vote_by_prob":
return np.mean(preds, axis=0)
else:
preds = np.concatenate(preds, axis=1)
if self.mode == "mean":
return preds.mean(axis=1)
elif self.mode == "vote_by_label":
return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds)

+ 41
- 0
learnware/reuse/base.py View File

@@ -0,0 +1,41 @@
import numpy as np
from typing import List

from ..learnware import Learnware
from ..logger import get_module_logger

logger = get_module_logger("reuser")


class BaseReuser:
"""Providing the interfaces to reuse the learnwares which is searched by learnware"""

def __init__(self, learnware_list: List[Learnware] = None):
"""The initializaiton method for base reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list to reuse and make predictions
"""
self.learnware_list = learnware_list

def reset(self, **kwargs):
for _k, _v in kwargs.items():
if hasattr(_k):
setattr(_k, _v)

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Give the final prediction for user data with reused learnware

Parameters
----------
user_data : np.ndarray
User's labeled raw data.

Returns
-------
np.ndarray
The final prediction for user data with reused learnware
"""
raise NotImplementedError("The predict method is not implemented!")

learnware/learnware/reuse.py → learnware/reuse/ensemble_pruning.py View File

@@ -4,323 +4,12 @@ import numpy as np
import geatpy as ea

from typing import List
from cvxopt import matrix, solvers
from lightgbm import LGBMClassifier, early_stopping
from scipy.special import softmax
from sklearn.metrics import accuracy_score

from learnware.learnware import Learnware
import learnware.specification as specification
from .base import BaseReuser
from ..specification import RKMEStatSpecification
from ..logger import get_module_logger

logger = get_module_logger("Reuser")


class JobSelectorReuser(BaseReuser):
"""Baseline Multiple Learnware Reuser using Job Selector Method"""

def __init__(self, learnware_list: List[Learnware] = None, herding_num: int = 1000, use_herding: bool = True):
"""The initialization method for job selector reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list, which should have RKME Specification for each learnweare
herding_num : int, optional
The herding number, by default 1000
"""
super(JobSelectorReuser, self).__init__(learnware_list)
self.herding_num = herding_num
self.use_herding = use_herding

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Give prediction for user data using baseline job-selector method

Parameters
----------
user_data : np.ndarray
User's labeled raw data.

Returns
-------
np.ndarray
Prediction given by job-selector method
"""
select_result = self.job_selector(user_data)
pred_y_list = []
data_idxs_list = []

for idx in range(len(self.learnware_list)):
data_idx_list = np.where(select_result == idx)[0]
if len(data_idx_list) > 0:
pred_y = self.learnware_list[idx].predict(user_data[data_idx_list])
if isinstance(pred_y, torch.Tensor):
pred_y = pred_y.detach().cpu().numpy()
# elif isinstance(pred_y, tf.Tensor):
# pred_y = pred_y.numpy()

if not isinstance(pred_y, np.ndarray):
raise TypeError(f"Model output must be np.ndarray or torch.Tensor")

pred_y_list.append(pred_y)
data_idxs_list.append(data_idx_list)

if pred_y_list[0].ndim == 1:
selector_pred_y = np.zeros(user_data.shape[0])
else:
selector_pred_y = np.zeros((user_data.shape[0], pred_y_list[0].shape[1]))
for pred_y, data_idx_list in zip(pred_y_list, data_idxs_list):
selector_pred_y[data_idx_list] = pred_y

return selector_pred_y

def job_selector(self, user_data: np.ndarray):
"""Train job selector based on user's data, which predicts which learnware in the pool should be selected

Parameters
----------
user_data : np.ndarray
User's labeled raw data.
"""
if len(self.learnware_list) == 1:
user_data_num = user_data.shape[0]
return np.array([0] * user_data_num)
else:
learnware_rkme_spec_list = [
learnware.specification.get_stat_spec_by_name("RKMEStatSpecification")
for learnware in self.learnware_list
]

if self.use_herding:
task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list)))
for i in range(len(self.learnware_list)):
task_rkme1 = learnware_rkme_spec_list[i]
task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1)
for j in range(i + 1, len(self.learnware_list)):
task_rkme2 = learnware_rkme_spec_list[j]
task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2)

task_mixture_weight = self._calculate_rkme_spec_mixture_weight(
user_data, learnware_rkme_spec_list, task_matrix
)

herding_X, train_herding_X, val_herding_X = None, None, None
herding_y, train_herding_y, val_herding_y = [], [], []
for i in range(len(self.learnware_list)):
task_spec = learnware_rkme_spec_list[i]
if self.use_herding:
task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i]))
herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy()
else:
herding_X_i = task_spec.z.detach().cpu().numpy()
task_herding_num = herding_X_i.shape[0]
task_val_num = task_herding_num // 5

train_X_i = herding_X_i[:-task_val_num]
val_X_i = herding_X_i[-task_val_num:]

herding_X = herding_X_i if herding_X is None else np.concatenate((herding_X, herding_X_i), axis=0)
train_herding_X = (
train_X_i if train_herding_X is None else np.concatenate((train_herding_X, train_X_i), axis=0)
)
val_herding_X = val_X_i if val_herding_X is None else np.concatenate((val_herding_X, val_X_i), axis=0)

herding_y += [i] * task_herding_num
train_herding_y += [i] * (task_herding_num - task_val_num)
val_herding_y += [i] * task_val_num

herding_y = np.array(herding_y)
train_herding_y = np.array(train_herding_y)
val_herding_y = np.array(val_herding_y)

# use herding samples to train a job selector
herding_X = herding_X.reshape(herding_X.shape[0], -1)
train_herding_X = train_herding_X.reshape(train_herding_X.shape[0], -1)
val_herding_X = val_herding_X.reshape(val_herding_X.shape[0], -1)
herding_y = herding_y.astype(int)
train_herding_y = train_herding_y.astype(int)
val_herding_y = val_herding_y.astype(int)

job_selector = self._selector_grid_search(
herding_X,
herding_y,
train_herding_X,
train_herding_y,
val_herding_X,
val_herding_y,
len(self.learnware_list),
)
job_select_result = np.array(job_selector.predict(user_data.reshape(user_data.shape[0], -1)))

return job_select_result

def _calculate_rkme_spec_mixture_weight(
self, user_data: np.ndarray, task_rkme_list: List[RKMEStatSpecification], task_rkme_matrix: np.ndarray
) -> List[float]:
"""_summary_

Parameters
----------
user_data : np.ndarray
Raw user data.
task_rkme_list : List[RKMEStatSpecification]
The list of learwares' rkmes whose mixture approximates the user's rkme
task_rkme_matrix : np.ndarray
Inner product matrix calculated from task_rkme_list.
"""
task_num = len(task_rkme_list)
user_rkme_spec = specification.utils.generate_rkme_spec(X=user_data, reduce=False)
K = task_rkme_matrix
v = np.array([user_rkme_spec.inner_prod(task_rkme) for task_rkme in task_rkme_list])

P = matrix(K)
q = matrix(-v)
G = matrix(-np.eye(task_num))
h = matrix(np.zeros((task_num, 1)))
A = matrix(np.ones((1, task_num)))
b = matrix(np.ones((1, 1)))
solvers.options["show_progress"] = False

sol = solvers.qp(P, q, G, h, A, b, kktsolver="ldl")
task_mixture_weight = np.array(sol["x"]).reshape(-1)

return task_mixture_weight

def _selector_grid_search(
self,
org_train_x: np.ndarray,
org_train_y: np.ndarray,
train_x: np.ndarray,
train_y: np.ndarray,
val_x: np.ndarray,
val_y: np.ndarray,
num_class: int,
) -> LGBMClassifier:
"""Train a LGBMClassifier as job selector using the herding data as training instances.

Parameters
----------
org_train_x : np.ndarray
The original herding features.
org_train_y : np.ndarray
The original hearding labels(which are learnware indexes).
train_x : np.ndarray
Herding features used for training.
train_y : np.ndarray
Herding labels used for training.
val_x : np.ndarray
Herding features used for validation.
val_y : np.ndarray
Herding labels used for validation.
num_class : int
Total number of classes for the job selector(which is exactly the total number of learnwares to be reused).

Returns
-------
LGBMClassifier
The job selector model.
"""
score_best = -1
learning_rate = [0.01]
max_depth = [66]
params = (0, 0)

lgb_params = {
"boosting_type": "gbdt",
"n_estimators": 2000,
"boost_from_average": False,
}

if num_class == 2:
lgb_params["objective"] = "binary"
lgb_params["metric"] = "binary_logloss"
else:
lgb_params["objective"] = "multiclass"
lgb_params["metric"] = "multi_logloss"

for lr in learning_rate:
for md in max_depth:
lgb_params["learning_rate"] = lr
lgb_params["max_depth"] = md
model = LGBMClassifier(**lgb_params)
train_y = train_y.astype(int)
model.fit(train_x, train_y, eval_set=[(val_x, val_y)], callbacks=[early_stopping(300, verbose=False)])
pred_y = model.predict(org_train_x)
score = accuracy_score(pred_y, org_train_y)

if score > score_best:
score_best = score
params = (lr, md)

lgb_params["learning_rate"] = params[0]
lgb_params["max_depth"] = params[1]
model = LGBMClassifier(**lgb_params)
model.fit(org_train_x, org_train_y)

return model


class AveragingReuser(BaseReuser):
"""Baseline Multiple Learnware Reuser using Ensemble Method"""

def __init__(self, learnware_list: List[Learnware] = None, mode: str = "mean"):
"""The initialization method for averaging ensemble reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list
mode : str
- "mean": average the output of all learnwares for regression task (learnware output is a real number)
- "vote_by_label": vote by labels for classification task, learnware output belongs to the set {0, 1, ..., class_num}
- "vote_by_prob": vote by probabilities for classification task, learnware output is a logits vector, denoting the probability of each class
"""
super(AveragingReuser, self).__init__(learnware_list)
if mode not in ["mean", "vote_by_label", "vote_by_prob"]:
raise ValueError(f"Mode must be one of ['mean', 'vote_by_label', 'vote_by_prob'], but got {mode}")
self.mode = mode

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Prediction for user data using baseline ensemble method

Parameters
----------
user_data : np.ndarray
Raw user data.

Returns
-------
np.ndarray
Prediction given by ensemble method
"""
preds = []
for learnware in self.learnware_list:
pred_y = learnware.predict(user_data)
if isinstance(pred_y, torch.Tensor):
pred_y = pred_y.detach().cpu().numpy()
if not isinstance(pred_y, np.ndarray):
raise TypeError(f"Model output must be np.ndarray or torch.Tensor")

if len(pred_y.shape) == 1:
pred_y = pred_y.reshape(-1, 1)
else:
if self.mode == "vote_by_label":
if pred_y.shape[1] > 1:
pred_y = pred_y.argmax(axis=1).reshape(-1, 1)
elif self.mode == "vote_by_prob":
pred_y = softmax(pred_y, axis=-1)
preds.append(pred_y)

if self.mode == "vote_by_prob":
return np.mean(preds, axis=0)
else:
preds = np.concatenate(preds, axis=1)
if self.mode == "mean":
return preds.mean(axis=1)
elif self.mode == "vote_by_label":
return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds)
logger = get_module_logger("ensemble_pruning")


class EnsemblePruningReuser(BaseReuser):

+ 259
- 0
learnware/reuse/job_selector.py View File

@@ -0,0 +1,259 @@
import torch
import numpy as np

from typing import List
from cvxopt import matrix, solvers
from lightgbm import LGBMClassifier, early_stopping
from sklearn.metrics import accuracy_score

from learnware.learnware import Learnware
import learnware.specification as specification
from .base import BaseReuser
from ..specification import RKMEStatSpecification
from ..logger import get_module_logger

logger = get_module_logger("job_selector_reuse")


class JobSelectorReuser(BaseReuser):
"""Baseline Multiple Learnware Reuser using Job Selector Method"""

def __init__(self, learnware_list: List[Learnware] = None, herding_num: int = 1000, use_herding: bool = True):
"""The initialization method for job selector reuser

Parameters
----------
learnware_list : List[Learnware]
The learnware list, which should have RKME Specification for each learnweare
herding_num : int, optional
The herding number, by default 1000
"""
super(JobSelectorReuser, self).__init__(learnware_list)
self.herding_num = herding_num
self.use_herding = use_herding

def predict(self, user_data: np.ndarray) -> np.ndarray:
"""Give prediction for user data using baseline job-selector method

Parameters
----------
user_data : np.ndarray
User's labeled raw data.

Returns
-------
np.ndarray
Prediction given by job-selector method
"""
select_result = self.job_selector(user_data)
pred_y_list = []
data_idxs_list = []

for idx in range(len(self.learnware_list)):
data_idx_list = np.where(select_result == idx)[0]
if len(data_idx_list) > 0:
pred_y = self.learnware_list[idx].predict(user_data[data_idx_list])
if isinstance(pred_y, torch.Tensor):
pred_y = pred_y.detach().cpu().numpy()
# elif isinstance(pred_y, tf.Tensor):
# pred_y = pred_y.numpy()

if not isinstance(pred_y, np.ndarray):
raise TypeError(f"Model output must be np.ndarray or torch.Tensor")

pred_y_list.append(pred_y)
data_idxs_list.append(data_idx_list)

if pred_y_list[0].ndim == 1:
selector_pred_y = np.zeros(user_data.shape[0])
else:
selector_pred_y = np.zeros((user_data.shape[0], pred_y_list[0].shape[1]))
for pred_y, data_idx_list in zip(pred_y_list, data_idxs_list):
selector_pred_y[data_idx_list] = pred_y

return selector_pred_y

def job_selector(self, user_data: np.ndarray):
"""Train job selector based on user's data, which predicts which learnware in the pool should be selected

Parameters
----------
user_data : np.ndarray
User's labeled raw data.
"""
if len(self.learnware_list) == 1:
user_data_num = user_data.shape[0]
return np.array([0] * user_data_num)
else:
learnware_rkme_spec_list = [
learnware.specification.get_stat_spec_by_name("RKMEStatSpecification")
for learnware in self.learnware_list
]

if self.use_herding:
task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list)))
for i in range(len(self.learnware_list)):
task_rkme1 = learnware_rkme_spec_list[i]
task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1)
for j in range(i + 1, len(self.learnware_list)):
task_rkme2 = learnware_rkme_spec_list[j]
task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2)

task_mixture_weight = self._calculate_rkme_spec_mixture_weight(
user_data, learnware_rkme_spec_list, task_matrix
)

herding_X, train_herding_X, val_herding_X = None, None, None
herding_y, train_herding_y, val_herding_y = [], [], []
for i in range(len(self.learnware_list)):
task_spec = learnware_rkme_spec_list[i]
if self.use_herding:
task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i]))
herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy()
else:
herding_X_i = task_spec.z.detach().cpu().numpy()
task_herding_num = herding_X_i.shape[0]
task_val_num = task_herding_num // 5

train_X_i = herding_X_i[:-task_val_num]
val_X_i = herding_X_i[-task_val_num:]

herding_X = herding_X_i if herding_X is None else np.concatenate((herding_X, herding_X_i), axis=0)
train_herding_X = (
train_X_i if train_herding_X is None else np.concatenate((train_herding_X, train_X_i), axis=0)
)
val_herding_X = val_X_i if val_herding_X is None else np.concatenate((val_herding_X, val_X_i), axis=0)

herding_y += [i] * task_herding_num
train_herding_y += [i] * (task_herding_num - task_val_num)
val_herding_y += [i] * task_val_num

herding_y = np.array(herding_y)
train_herding_y = np.array(train_herding_y)
val_herding_y = np.array(val_herding_y)

# use herding samples to train a job selector
herding_X = herding_X.reshape(herding_X.shape[0], -1)
train_herding_X = train_herding_X.reshape(train_herding_X.shape[0], -1)
val_herding_X = val_herding_X.reshape(val_herding_X.shape[0], -1)
herding_y = herding_y.astype(int)
train_herding_y = train_herding_y.astype(int)
val_herding_y = val_herding_y.astype(int)

job_selector = self._selector_grid_search(
herding_X,
herding_y,
train_herding_X,
train_herding_y,
val_herding_X,
val_herding_y,
len(self.learnware_list),
)
job_select_result = np.array(job_selector.predict(user_data.reshape(user_data.shape[0], -1)))

return job_select_result

def _calculate_rkme_spec_mixture_weight(
self, user_data: np.ndarray, task_rkme_list: List[RKMEStatSpecification], task_rkme_matrix: np.ndarray
) -> List[float]:
"""_summary_

Parameters
----------
user_data : np.ndarray
Raw user data.
task_rkme_list : List[RKMEStatSpecification]
The list of learwares' rkmes whose mixture approximates the user's rkme
task_rkme_matrix : np.ndarray
Inner product matrix calculated from task_rkme_list.
"""
task_num = len(task_rkme_list)
user_rkme_spec = specification.utils.generate_rkme_spec(X=user_data, reduce=False)
K = task_rkme_matrix
v = np.array([user_rkme_spec.inner_prod(task_rkme) for task_rkme in task_rkme_list])

P = matrix(K)
q = matrix(-v)
G = matrix(-np.eye(task_num))
h = matrix(np.zeros((task_num, 1)))
A = matrix(np.ones((1, task_num)))
b = matrix(np.ones((1, 1)))
solvers.options["show_progress"] = False

sol = solvers.qp(P, q, G, h, A, b, kktsolver="ldl")
task_mixture_weight = np.array(sol["x"]).reshape(-1)

return task_mixture_weight

def _selector_grid_search(
self,
org_train_x: np.ndarray,
org_train_y: np.ndarray,
train_x: np.ndarray,
train_y: np.ndarray,
val_x: np.ndarray,
val_y: np.ndarray,
num_class: int,
) -> LGBMClassifier:
"""Train a LGBMClassifier as job selector using the herding data as training instances.

Parameters
----------
org_train_x : np.ndarray
The original herding features.
org_train_y : np.ndarray
The original hearding labels(which are learnware indexes).
train_x : np.ndarray
Herding features used for training.
train_y : np.ndarray
Herding labels used for training.
val_x : np.ndarray
Herding features used for validation.
val_y : np.ndarray
Herding labels used for validation.
num_class : int
Total number of classes for the job selector(which is exactly the total number of learnwares to be reused).

Returns
-------
LGBMClassifier
The job selector model.
"""
score_best = -1
learning_rate = [0.01]
max_depth = [66]
params = (0, 0)

lgb_params = {
"boosting_type": "gbdt",
"n_estimators": 2000,
"boost_from_average": False,
}

if num_class == 2:
lgb_params["objective"] = "binary"
lgb_params["metric"] = "binary_logloss"
else:
lgb_params["objective"] = "multiclass"
lgb_params["metric"] = "multi_logloss"

for lr in learning_rate:
for md in max_depth:
lgb_params["learning_rate"] = lr
lgb_params["max_depth"] = md
model = LGBMClassifier(**lgb_params)
train_y = train_y.astype(int)
model.fit(train_x, train_y, eval_set=[(val_x, val_y)], callbacks=[early_stopping(300, verbose=False)])
pred_y = model.predict(org_train_x)
score = accuracy_score(pred_y, org_train_y)

if score > score_best:
score_best = score
params = (lr, md)

lgb_params["learning_rate"] = params[0]
lgb_params["max_depth"] = params[1]
model = LGBMClassifier(**lgb_params)
model.fit(org_train_x, org_train_y)

return model

+ 1
- 1
tests/test_learnware_client/test_load_conda.py View File

@@ -7,7 +7,7 @@ import learnware
from learnware.learnware import get_learnware_from_dirpath
from learnware.client import LearnwareClient
from learnware.client.container import ModelCondaContainer, LearnwaresContainer
from learnware.learnware.reuse import AveragingReuser
from learnware.reuse import AveragingReuser


class TestLearnwareLoad(unittest.TestCase):


+ 1
- 1
tests/test_learnware_client/test_reuse.py View File

@@ -3,7 +3,7 @@ import numpy as np

from learnware.learnware import get_learnware_from_dirpath
from learnware.client.container import LearnwaresContainer
from learnware.learnware.reuse import AveragingReuser
from learnware.reuse import AveragingReuser
from learnware.test.module import get_semantic_specification

if __name__ == "__main__":


+ 1
- 1
tests/test_workflow/test_workflow.py View File

@@ -12,7 +12,7 @@ from shutil import copyfile, rmtree

import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.learnware import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser
from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser
import learnware.specification as specification

curr_root = os.path.dirname(os.path.abspath(__file__))


Loading…
Cancel
Save