Merge branch 'dev' of git.nju.edu.cn:learnware/learnware-market into dev

3 years ago · 0bff2ecf9c
--- a/examples/example_image/example_files/example_init.py
+++ b/examples/example_image/example_files/example_init.py
@@ -8,6 +8,7 @@ import torch

 class Model(BaseModel):
    def __init__(self):
        super().__init__(input_shape=(3, 32, 32), output_shape=(10,))
        dir_path = os.path.dirname(os.path.abspath(__file__))
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = ConvModel(channel=3, n_random_features=10).to(self.device)
--- a/examples/example_image/main.py
+++ b/examples/example_image/main.py
@@ -4,7 +4,7 @@ from get_data import *
 import os
 import random
 from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction
 from learnware.learnware import Learnware, JobSelectorReuser, EnsembleReuser
 from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser
 import time

 from learnware.market import EasyMarket, BaseUserInfo
@@ -40,17 +40,17 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Pytorch"], "Type": "Class"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
 user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Library": {"Values": ["Pytorch"], "Type": "Class"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "", "Type": "String"},
@@ -133,10 +133,10 @@ def prepare_market():

 def test_search(gamma=0.1, load_market=True):
    if load_market:
        image_market = EasyMarket()
        image_market = EasyMarket(market_id="image")
    else:
        prepare_market()
        image_market = EasyMarket()
        image_market = EasyMarket(market_id="image")
    logger.info("Number of items in the market: %d" % len(image_market))

    select_list = []
@@ -151,7 +151,7 @@ def test_search(gamma=0.1, load_market=True):
        user_label = np.load(user_label_path)
        user_stat_spec = specification.utils.generate_rkme_spec(X=user_data, gamma=gamma, cuda_idx=0)
        user_info = BaseUserInfo(
            id=f"user_{i}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_stat_spec}
            id=f"user_{i}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_stat_spec}
        )
        logger.info("Searching Market for user: %d" % (i))
        sorted_score_list, single_learnware_list, mixture_score, mixture_learnware_list = image_market.search_learnware(
@@ -167,13 +167,14 @@ def test_search(gamma=0.1, load_market=True):
            acc_list.append(acc)
            logger.info("search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, score, learnware.id, acc))
        # test reuse

        reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list)
        reuse_predict = reuse_baseline.predict(user_data=user_data)
        reuse_score = eval_prediction(reuse_predict, user_label)
        job_selector_score_list.append(reuse_score)
        print(f"mixture reuse loss: {reuse_score}\n")

        reuse_ensemble = EnsembleReuser(learnware_list=mixture_learnware_list, mode="vote")
        reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote")
        ensemble_predict_y = reuse_ensemble.predict(user_data=user_data)
        ensemble_score = eval_prediction(ensemble_predict_y, user_label)
        ensemble_score_list.append(ensemble_score)
@@ -198,4 +199,4 @@ def test_search(gamma=0.1, load_market=True):
 if __name__ == "__main__":
    # prepare_data()
    # prepare_model()
    test_search(load_market=True)
    test_search(load_market=False)
--- a/examples/example_m5/example_init.py
+++ b/examples/example_m5/example_init.py
@@ -1,13 +1,15 @@
 import os
 import joblib
 import numpy as np
 import lightgbm as lgb
 from learnware.model import BaseModel


 class Model(BaseModel):
    def __init__(self):
        super(Model, self).__init__(input_shape=(82,), output_shape=())
        dir_path = os.path.dirname(os.path.abspath(__file__))
        self.model = joblib.load(os.path.join(dir_path, "model.out"))
        self.model = lgb.Booster(model_file=os.path.join(dir_path, "model.out"))

    def fit(self, X: np.ndarray, y: np.ndarray):
        pass
--- a/examples/example_m5/main.py
+++ b/examples/example_m5/main.py
@@ -1,13 +1,14 @@
 import os
 import fire
 import zipfile
 import numpy as np
 from tqdm import tqdm
 from shutil import copyfile, rmtree

 import learnware
 from learnware.market import EasyMarket, BaseUserInfo
 from learnware.market import database_ops
 from learnware.learnware import Learnware, JobSelectorReuser
 from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser
 import learnware.specification as specification
 from m5 import DataLoader

@@ -16,17 +17,17 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
 user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "", "Type": "String"},
@@ -122,13 +123,20 @@ class M5DatasetWorkflow:

        m5 = DataLoader()
        idx_list = m5.get_idx_list()
        os.makedirs("./user_spec", exist_ok=True)
        single_score_list = []
        random_score_list = []
        job_selector_score_list = []
        ensemble_score_list = []

        for idx in idx_list:
            train_x, train_y, test_x, test_y = m5.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)
            user_spec_path = f"./user_spec/user_{idx}.json"
            user_spec.save(user_spec_path)

            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
                id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            (
                sorted_score_list,
@@ -141,18 +149,36 @@ class M5DatasetWorkflow:
            print(
                f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}"
            )
            loss_list = []
            for score, learnware in zip(sorted_score_list, single_learnware_list):
                pred_y = learnware.predict(test_x)
                loss = m5.score(test_y, pred_y)
                print(f"score: {score}, learnware_id: {learnware.id}, loss: {loss}")
                loss_list.append(m5.score(test_y, pred_y))
            print(
                f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}"
            )

            mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list])
            print(f"mixture_learnware: {mixture_id}\n")

            reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list)
            reuse_predict = reuse_baseline.predict(user_data=test_x)
            reuse_score = m5.score(test_y, reuse_predict)
            print(f"mixture reuse loss: {reuse_score}\n")
            print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}")

            reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False)
            job_selector_predict_y = reuse_job_selector.predict(user_data=test_x)
            job_selector_score = m5.score(test_y, job_selector_predict_y)
            print(f"mixture reuse loss (job selector): {job_selector_score}")

            reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list)
            ensemble_predict_y = reuse_ensemble.predict(user_data=test_x)
            ensemble_score = m5.score(test_y, ensemble_predict_y)
            print(f"mixture reuse loss (ensemble): {ensemble_score}\n")

            single_score_list.append(loss_list[0])
            random_score_list.append(np.mean(loss_list))
            job_selector_score_list.append(job_selector_score)
            ensemble_score_list.append(ensemble_score)

        print(f"Single search score: {np.mean(single_score_list)}")
        print(f"Job selector score: {np.mean(job_selector_score_list)}")
        print(f"Average ensemble score: {np.mean(ensemble_score_list)}")
        print(f"Random search score: {np.mean(random_score_list)}")


 if __name__ == "__main__":
--- a/examples/example_market_db/example_db.py
+++ b/examples/example_market_db/example_db.py
@@ -15,7 +15,7 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
@@ -23,7 +23,7 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_2", "Type": "String"},
@@ -31,7 +31,7 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Regression"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_3", "Type": "String"},
@@ -40,11 +40,8 @@ semantic_specs = [

 user_senmantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Class"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "learnware", "Type": "String"},
--- a/examples/example_pfs/example_init.py
+++ b/examples/example_pfs/example_init.py
@@ -6,6 +6,7 @@ from learnware.model import BaseModel

 class Model(BaseModel):
    def __init__(self):
        super(Model, self).__init__(input_shape=(31,), output_shape=())
        dir_path = os.path.dirname(os.path.abspath(__file__))
        self.model = joblib.load(os.path.join(dir_path, "model.out"))

--- a/examples/example_pfs/main.py
+++ b/examples/example_pfs/main.py
@@ -17,17 +17,17 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
 user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "", "Type": "String"},
@@ -122,7 +122,7 @@ class PFSDatasetWorkflow:
        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        os.makedirs("./user_spec", exist_ok=True)
        sinle_score_list = []
        single_score_list = []
        random_score_list = []
        job_selector_score_list = []
        ensemble_score_list = []
@@ -134,7 +134,7 @@ class PFSDatasetWorkflow:
            user_spec.save(user_spec_path)

            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
                id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            (
                sorted_score_list,
@@ -152,13 +152,13 @@ class PFSDatasetWorkflow:
                pred_y = learnware.predict(test_x)
                loss_list.append(pfs.score(test_y, pred_y))
            print(
                f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[-1]}"
                f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}"
            )

            mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list])
            print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}")

            reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list)
            reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False)
            job_selector_predict_y = reuse_job_selector.predict(user_data=test_x)
            job_selector_score = pfs.score(test_y, job_selector_predict_y)
            print(f"mixture reuse loss (job selector): {job_selector_score}")
@@ -168,12 +168,12 @@ class PFSDatasetWorkflow:
            ensemble_score = pfs.score(test_y, ensemble_predict_y)
            print(f"mixture reuse loss (ensemble): {ensemble_score}\n")

            sinle_score_list.append(loss_list[0])
            single_score_list.append(loss_list[0])
            random_score_list.append(np.mean(loss_list))
            job_selector_score_list.append(job_selector_score)
            ensemble_score_list.append(ensemble_score)

        print(f"Single search score: {np.mean(sinle_score_list)}")
        print(f"Single search score: {np.mean(single_score_list)}")
        print(f"Job selector score: {np.mean(job_selector_score_list)}")
        print(f"Average ensemble score: {np.mean(ensemble_score_list)}")
        print(f"Random search score: {np.mean(random_score_list)}")
--- a/examples/workflow_by_code/main.py
+++ b/examples/workflow_by_code/main.py
@@ -19,21 +19,21 @@ semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
 user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Library": {"Values": ["Scikit-learn"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Class"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "", "Type": "String"},
 }
@@ -130,7 +130,7 @@ class LearnwareMarketWorkflow:
        with zipfile.ZipFile(zip_path, "r") as zip_obj:
            zip_obj.extractall(path=unzip_dir)

        user_info = BaseUserInfo(id="user_0", semantic_spec=user_senmantic)
        user_info = BaseUserInfo(id="user_0", semantic_spec=user_semantic)
        _, single_learnware_list, _ = easy_market.search_learnware(user_info)

        print("User info:", user_info.get_semantic_spec())
@@ -159,7 +159,7 @@ class LearnwareMarketWorkflow:
            user_spec = specification.rkme.RKMEStatSpecification()
            user_spec.load(os.path.join(unzip_dir, "svm.json"))
            user_info = BaseUserInfo(
                id="user_0", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
                id="user_0", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            (
                sorted_score_list,
--- a/learnware/config.py
+++ b/learnware/config.py
@@ -79,12 +79,17 @@ semantic_config = {
            # "Generation",
            "Segmentation",
            "Object Detection",
            "Others",
        ],
        "Type": "Class",  # Choose only one class
    },
    "Device": {
        "Values": ["CPU", "GPU"],
        "Type": "Tag",
    # "Device": {
    #     "Values": ["CPU", "GPU"],
    #     "Type": "Tag",
    # },  # Choose one or more tags
    "Library": {
        "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"],
        "Type": "Class",
    },  # Choose one or more tags
    "Scenario": {
        "Values": [
--- a/learnware/learnware/reuse.py
+++ b/learnware/learnware/reuse.py
@@ -17,7 +17,7 @@ logger = get_module_logger("Reuser")
 class JobSelectorReuser(BaseReuser):
    """Baseline Multiple Learnware Reuser uing Job Selector Method"""

    def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000):
    def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000, use_herding: bool = True):
        """The initialization method for job selector reuser

        Parameters
@@ -29,6 +29,7 @@ class JobSelectorReuser(BaseReuser):
        """
        super(JobSelectorReuser, self).__init__(learnware_list)
        self.herding_num = herding_num
        self.use_herding = use_herding

    def predict(self, user_data: np.ndarray) -> np.ndarray:
        """Give prediction for user data using baseline job-selector method
@@ -53,13 +54,15 @@ class JobSelectorReuser(BaseReuser):

        return selector_pred_y

    def job_selector(self, user_data: np.ndarray):
    def job_selector(self, user_data: np.ndarray, use_herding: bool = True):
        """Train job selector based on user's data, which predicts which learnware in the pool should be selected

        Parameters
        ----------
        user_data : np.ndarray
            User's labeled raw data.
        use_herding: bool
            Whether create job selector training samples by herding
        """
        if len(self.learnware_list) == 1:
            user_data_num = user_data.shape[0]
@@ -69,26 +72,32 @@ class JobSelectorReuser(BaseReuser):
                learnware.specification.get_stat_spec_by_name("RKMEStatSpecification")
                for learnware in self.learnware_list
            ]
            task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list)))

            for i in range(len(self.learnware_list)):
                task_rkme1 = learnware_rkme_spec_list[i]
                for j in range(i, len(self.learnware_list)):
                    task_rkme2 = learnware_rkme_spec_list[j]
                    task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2)

            task_mixture_weight = self._calculate_rkme_spec_mixture_weight(
                user_data, learnware_rkme_spec_list, task_matrix
            )
            if self.use_herding:
                task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list)))
                for i in range(len(self.learnware_list)):
                    task_rkme1 = learnware_rkme_spec_list[i]
                    task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1)
                    for j in range(i + 1, len(self.learnware_list)):
                        task_rkme2 = learnware_rkme_spec_list[j]
                        task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2)

                task_mixture_weight = self._calculate_rkme_spec_mixture_weight(
                    user_data, learnware_rkme_spec_list, task_matrix
                )

            herding_X, train_herding_X, val_herding_X = None, None, None
            herding_y, train_herding_y, val_herding_y = [], [], []
            for i in range(len(self.learnware_list)):
                task_spec = learnware_rkme_spec_list[i]
                task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i]))
                if self.use_herding:
                    task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i]))
                    herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy()
                else:
                    herding_X_i = task_spec.z.detach().cpu().numpy()
                    task_herding_num = herding_X_i.shape[0]
                task_val_num = task_herding_num // 5

                herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy()
                train_X_i = herding_X_i[:-task_val_num]
                val_X_i = herding_X_i[-task_val_num:]

@@ -267,7 +276,7 @@ class AveragingReuser(BaseReuser):
                # print(pred_y.shape)
                if not isinstance(pred_y, np.ndarray):
                    pred_y = pred_y.detach().cpu().numpy()
                softmax_pred = softmax(pred_y, axis=1)
                softmax_pred = softmax(pred_y, axis=0)
                if mean_pred_y is None:
                    mean_pred_y = softmax_pred
                else:
--- a/learnware/market/init.py
+++ b/learnware/market/init.py
@@ -3,3 +3,4 @@ from .base import BaseUserInfo, BaseMarket
 from .evolve_anchor import EvolvedAnchoredMarket
 from .evolve import EvolvedMarket
 from .easy import EasyMarket
 from .heterogeneous_feature import HeterogeneousFeatureMarket
--- a/learnware/market/easy.py
+++ b/learnware/market/easy.py
@@ -119,37 +119,38 @@ class EasyMarket(BaseMarket):
        """
        if not os.path.exists(zip_path):
            logger.warning("Zip Path NOT Found! Fail to add learnware.")
            return None, False
            return None, self.INVALID_LEARNWARE

        try:
            if len(semantic_spec["Data"]["Values"]) == 0:
                logger.warning("Illegal semantic specification, please choose Data.")
                return None, False
                return None, self.INVALID_LEARNWARE
            if len(semantic_spec["Task"]["Values"]) == 0:
                logger.warning("Illegal semantic specification, please choose Task.")
                return None, False
            if len(semantic_spec["Device"]["Values"]) == 0:
                return None, self.INVALID_LEARNWARE
            if len(semantic_spec["Library"]["Values"]) == 0:
                logger.warning("Illegal semantic specification, please choose Device.")
                return None, False
                return None, self.INVALID_LEARNWARE
            if len(semantic_spec["Name"]["Values"]) == 0:
                logger.warning("Illegal semantic specification, please provide Name.")
                return None, False
                return None, self.INVALID_LEARNWARE
            if len(semantic_spec["Description"]["Values"]) == 0 and len(semantic_spec["Scenario"]["Values"]) == 0:
                logger.warning("Illegal semantic specification, please provide Scenario or Description.")
                return None, False
                return None, self.INVALID_LEARNWARE
            if (
                semantic_spec["Data"]["Type"] != "Class"
                or semantic_spec["Task"]["Type"] != "Class"
                or semantic_spec["Device"]["Type"] != "Tag"
                or semantic_spec["Library"]["Type"] != "Class"
                or semantic_spec["Scenario"]["Type"] != "Tag"
                or semantic_spec["Name"]["Type"] != "String"
                or semantic_spec["Description"]["Type"] != "String"
            ):
                logger.warning("Illegal semantic specification, please provide the right type.")
                return None, False
                return None, self.INVALID_LEARNWARE
        except:
            print(semantic_spec)
            logger.warning("Illegal semantic specification, some keys are missing.")
            return None, False
            return None, self.INVALID_LEARNWARE

        logger.info("Get new learnware from %s" % (zip_path))
        id = "%08d" % (self.count)
@@ -476,7 +477,7 @@ class EasyMarket(BaseMarket):
        """
        learnware_num = len(learnware_list)
        if learnware_num == 0:
            return [], []
            return None, [], []
        if learnware_num < max_search_num:
            logger.warning("Available Learnware num less than search_num!")
            max_search_num = learnware_num
@@ -639,7 +640,6 @@ class EasyMarket(BaseMarket):
                weight_list = []
                mixture_learnware_list = []

            # convert dist to score
            if mixture_dist is None:
                sorted_score_list = self._convert_dist_to_score(sorted_dist_list)
                mixture_score = None
--- a/learnware/market/heterogeneous_feature.py
+++ b/learnware/market/heterogeneous_feature.py
@@ -0,0 +1,102 @@
 import numpy as np
 from typing import Tuple, Any, List, Union, Dict

 from .evolve import EvolvedMarket
 from ..learnware import Learnware


 class MappingFunction:
    """Placeholder for a mapping between two feature spaces.

    The class currently carries no state; ``transform`` is a stub that does
    no work and returns ``None``.
    """

    def __init__(self) -> None:
        pass

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Transform the data in one feature space to another feature space.

        Parameters
        ----------
        X : np.ndarray
            Data in one feature space.

        Returns
        -------
        np.ndarray
            Transformed data in the other feature space. This implementation
            is a stub and returns ``None``.
        """
        # ``self`` was missing from the signature, which made the method
        # impossible to call on an instance (TypeError on ``obj.transform(X)``).
        pass


 class HeterogeneousFeatureMarket(EvolvedMarket):
    """Organize learnwares with heterogeneous feature spaces.

    Extends ``EvolvedMarket`` with a collection of mapping functions. Every
    mapping-related method in this class is currently a stub that returns
    ``None``.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): starts out as an empty dict, although
        # ``learn_mapping_functions`` is annotated as returning a list and this
        # attribute is overwritten with that return value -- confirm the
        # intended container type.
        self.mapping_function_list = {}

    def _mapping_function_list_initialization(self, learnware_list: List[Learnware]):
        """Initialize mapping functions with all submitted learnwares.

        Parameters
        ----------
        learnware_list : List[Learnware]
            List of learnwares.
        """
        learned_mappings = self.learn_mapping_functions(learnware_list)
        self.mapping_function_list = learned_mappings

    def learn_mapping_functions(self, learnware_list: List[Learnware]) -> List[MappingFunction]:
        """Generate mapping functions between each original feature space and the subspace.

        Intended to use all statistical specifications of the submitted
        learnwares to produce mappings from each original feature space to the
        subspace and vice versa. Not implemented yet.

        Parameters
        ----------
        learnware_list : List[Learnware]
            List of learnwares.

        Returns
        -------
        List[MappingFunction]
            List of mapping functions (currently ``None``, as this is a stub).
        """
        return None

    def transform_original_to_subspace(
        self,
        original_feature_space_idx: int,
        original_feature: np.ndarray,
    ) -> np.ndarray:
        """Transform features from an original feature space to the subspace.

        Parameters
        ----------
        original_feature_space_idx : int
            Index of the original feature space.
        original_feature : np.ndarray
            Data in the original feature space.

        Returns
        -------
        np.ndarray
            Mapped data in the subspace (currently ``None``, as this is a stub).
        """
        return None

    def transform_subspace_to_original(
        self,
        original_feature_space_idx: int,
        subspace_feature: np.ndarray,
    ) -> np.ndarray:
        """Transform features from the subspace to an original feature space.

        Parameters
        ----------
        original_feature_space_idx : int
            Index of the original feature space.
        subspace_feature : np.ndarray
            Data in the subspace.

        Returns
        -------
        np.ndarray
            Mapped data in the original feature space (currently ``None``, as
            this is a stub).
        """
        return None