From 15dfd6e1b14cb27ae7fb920538e40afb9c48ed12 Mon Sep 17 00:00:00 2001 From: xiey Date: Mon, 24 Apr 2023 15:06:56 +0800 Subject: [PATCH 1/5] [FIX] fix semantic spec in example_db --- examples/example_market_db/example_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/example_market_db/example_db.py b/examples/example_market_db/example_db.py index b4e146a..2acb349 100644 --- a/examples/example_market_db/example_db.py +++ b/examples/example_market_db/example_db.py @@ -41,7 +41,7 @@ semantic_specs = [ user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Class"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware", "Type": "String"}, @@ -92,7 +92,7 @@ def test_market(): for idx, zip_path in enumerate(zip_path_list): semantic_spec = semantic_specs[idx % 3] - semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) + semantic_spec["Name"]["Values"] = "Learnware_%d" % (idx) semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) easy_market.add_learnware(zip_path, semantic_spec) From d10d0b311f454ecd51eb9c7f5011117709167faa Mon Sep 17 00:00:00 2001 From: Gene Date: Mon, 24 Apr 2023 15:14:00 +0800 Subject: [PATCH 2/5] [MNT] Modify details in JobSelectorReuser --- learnware/learnware/reuse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index 7c289cc..e5492bf 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -47,7 +47,7 @@ class JobSelectorReuser(BaseReuser): np.ndarray Prediction given by job-selector method """ - select_result = self.job_selector(user_data) + select_result = self._job_selector(user_data) pred_y_list = [] data_idxs_list = [] @@ -75,7 +75,7 
@@ class JobSelectorReuser(BaseReuser): return selector_pred_y - def job_selector(self, user_data: np.ndarray): + def _job_selector(self, user_data: np.ndarray): """Train job selector based on user's data, which predicts which learnware in the pool should be selected Parameters From 598ca8e06abfd48b41422bf2560719b41e41d5cc Mon Sep 17 00:00:00 2001 From: Gene Date: Mon, 24 Apr 2023 15:15:14 +0800 Subject: [PATCH 3/5] [MNT] Modify details in JobSelectorReuser --- learnware/learnware/reuse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index e5492bf..7c289cc 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -47,7 +47,7 @@ class JobSelectorReuser(BaseReuser): np.ndarray Prediction given by job-selector method """ - select_result = self._job_selector(user_data) + select_result = self.job_selector(user_data) pred_y_list = [] data_idxs_list = [] @@ -75,7 +75,7 @@ class JobSelectorReuser(BaseReuser): return selector_pred_y - def _job_selector(self, user_data: np.ndarray): + def job_selector(self, user_data: np.ndarray): """Train job selector based on user's data, which predicts which learnware in the pool should be selected Parameters From 3c0c30f13d99512a6cb2923fe5b43543232a4e06 Mon Sep 17 00:00:00 2001 From: xiey Date: Mon, 24 Apr 2023 16:35:40 +0800 Subject: [PATCH 4/5] [MNT] Modified a language disorder --- learnware/market/easy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 57c0631..48058a5 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -475,7 +475,7 @@ class EasyMarket(BaseMarket): max_search_num: int, score_cutoff: float = 0.001, ) -> Tuple[float, List[float], List[Learnware]]: - """Greedily match learnwares such that their mixture become more and more closer to user's rkme + """Greedily match learnwares such that their mixture become closer and 
closer to user's rkme Parameters ---------- From 5526fdd1148ba3b02b8b5c33e85254b182c1caac Mon Sep 17 00:00:00 2001 From: xiey Date: Mon, 24 Apr 2023 16:37:46 +0800 Subject: [PATCH 5/5] [DOC] Add Identify Helpful Learnwares --- docs/workflow/Identify helpful learnwares.rst | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 docs/workflow/Identify helpful learnwares.rst diff --git a/docs/workflow/Identify helpful learnwares.rst b/docs/workflow/Identify helpful learnwares.rst new file mode 100644 index 0000000..7a7d69d --- /dev/null +++ b/docs/workflow/Identify helpful learnwares.rst @@ -0,0 +1,67 @@ +============================================================ +Identify Helpful Learnwares +============================================================ + +Semantic Specification Search +------------------------------- +To search for learnwares that fit your task purpose, +you should first provide a semantic specification ``user_semantic`` that describes the characteristics of your task. +The Learnware Market will perform a first-stage search based on ``user_semantic``, +identifying potentially helpful learnwares whose models solve tasks similar to your requirements. + +.. code-block:: python + + # construct user_info which includes semantic specification for searching learnware + user_info = BaseUserInfo(id="user", semantic_spec=semantic_spec) + + # search_learnware performs semantic specification search if user_info doesn't include a statistical specification + _, single_learnware_list, _ = easy_market.search_learnware(user_info) + + # single_learnware_list is the learnware list by semantic specification searching + print(single_learnware_list) + +In semantic specification search, we go through all learnwares in the market to compare their semantic specifications with the user's, and return all the learnwares that pass the comparison. 
When comparing a learnware's semantic specification with the user's, different semantic keys are matched in different ways: +- For semantic keys with type 'Class', they are matched only if they have the same value. +- For semantic keys with type 'Tag', they are matched only if they have nonempty intersections. +- For the user's input in the search box, it matches a learnware's semantic specification only if it's a substring of its 'Name' or 'Description'. All the strings are converted to lower case before matching. +- When a key value is missing, it will not participate in the match. The user may provide no semantic specification at all if they wish. + +Statistical Specification Search +--------------------------------- + +If you choose to provide your own statistical specification file ``stat.json``, +the Learnware Market can perform a more accurate learnware selection from +the learnwares returned by the previous step. This second-stage search is based on statistical information and returns one or more learnwares that are most likely to be helpful for your task. + +For example, the following code is designed to work with Reduced Kernel Mean Embedding (RKME) as a statistical specification: + +.. 
code-block:: python + + import learnware.specification as specification + + user_spec = specification.rkme.RKMEStatSpecification() + user_spec.load(os.path.join(unzip_path, "rkme.json")) + user_info = BaseUserInfo( + id="user", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} + ) + (sorted_score_list, single_learnware_list, + mixture_score, mixture_learnware_list) = easy_market.search_learnware(user_info) + + # sorted_score_list is the learnware scores based on MMD distances, sorted in descending order + print(sorted_score_list) + + # single_learnware_list is the learnwares sorted in descending order based on their scores + print(single_learnware_list) + + # mixture_learnware_list is the learnwares whose mixture is helpful for your task + print(mixture_learnware_list) + + # mixture_score is the score of the mixture of learnwares + print(mixture_score) + +The statistical specification search is done in the following way. +We first filter by the dimension of RKME specifications; only those with the same dimension as the user's will enter the subsequent stage. + +The single_learnware_list is calculated using the distances between two RKMEs. The greater the distance from the user's RKME, the lower the score is. The learnwares with very low scores will not be returned, unless the number of matching learnwares is smaller than a set threshold. + +The mixture_learnware_list is calculated in a greedy way. Each time we choose a learnware to make their mixture closer to the user's RKME. Specifically, each time we go through all the remaining learnwares to find the one whose combination with chosen learnwares could minimize the distance between their mixture's RKME and the user's RKME. The mixture weight is calculated by minimizing the RKME distance, which is solved by quadratic programming. If the distance becomes larger or the number of chosen learnwares reaches a threshold, the process ends and the chosen learnwares and their weights are returned. 
\ No newline at end of file