From 3d63065cfde0e695564efe8857e2bd9ff7c0d359 Mon Sep 17 00:00:00 2001 From: shihy Date: Wed, 29 Nov 2023 19:46:41 +0800 Subject: [PATCH 01/25] [ENH] Adding the basic framework --- .../benchmarks/__init__.py | 0 .../benchmarks/build_market.py | 114 ++++++++++++++++++ .../benchmarks/evaluate_market.py | 75 ++++++++++++ .../dataset/__init__.py | 0 .../dataset_cifar_workflow/dataset/data.py | 10 ++ .../dataset_cifar_workflow/dataset/utils.py | 45 +++++++ examples/dataset_cifar_workflow/main.py | 0 .../dataset_cifar_workflow/models/__init__.py | 0 .../dataset_cifar_workflow/models/config.yaml | 8 ++ .../models/conv/__init__.py | 26 ++++ .../models/conv/model.py | 71 +++++++++++ .../dataset_cifar_workflow/models/train.py | 0 12 files changed, 349 insertions(+) create mode 100644 examples/dataset_cifar_workflow/benchmarks/__init__.py create mode 100644 examples/dataset_cifar_workflow/benchmarks/build_market.py create mode 100644 examples/dataset_cifar_workflow/benchmarks/evaluate_market.py create mode 100644 examples/dataset_cifar_workflow/dataset/__init__.py create mode 100644 examples/dataset_cifar_workflow/dataset/data.py create mode 100644 examples/dataset_cifar_workflow/dataset/utils.py create mode 100644 examples/dataset_cifar_workflow/main.py create mode 100644 examples/dataset_cifar_workflow/models/__init__.py create mode 100644 examples/dataset_cifar_workflow/models/config.yaml create mode 100644 examples/dataset_cifar_workflow/models/conv/__init__.py create mode 100644 examples/dataset_cifar_workflow/models/conv/model.py create mode 100644 examples/dataset_cifar_workflow/models/train.py diff --git a/examples/dataset_cifar_workflow/benchmarks/__init__.py b/examples/dataset_cifar_workflow/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/dataset_cifar_workflow/benchmarks/build_market.py b/examples/dataset_cifar_workflow/benchmarks/build_market.py new file mode 100644 index 0000000..e9bf46a --- /dev/null +++ 
b/examples/dataset_cifar_workflow/benchmarks/build_market.py @@ -0,0 +1,114 @@ +import copy +import os +import zipfile +from shutil import copyfile, rmtree + +import learnware +import numpy as np +import tqdm +import yaml +from learnware import specification +from learnware.market import EasyMarket + +from preprocess.dataloader import ImageDataLoader + +user_semantic = { + "Data": {"Values": ["Image"], "Type": "Class"}, + "Task": { + "Values": ["Classification"], + "Type": "Class", + }, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, + "Scenario": {"Values": [], "Type": "Tag"}, + "Description": {"Values": "", "Type": "String"}, + "Name": {"Values": "", "Type": "String"}, + "Output": {"Values": "", "Dimension": 0} +} + + +def build_from_preprocessed(args, regenerate=True): + zip_path_list = [] + data_root = os.path.join(args.data_root, 'learnware_market_data', "{}_{:d}".format(args.data, args.data_id)) + dataloader = ImageDataLoader(data_root, args.n_uploaders, train=True) + + market_root = args.market_root + for i, (train_X, train_y, val_X, val_y) in tqdm.tqdm(enumerate(dataloader), total=args.n_uploaders): + dir_path = os.path.join(market_root, args.data, "{}_{:d}".format(args.spec, args.id), "learnware_{:d}".format(i)) + os.makedirs(dir_path, exist_ok=True) + + if not regenerate: + zip_path_list.append(dir_path + ".zip") + continue + + # print("Preparing Learnware {:d} with {:s} specification".format(i, args.spec)) + # Copy Model File + model_file = os.path.join(dir_path, "model.pth") + copyfile(os.path.join(data_root, "models", "uploader_{:d}.pth".format(i)), + model_file) + + # Make Specification + if args.spec == "rbf": + spec = specification.utils.generate_rkme_spec(X=train_X, reduced_set_size=args.K, gamma=0.1, cuda_idx=args.cuda_idx) + elif args.spec == "ntk": + spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i, **args.__dict__) + spec.generate_stat_spec_from_data(val_X, K=args.K, steps=args.ntk_steps, reduce=True, 
whitening=False) + else: + raise NotImplementedError("Not Support", args.spec) + spec.save(os.path.join(dir_path, "spec.json")) + + # Copy __init__.py and learnware_yaml + init_file = os.path.join(dir_path, "__init__.py") + yaml_file = os.path.join(dir_path, "learnware.yaml") + copyfile( + os.path.join(market_root, "learnware_example", + "conv.py"), init_file + ) # cp conv.py init_file + + with open(os.path.join(market_root, "learnware_example", + "{}.yaml".format(args.spec)), "r") as yaml_templet,\ + open(yaml_file, "w") as yaml_target: + + yaml_content = yaml.load(yaml_templet, Loader=yaml.FullLoader) + + yaml_content["model"]["kwargs"]["device"] = str(choose_device(args.cuda_idx)) + yaml_content["model"]["kwargs"]["input_channel"] = train_X.shape[1] + if args.spec == "ntk": + yaml_content["stat_specifications"][0]["kwargs"] = copy.deepcopy(args.__dict__) + + yaml.dump(yaml_content, yaml_target) + + + zip_file = dir_path + ".zip" + # zip -q -r -j zip_file dir_path + with zipfile.ZipFile(zip_file, "w") as zip_obj: + for foldername, subfolders, filenames in os.walk(dir_path): + for filename in filenames: + file_path = os.path.join(foldername, filename) + zip_info = zipfile.ZipInfo(filename) + zip_info.compress_type = zipfile.ZIP_STORED + with open(file_path, "rb") as file: + zip_obj.writestr(zip_info, file.read()) + + rmtree(dir_path) # rm -r dir_path + zip_path_list.append(zip_file) + + return zip_path_list + +def upload_to_easy_market(args, zip_path_list, market_id=None): + learnware.init() + np.random.seed(2023) + market_id = market_id if market_id else "NTK-RF-{:d}".format(args.id) + market = DummyMarket(market_id=market_id, rebuild=True) + + for idx, zip_path in enumerate(zip_path_list): + semantic_spec = copy.deepcopy(user_semantic) + semantic_spec["Name"]["Values"] = "learnware_{:d}".format(idx) + semantic_spec["Description"]["Values"] = "test_learnware_number_{:d}".format(idx) + semantic_spec["Scenario"]["Values"] = [args.data] + 
semantic_spec["Output"]['Dimension'] = 10 + market.add_learnware(zip_path, semantic_spec) + + logger = get_custom_logger() + logger.debug("Total Item: {:d}".format(len(market))) + + return market \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py b/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py new file mode 100644 index 0000000..8fb5b87 --- /dev/null +++ b/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py @@ -0,0 +1,75 @@ +import os +import random +from time import sleep +from typing import Dict + +import learnware +import numpy as np +import torch.random +from learnware import specification +from learnware.market import BaseUserInfo +from tqdm import tqdm + +from build_market import user_semantic +from preprocess.dataloader import ImageDataLoader +from utils.clerk import Clerk, get_custom_logger +from utils.reuse import AveragingReuser + + +def evaluate_market_performance(args, market, clerk: Clerk=None, regenerate=True) -> Dict: + logger = get_custom_logger() + + data_root = os.path.join(args.data_root, 'learnware_market_data', "{}_{:d}".format(args.data, args.data_id)) + dataloader = ImageDataLoader(data_root, args.n_users, train=False) + acc = [] + + market_root = args.market_root + # shuffled = list(enumerate(dataloader)) + # random.shuffle(shuffled) + for i, (test_X, test_y) in enumerate(dataloader): + dir_path = os.path.join(market_root, args.data, "{}_{:d}".format(args.spec, args.id), "user_{:d}".format(i)) + os.makedirs(dir_path, exist_ok=True) + + if regenerate: + if args.spec == "rbf": + stat_spec = specification.utils.generate_rkme_spec(X=test_X, reduced_set_size=args.K, gamma=0.1, cuda_idx=args.cuda_idx) + elif args.spec == "ntk": + stat_spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i+args.n_uploaders, **args.__dict__) + stat_spec.generate_stat_spec_from_data(test_X, reduce=True, steps=args.ntk_steps, K=args.K, whitening=False) + else: + raise 
NotImplementedError() + # Save User's spec to disk + stat_spec.save(os.path.join(dir_path, "spec.json")) + else: + if args.spec == "rbf": + stat_spec = specification.RKMEStatSpecification(gamma=0.1, cuda_idx=args.cuda_idx) + elif args.spec == "ntk": + stat_spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i+args.n_uploaders, cache=False, **args.__dict__) + else: + raise NotImplementedError() + # Load User's spec from disk + stat_spec.load(os.path.join(dir_path, "spec.json")) + + user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": stat_spec}) + + sorted_score_list, single_learnware_list, _, _= market.search_learnware(user_info, max_search_num=args.max_search_num) + + reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list, mode="vote") + ensemble_predict_y = np.argmax(reuse_ensemble.predict(user_data=test_X), axis=-1) + + curr_acc = np.mean(ensemble_predict_y == test_y) + acc.append(curr_acc) + if clerk: + clerk.rkme_performance(curr_acc) + + logger.debug("Accuracy for user {:d}: {:.3f}; {:.3f} on average up to now.".format(i, curr_acc, np.mean(acc))) + + logger.info("Accuracy {:.3f}({:.3f})".format(np.mean(acc), np.std(acc))) + + return { + "Accuracy": { + "Mean": np.mean(acc), + "Std": np.std(acc), + "All": acc + } + } \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/dataset/__init__.py b/examples/dataset_cifar_workflow/dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/dataset_cifar_workflow/dataset/data.py b/examples/dataset_cifar_workflow/dataset/data.py new file mode 100644 index 0000000..3c47d88 --- /dev/null +++ b/examples/dataset_cifar_workflow/dataset/data.py @@ -0,0 +1,10 @@ +from torchvision import datasets + + +def cifar10(split="uploader"): + assert(split in {"uploader", "user"}) + + if split == "uploader": + dataset = datasets.CIFAR10(root="cache", download=True, train=True) + else: + dataset = datasets.CIFAR10(root="cache", 
download=True, train=False) \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/dataset/utils.py b/examples/dataset_cifar_workflow/dataset/utils.py new file mode 100644 index 0000000..04b0608 --- /dev/null +++ b/examples/dataset_cifar_workflow/dataset/utils.py @@ -0,0 +1,45 @@ +import random +from functools import reduce + +import numpy as np +import torch +from torch.utils.data import TensorDataset + + +def sample_by_labels(labels: torch.Tensor, weights, total_num): + weights = np.asarray(weights) + + norm_factor = np.sum(weights) + last_non_zero = np.argwhere(weights > 0)[-1].item() + category_nums = [int(w * total_num / norm_factor) for w in weights[:last_non_zero]] + category_nums += [total_num - sum(category_nums)] + category_nums += [0] * (weights.shape[0] - last_non_zero - 1) + + selected_cls_indexes = [ + random.sample(list(torch.where(labels == c)[0]), k=n) + for c, n in enumerate(category_nums) + ] + + return selected_cls_indexes + + +USER_WEIGHTS = [3, 3, 1, 1, 1, 1, 0, 0, 0, 0] +UPLOADER_WEIGHTS = [4, 4, 1, 1, 0, 0, 0, 0, 0, 0] + +def split_dataset(data_x, data_y, size, split="uploader"): + if split == "uploader": + weights = np.asarray(UPLOADER_WEIGHTS) + elif split == "user": + weights = np.asarray(USER_WEIGHTS) + else: + raise Exception(split) + + order = list(range(len(weights))) + random.shuffle(order) + + selected_data_indexes = reduce(lambda x, y: x+y, sample_by_labels(data_y, weights[order], size)) + selected_data_indexes = torch.stack(selected_data_indexes) + selected_X = data_x[selected_data_indexes].numpy() + selected_y = data_y[selected_data_indexes].numpy() + + return TensorDataset(selected_X, selected_y), weights[order] \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/dataset_cifar_workflow/models/__init__.py b/examples/dataset_cifar_workflow/models/__init__.py new file mode 100644 
index 0000000..e69de29 diff --git a/examples/dataset_cifar_workflow/models/config.yaml b/examples/dataset_cifar_workflow/models/config.yaml new file mode 100644 index 0000000..a73666d --- /dev/null +++ b/examples/dataset_cifar_workflow/models/config.yaml @@ -0,0 +1,8 @@ +model: + class_name: Model + kwargs: {} +stat_specifications: + - module_path: learnware.specification + class_name: RKMEImageStatSpecification + file_name: spec.json + kwargs: {} \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/models/conv/__init__.py b/examples/dataset_cifar_workflow/models/conv/__init__.py new file mode 100644 index 0000000..9080b3c --- /dev/null +++ b/examples/dataset_cifar_workflow/models/conv/__init__.py @@ -0,0 +1,26 @@ +import os + +import torch +import numpy as np +from learnware.model import BaseModel + +from .model import ConvModel + + +class Model(BaseModel): + def __init__(self, device="cuda", input_channel=3): + super(Model, self).__init__(input_shape=(input_channel, 32, 32), output_shape=(10,)) + dir_path = os.path.dirname(os.path.abspath(__file__)) + self.device =device + self.model = ConvModel(channel=input_channel, n_random_features=10) + self.model.load_state_dict(torch.load(os.path.join(dir_path, "model.pth"))) + self.model.to(device).eval() + + def fit(self, X: np.ndarray, y: np.ndarray): + raise NotImplementedError() + + def predict(self, X: np.ndarray) -> np.ndarray: + return self.model(torch.asarray(X, dtype=torch.float32, device=self.device)) + + def finetune(self, X: np.ndarray, y: np.ndarray): + raise NotImplementedError() diff --git a/examples/dataset_cifar_workflow/models/conv/model.py b/examples/dataset_cifar_workflow/models/conv/model.py new file mode 100644 index 0000000..b9e04a1 --- /dev/null +++ b/examples/dataset_cifar_workflow/models/conv/model.py @@ -0,0 +1,71 @@ +from torch import nn + + +class ConvModel(nn.Module): + def __init__(self, channel, n_random_features, net_width = 64, net_depth = 3, net_act = 'relu', + 
net_norm = 'batchnorm', net_pooling = 'avgpooling', im_size = (32,32)): + super().__init__() + self.features, shape_feat = self._make_layers(channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size) + num_feat = shape_feat[0]*shape_feat[1]*shape_feat[2] + self.classifier = nn.Linear(num_feat, n_random_features) + + def forward(self, x): + out = self.features(x) + out = out.reshape(out.size(0), -1) + out = self.classifier(out) + return out + + def _get_activation(self, net_act): + if net_act == 'sigmoid': + return nn.Sigmoid() + elif net_act == 'relu': + return nn.ReLU(inplace=True) + elif net_act == 'leakyrelu': + return nn.LeakyReLU(negative_slope=0.01) + elif net_act == 'gelu': + return nn.SiLU() + else: + raise Exception('unknown activation function: %s'%net_act) + + def _get_pooling(self, net_pooling): + if net_pooling == 'maxpooling': + return nn.MaxPool2d(kernel_size=2, stride=2) + elif net_pooling == 'avgpooling': + return nn.AvgPool2d(kernel_size=2, stride=2) + elif net_pooling == 'none': + return None + else: + raise Exception('unknown net_pooling: %s'%net_pooling) + + def _get_normlayer(self, net_norm, shape_feat): + if net_norm == 'batchnorm': + return nn.BatchNorm2d(shape_feat[0], affine=True) + elif net_norm == 'layernorm': + return nn.LayerNorm(shape_feat, elementwise_affine=True) + elif net_norm == 'instancenorm': + return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) + elif net_norm == 'groupnorm': + return nn.GroupNorm(4, shape_feat[0], affine=True) + elif net_norm == 'none': + return None + else: + raise Exception('unknown net_norm: %s'%net_norm) + + def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): + layers = [] + in_channels = channel + shape_feat = [in_channels, im_size[0], im_size[1]] + for d in range(net_depth): + layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding='same')] + + shape_feat[0] = net_width + if net_norm != 'none': + layers += 
[self._get_normlayer(net_norm, shape_feat)] + layers += [self._get_activation(net_act)] + in_channels = net_width + if net_pooling != 'none': + layers += [self._get_pooling(net_pooling)] + shape_feat[1] //= 2 + shape_feat[2] //= 2 + + return nn.Sequential(*layers), shape_feat \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/models/train.py b/examples/dataset_cifar_workflow/models/train.py new file mode 100644 index 0000000..e69de29 From 5f0929c14a060aaa3673afcbe7b7a3fdf2b40ae2 Mon Sep 17 00:00:00 2001 From: shihy Date: Thu, 30 Nov 2023 10:11:18 +0800 Subject: [PATCH 02/25] [ENH] Fixed some bugs and finished the main code --- .../dataset => }/__init__.py | 0 .../benchmarks/build_market.py | 114 ------------- .../benchmarks/dataset/__init__.py | 1 + .../benchmarks/dataset/data.py | 26 +++ .../{ => benchmarks}/dataset/utils.py | 38 ++++- .../benchmarks/evaluate_market.py | 75 --------- .../{ => benchmarks}/models/__init__.py | 0 .../{ => benchmarks}/models/config.yaml | 0 .../{ => benchmarks}/models/conv/__init__.py | 0 .../{ => benchmarks}/models/conv/model.py | 0 .../benchmarks/models/conv/requirements.txt | 3 + .../benchmarks/utils.py | 158 ++++++++++++++++++ .../dataset_cifar_workflow/dataset/data.py | 10 -- examples/dataset_cifar_workflow/main.py | 33 ++++ .../dataset_cifar_workflow/models/train.py | 0 15 files changed, 251 insertions(+), 207 deletions(-) rename examples/{dataset_cifar_workflow/dataset => }/__init__.py (100%) delete mode 100644 examples/dataset_cifar_workflow/benchmarks/build_market.py create mode 100644 examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py create mode 100644 examples/dataset_cifar_workflow/benchmarks/dataset/data.py rename examples/dataset_cifar_workflow/{ => benchmarks}/dataset/utils.py (54%) delete mode 100644 examples/dataset_cifar_workflow/benchmarks/evaluate_market.py rename examples/dataset_cifar_workflow/{ => benchmarks}/models/__init__.py (100%) rename examples/dataset_cifar_workflow/{ => 
benchmarks}/models/config.yaml (100%) rename examples/dataset_cifar_workflow/{ => benchmarks}/models/conv/__init__.py (100%) rename examples/dataset_cifar_workflow/{ => benchmarks}/models/conv/model.py (100%) create mode 100644 examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt create mode 100644 examples/dataset_cifar_workflow/benchmarks/utils.py delete mode 100644 examples/dataset_cifar_workflow/dataset/data.py delete mode 100644 examples/dataset_cifar_workflow/models/train.py diff --git a/examples/dataset_cifar_workflow/dataset/__init__.py b/examples/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/dataset/__init__.py rename to examples/__init__.py diff --git a/examples/dataset_cifar_workflow/benchmarks/build_market.py b/examples/dataset_cifar_workflow/benchmarks/build_market.py deleted file mode 100644 index e9bf46a..0000000 --- a/examples/dataset_cifar_workflow/benchmarks/build_market.py +++ /dev/null @@ -1,114 +0,0 @@ -import copy -import os -import zipfile -from shutil import copyfile, rmtree - -import learnware -import numpy as np -import tqdm -import yaml -from learnware import specification -from learnware.market import EasyMarket - -from preprocess.dataloader import ImageDataLoader - -user_semantic = { - "Data": {"Values": ["Image"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, - "Scenario": {"Values": [], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "", "Type": "String"}, - "Output": {"Values": "", "Dimension": 0} -} - - -def build_from_preprocessed(args, regenerate=True): - zip_path_list = [] - data_root = os.path.join(args.data_root, 'learnware_market_data', "{}_{:d}".format(args.data, args.data_id)) - dataloader = ImageDataLoader(data_root, args.n_uploaders, train=True) - - market_root = args.market_root - for i, (train_X, train_y, val_X, val_y) in 
tqdm.tqdm(enumerate(dataloader), total=args.n_uploaders): - dir_path = os.path.join(market_root, args.data, "{}_{:d}".format(args.spec, args.id), "learnware_{:d}".format(i)) - os.makedirs(dir_path, exist_ok=True) - - if not regenerate: - zip_path_list.append(dir_path + ".zip") - continue - - # print("Preparing Learnware {:d} with {:s} specification".format(i, args.spec)) - # Copy Model File - model_file = os.path.join(dir_path, "model.pth") - copyfile(os.path.join(data_root, "models", "uploader_{:d}.pth".format(i)), - model_file) - - # Make Specification - if args.spec == "rbf": - spec = specification.utils.generate_rkme_spec(X=train_X, reduced_set_size=args.K, gamma=0.1, cuda_idx=args.cuda_idx) - elif args.spec == "ntk": - spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i, **args.__dict__) - spec.generate_stat_spec_from_data(val_X, K=args.K, steps=args.ntk_steps, reduce=True, whitening=False) - else: - raise NotImplementedError("Not Support", args.spec) - spec.save(os.path.join(dir_path, "spec.json")) - - # Copy __init__.py and learnware_yaml - init_file = os.path.join(dir_path, "__init__.py") - yaml_file = os.path.join(dir_path, "learnware.yaml") - copyfile( - os.path.join(market_root, "learnware_example", - "conv.py"), init_file - ) # cp conv.py init_file - - with open(os.path.join(market_root, "learnware_example", - "{}.yaml".format(args.spec)), "r") as yaml_templet,\ - open(yaml_file, "w") as yaml_target: - - yaml_content = yaml.load(yaml_templet, Loader=yaml.FullLoader) - - yaml_content["model"]["kwargs"]["device"] = str(choose_device(args.cuda_idx)) - yaml_content["model"]["kwargs"]["input_channel"] = train_X.shape[1] - if args.spec == "ntk": - yaml_content["stat_specifications"][0]["kwargs"] = copy.deepcopy(args.__dict__) - - yaml.dump(yaml_content, yaml_target) - - - zip_file = dir_path + ".zip" - # zip -q -r -j zip_file dir_path - with zipfile.ZipFile(zip_file, "w") as zip_obj: - for foldername, subfolders, filenames in 
os.walk(dir_path): - for filename in filenames: - file_path = os.path.join(foldername, filename) - zip_info = zipfile.ZipInfo(filename) - zip_info.compress_type = zipfile.ZIP_STORED - with open(file_path, "rb") as file: - zip_obj.writestr(zip_info, file.read()) - - rmtree(dir_path) # rm -r dir_path - zip_path_list.append(zip_file) - - return zip_path_list - -def upload_to_easy_market(args, zip_path_list, market_id=None): - learnware.init() - np.random.seed(2023) - market_id = market_id if market_id else "NTK-RF-{:d}".format(args.id) - market = DummyMarket(market_id=market_id, rebuild=True) - - for idx, zip_path in enumerate(zip_path_list): - semantic_spec = copy.deepcopy(user_semantic) - semantic_spec["Name"]["Values"] = "learnware_{:d}".format(idx) - semantic_spec["Description"]["Values"] = "test_learnware_number_{:d}".format(idx) - semantic_spec["Scenario"]["Values"] = [args.data] - semantic_spec["Output"]['Dimension'] = 10 - market.add_learnware(zip_path, semantic_spec) - - logger = get_custom_logger() - logger.debug("Total Item: {:d}".format(len(market))) - - return market \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py b/examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py new file mode 100644 index 0000000..8d16d00 --- /dev/null +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py @@ -0,0 +1 @@ +from .data import * \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py new file mode 100644 index 0000000..5a8e335 --- /dev/null +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py @@ -0,0 +1,26 @@ +import os + +import torch +from torch.utils.data import random_split, Subset +from torchvision import datasets + +from examples.dataset_cifar_workflow.benchmarks.dataset.utils import build_transform, sample_by_labels, split_dataset + + +cache_root = 
os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'cache')) + +augment_transform, regular_transform = build_transform((32, 32)) +cifar_train_set_augment = datasets.CIFAR10(root="cache", download=True, + train=True, transform=augment_transform) +cifar_train_set = datasets.CIFAR10(root="cache", download=True, + train=True, transform=augment_transform) +cifar_test_set = datasets.CIFAR10(root="cache", download=True, + train=False, transform=regular_transform) + +def uploader_data(): + train_indices, order = split_dataset(torch.asarray(cifar_train_set_augment.targets), 12500, split="uploader") + valid_indices, _ = split_dataset(torch.asarray(cifar_test_set.targets), 2000, split="uploader", order=order) + + return (Subset(cifar_train_set_augment, train_indices), + Subset(cifar_test_set, valid_indices), + Subset(cifar_train_set, train_indices)) diff --git a/examples/dataset_cifar_workflow/dataset/utils.py b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py similarity index 54% rename from examples/dataset_cifar_workflow/dataset/utils.py rename to examples/dataset_cifar_workflow/benchmarks/dataset/utils.py index 04b0608..36fd33a 100644 --- a/examples/dataset_cifar_workflow/dataset/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py @@ -3,8 +3,14 @@ from functools import reduce import numpy as np import torch +import torchvision from torch.utils.data import TensorDataset +torchvision.disable_beta_transforms_warning() +from torchvision.transforms import transforms, v2 + + + def sample_by_labels(labels: torch.Tensor, weights, total_num): weights = np.asarray(weights) @@ -25,8 +31,7 @@ def sample_by_labels(labels: torch.Tensor, weights, total_num): USER_WEIGHTS = [3, 3, 1, 1, 1, 1, 0, 0, 0, 0] UPLOADER_WEIGHTS = [4, 4, 1, 1, 0, 0, 0, 0, 0, 0] - -def split_dataset(data_x, data_y, size, split="uploader"): +def split_dataset(labels, size, split="uploader", order=None): if split == "uploader": weights = 
np.asarray(UPLOADER_WEIGHTS) elif split == "user": @@ -34,12 +39,29 @@ def split_dataset(data_x, data_y, size, split="uploader"): else: raise Exception(split) - order = list(range(len(weights))) - random.shuffle(order) + if order is None: + order = list(range(len(weights))) + random.shuffle(order) - selected_data_indexes = reduce(lambda x, y: x+y, sample_by_labels(data_y, weights[order], size)) + selected_data_indexes = reduce(lambda x, y: x+y, sample_by_labels(labels, weights[order], size)) selected_data_indexes = torch.stack(selected_data_indexes) - selected_X = data_x[selected_data_indexes].numpy() - selected_y = data_y[selected_data_indexes].numpy() - return TensorDataset(selected_X, selected_y), weights[order] \ No newline at end of file + return selected_data_indexes, order + +def build_transform(size): + augment_transform = transforms.Compose([ + transforms.Resize(size), + v2.AutoAugment(), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + regular_transform = transforms.Compose([ + transforms.Resize(size), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + return augment_transform, regular_transform \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py b/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py deleted file mode 100644 index 8fb5b87..0000000 --- a/examples/dataset_cifar_workflow/benchmarks/evaluate_market.py +++ /dev/null @@ -1,75 +0,0 @@ -import os -import random -from time import sleep -from typing import Dict - -import learnware -import numpy as np -import torch.random -from learnware import specification -from learnware.market import BaseUserInfo -from tqdm import tqdm - -from build_market import user_semantic -from preprocess.dataloader import ImageDataLoader -from utils.clerk import Clerk, get_custom_logger -from utils.reuse import AveragingReuser - - -def 
evaluate_market_performance(args, market, clerk: Clerk=None, regenerate=True) -> Dict: - logger = get_custom_logger() - - data_root = os.path.join(args.data_root, 'learnware_market_data', "{}_{:d}".format(args.data, args.data_id)) - dataloader = ImageDataLoader(data_root, args.n_users, train=False) - acc = [] - - market_root = args.market_root - # shuffled = list(enumerate(dataloader)) - # random.shuffle(shuffled) - for i, (test_X, test_y) in enumerate(dataloader): - dir_path = os.path.join(market_root, args.data, "{}_{:d}".format(args.spec, args.id), "user_{:d}".format(i)) - os.makedirs(dir_path, exist_ok=True) - - if regenerate: - if args.spec == "rbf": - stat_spec = specification.utils.generate_rkme_spec(X=test_X, reduced_set_size=args.K, gamma=0.1, cuda_idx=args.cuda_idx) - elif args.spec == "ntk": - stat_spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i+args.n_uploaders, **args.__dict__) - stat_spec.generate_stat_spec_from_data(test_X, reduce=True, steps=args.ntk_steps, K=args.K, whitening=False) - else: - raise NotImplementedError() - # Save User's spec to disk - stat_spec.save(os.path.join(dir_path, "spec.json")) - else: - if args.spec == "rbf": - stat_spec = specification.RKMEStatSpecification(gamma=0.1, cuda_idx=args.cuda_idx) - elif args.spec == "ntk": - stat_spec = learnware.specification.RKMEImageStatSpecification(rkme_id=i+args.n_uploaders, cache=False, **args.__dict__) - else: - raise NotImplementedError() - # Load User's spec from disk - stat_spec.load(os.path.join(dir_path, "spec.json")) - - user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": stat_spec}) - - sorted_score_list, single_learnware_list, _, _= market.search_learnware(user_info, max_search_num=args.max_search_num) - - reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list, mode="vote") - ensemble_predict_y = np.argmax(reuse_ensemble.predict(user_data=test_X), axis=-1) - - curr_acc = np.mean(ensemble_predict_y == 
test_y) - acc.append(curr_acc) - if clerk: - clerk.rkme_performance(curr_acc) - - logger.debug("Accuracy for user {:d}: {:.3f}; {:.3f} on average up to now.".format(i, curr_acc, np.mean(acc))) - - logger.info("Accuracy {:.3f}({:.3f})".format(np.mean(acc), np.std(acc))) - - return { - "Accuracy": { - "Mean": np.mean(acc), - "Std": np.std(acc), - "All": acc - } - } \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/models/__init__.py b/examples/dataset_cifar_workflow/benchmarks/models/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/models/__init__.py rename to examples/dataset_cifar_workflow/benchmarks/models/__init__.py diff --git a/examples/dataset_cifar_workflow/models/config.yaml b/examples/dataset_cifar_workflow/benchmarks/models/config.yaml similarity index 100% rename from examples/dataset_cifar_workflow/models/config.yaml rename to examples/dataset_cifar_workflow/benchmarks/models/config.yaml diff --git a/examples/dataset_cifar_workflow/models/conv/__init__.py b/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/models/conv/__init__.py rename to examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py diff --git a/examples/dataset_cifar_workflow/models/conv/model.py b/examples/dataset_cifar_workflow/benchmarks/models/conv/model.py similarity index 100% rename from examples/dataset_cifar_workflow/models/conv/model.py rename to examples/dataset_cifar_workflow/benchmarks/models/conv/model.py diff --git a/examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt b/examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt new file mode 100644 index 0000000..c1bb5f6 --- /dev/null +++ b/examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt @@ -0,0 +1,3 @@ +numpy +torch>2.0.0 +torchvision diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py 
b/examples/dataset_cifar_workflow/benchmarks/utils.py new file mode 100644 index 0000000..51547d8 --- /dev/null +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -0,0 +1,158 @@ +import os +import zipfile +from shutil import rmtree + +import numpy as np +import torch +import tqdm +from torch import optim, nn +from torch.utils.data import DataLoader, Dataset + +from learnware.client import LearnwareClient +from learnware.learnware import Learnware +from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification +from .dataset import uploader_data +from .models.conv import ConvModel +from learnware.market import LearnwareMarket +from learnware.utils import choose_device + +@torch.no_grad() +def evaluate(model, evaluate_set: Dataset, device=None): + device = choose_device(0) if device is None else device + + if isinstance(model, Learnware): + # duck-type + model.__call__ = model.predict + if isinstance(model, nn.Module): + model.eval() + + criterion = nn.CrossEntropyLoss(reduction="sum") + total, correct, loss = 0, 0, 0.0 + dataloader = DataLoader(evaluate_set, batch_size=512, shuffle=True) + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + out = model(X) + loss += criterion(out, y) + + _, predicted = torch.max(out.data, 1) + total += y.size(0) + correct += (predicted == y).sum().item() + + acc = correct / total * 100 + loss = loss / total + + if isinstance(model, nn.Module): + model.train() + + return loss, acc + + +def build_learnware(name: str, market: LearnwareMarket, model_name="conv", + out_classes=10, epochs=35, batch_size=1024, device=None): + device = choose_device(0) if device is None else device + + if name == "cifar10": + train_set, valid_set, spec_set = uploader_data() + else: + raise Exception("Not support", name) + + channel = train_set[0][0].shape[0] + image_size = train_set[0][0].shape[1], train_set[0][0].shape[2] + + model = ConvModel(channel=channel, im_size=image_size, + 
n_random_features=out_classes).to(device) + model.train() + + # SGD optimizer with learning rate 1e-2 + optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) + + # mean-squared error loss + criterion = nn.CrossEntropyLoss() + # Prepare DataLoader + dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) + # Optimizing... + for epoch in tqdm.tqdm(range(epochs), total=epochs): + running_loss = [] + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device=device), y.to(device=device) + optimizer.zero_grad() + out = model(X) + loss = criterion(out, y) + loss.backward() + optimizer.step() + running_loss.append(loss.item()) + + if (epoch + 1) % 5 == 0: + valid_loss, valid_acc = evaluate(model, train_set, device=device) + print('Epoch: {}, Train Average Loss: {:.3f}, Valid Average Loss: {:.3f}'.format( + epoch+1, np.mean(running_loss), valid_loss)) + + train_loss, train_acc = evaluate(model, train_set, device=device) + print("Train Loss: {:.3e}\tTrain Accuracy: {:.3e}".format(train_loss, train_acc)) + + # build specification + loader = DataLoader(spec_set, batch_size=3000, shuffle=True) + sampled_X, _ = next(iter(loader)) + spec = generate_rkme_image_spec(sampled_X) + + # add to market + cache_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'cache', 'learnware')) + if os.path.exists(cache_dir): + rmtree(cache_dir) + os.makedirs(cache_dir, exist_ok=True) + model_dir = os.path.abspath(os.path.join(__file__, "models")) + spec.save(os.path.join(cache_dir, "spec.json")) + + zip_file = os.path.join(cache_dir, "learnware.zip") + # zip -q -r -j zip_file dir_path + with zipfile.ZipFile(zip_file, "w") as zip_obj: + for foldername, subfolders, filenames in os.walk(os.path.join(model_dir, model_name)): + for filename in filenames: + file_path = os.path.join(foldername, filename) + zip_info = zipfile.ZipInfo(filename) + zip_info.compress_type = zipfile.ZIP_STORED + with open(file_path, "rb") as file: + zip_obj.writestr(zip_info, 
file.read()) + + for filename, filepath in zip(["spec.json", "config.yaml"], + [os.path.join(cache_dir, "spec.json"), + os.path.join(model_dir, "config.yaml")]): + zip_info = zipfile.ZipInfo(filename) + zip_info.compress_type = zipfile.ZIP_STORED + with open(file_path, "rb") as file: + zip_obj.writestr(zip_info, file.read()) + + market.add_learnware(zip_file, semantic_spec=LearnwareClient.create_semantic_specification( + self=None, + name="learnware", + description="", + data_type="Image", + task_type="Classification", + library_type="PyTorch", + scenarios=["Computer"], + output_description={str(i): "i" for i in range(out_classes)}) + ) + + return model + + +def build_specification(name: str, cache_id, sampled_size=3000): + cache_path = os.path.abspath(os.path.join( + os.path.dirname( __file__ ), '..', '..', 'cache', "{}.json".format(cache_id))) + + if os.path.exists(cache_path): + spec = RKMEImageSpecification() + spec.load(cache_path) + return spec + + if name == "cifar10": + dataset = cifar10(split="user") + else: + raise Exception("Not support", name) + + loader = DataLoader(dataset, batch_size=sampled_size, shuffle=True) + sampled_X, _ = next(iter(loader)) + spec = generate_rkme_image_spec(sampled_X) + + spec.save(cache_path) + return spec \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/dataset/data.py b/examples/dataset_cifar_workflow/dataset/data.py deleted file mode 100644 index 3c47d88..0000000 --- a/examples/dataset_cifar_workflow/dataset/data.py +++ /dev/null @@ -1,10 +0,0 @@ -from torchvision import datasets - - -def cifar10(split="uploader"): - assert(split in {"uploader", "user"}) - - if split == "uploader": - dataset = datasets.CIFAR10(root="cache", download=True, train=True) - else: - dataset = datasets.CIFAR10(root="cache", download=True, train=False) \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index e69de29..c4cf0a9 100644 --- 
a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -0,0 +1,33 @@ +import os + +import fire + +from examples.dataset_cifar_workflow.benchmarks.utils import build_learnware, build_specification +from learnware.market import instantiate_learnware_market + +PROXY_IP = "172.24.57.111" +os.environ["HTTP_PROXY"] = "http://"+PROXY_IP+":7890" +os.environ["HTTPS_PROXY"] = "http://"+PROXY_IP+":7890" + +class CifarDatasetWorkflow: + + def prepare_learnware(self, market_size=30, rebuild=False): + """initialize learnware market""" + # learnware.init() + + market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=rebuild) + + for i in range(market_size - len(market)): + build_learnware("cifar10", market) + + print("Total Item:", len(market)) + + def evaluate(self, user_size=20): + market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=rebuild) + + # for i in range(user_size): + # build_specification() + + +if __name__ == "__main__": + fire.Fire(CifarDatasetWorkflow) diff --git a/examples/dataset_cifar_workflow/models/train.py b/examples/dataset_cifar_workflow/models/train.py deleted file mode 100644 index e69de29..0000000 From c986cc2721aa5c7954219ce7c757c6f369964121 Mon Sep 17 00:00:00 2001 From: shihy Date: Thu, 30 Nov 2023 20:31:10 +0800 Subject: [PATCH 03/25] [ENH] Milestone --- .../benchmarks/dataset/data.py | 7 +- .../benchmarks/dataset/utils.py | 13 ++- .../benchmarks/models/conv/__init__.py | 3 + .../models/{config.yaml => learnware.yaml} | 2 +- .../benchmarks/utils.py | 82 ++++++++++++------- examples/dataset_cifar_workflow/main.py | 37 +++++++-- examples/dataset_cifar_workflow/run.bash | 15 ++++ 7 files changed, 113 insertions(+), 46 deletions(-) rename examples/dataset_cifar_workflow/benchmarks/models/{config.yaml => learnware.yaml} (69%) create mode 100644 examples/dataset_cifar_workflow/run.bash diff --git 
a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py index 5a8e335..6f007d9 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py @@ -13,7 +13,7 @@ augment_transform, regular_transform = build_transform((32, 32)) cifar_train_set_augment = datasets.CIFAR10(root="cache", download=True, train=True, transform=augment_transform) cifar_train_set = datasets.CIFAR10(root="cache", download=True, - train=True, transform=augment_transform) + train=True, transform=regular_transform) cifar_test_set = datasets.CIFAR10(root="cache", download=True, train=False, transform=regular_transform) @@ -24,3 +24,8 @@ def uploader_data(): return (Subset(cifar_train_set_augment, train_indices), Subset(cifar_test_set, valid_indices), Subset(cifar_train_set, train_indices)) + +def user_data(): + test_indices, order = split_dataset(torch.asarray(cifar_test_set.targets), 3000, split="user") + + return Subset(cifar_test_set, test_indices) \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py index 36fd33a..d4e7c7d 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py @@ -4,14 +4,11 @@ from functools import reduce import numpy as np import torch import torchvision -from torch.utils.data import TensorDataset torchvision.disable_beta_transforms_warning() from torchvision.transforms import transforms, v2 - - def sample_by_labels(labels: torch.Tensor, weights, total_num): weights = np.asarray(weights) @@ -51,17 +48,19 @@ def split_dataset(labels, size, split="uploader", order=None): def build_transform(size): augment_transform = transforms.Compose([ transforms.Resize(size), + # transforms.RandomCrop(size, padding=4), + # transforms.RandomHorizontalFlip(), 
v2.AutoAugment(), transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), + transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), + std=(0.2023, 0.1994, 0.2010)), ]) regular_transform = transforms.Compose([ transforms.Resize(size), transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), + transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), + std=(0.2023, 0.1994, 0.2010)), ]) return augment_transform, regular_transform \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py b/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py index 9080b3c..f8fc5a9 100644 --- a/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py +++ b/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py @@ -22,5 +22,8 @@ class Model(BaseModel): def predict(self, X: np.ndarray) -> np.ndarray: return self.model(torch.asarray(X, dtype=torch.float32, device=self.device)) + def __call__(self, *args, **kwargs): + self.predict(*args, **kwargs) + def finetune(self, X: np.ndarray, y: np.ndarray): raise NotImplementedError() diff --git a/examples/dataset_cifar_workflow/benchmarks/models/config.yaml b/examples/dataset_cifar_workflow/benchmarks/models/learnware.yaml similarity index 69% rename from examples/dataset_cifar_workflow/benchmarks/models/config.yaml rename to examples/dataset_cifar_workflow/benchmarks/models/learnware.yaml index a73666d..0bc6c97 100644 --- a/examples/dataset_cifar_workflow/benchmarks/models/config.yaml +++ b/examples/dataset_cifar_workflow/benchmarks/models/learnware.yaml @@ -3,6 +3,6 @@ model: kwargs: {} stat_specifications: - module_path: learnware.specification - class_name: RKMEImageStatSpecification + class_name: RKMEImageSpecification file_name: spec.json kwargs: {} \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py 
b/examples/dataset_cifar_workflow/benchmarks/utils.py index 51547d8..0e44b93 100644 --- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -4,14 +4,13 @@ from shutil import rmtree import numpy as np import torch -import tqdm from torch import optim, nn from torch.utils.data import DataLoader, Dataset from learnware.client import LearnwareClient from learnware.learnware import Learnware from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification -from .dataset import uploader_data +from .dataset import uploader_data, user_data from .models.conv import ConvModel from learnware.market import LearnwareMarket from learnware.utils import choose_device @@ -20,18 +19,20 @@ from learnware.utils import choose_device def evaluate(model, evaluate_set: Dataset, device=None): device = choose_device(0) if device is None else device - if isinstance(model, Learnware): - # duck-type - model.__call__ = model.predict if isinstance(model, nn.Module): model.eval() + mapping = lambda m, x: m(x) + elif isinstance(model, Learnware): + mapping = lambda m, x: m.predict(x) + else: + raise Exception("not support model type", model) criterion = nn.CrossEntropyLoss(reduction="sum") total, correct, loss = 0, 0, 0.0 dataloader = DataLoader(evaluate_set, batch_size=512, shuffle=True) for i, (X, y) in enumerate(dataloader): X, y = X.to(device), y.to(device) - out = model(X) + out = mapping(model, X) loss += criterion(out, y) _, predicted = torch.max(out.data, 1) @@ -48,7 +49,7 @@ def evaluate(model, evaluate_set: Dataset, device=None): def build_learnware(name: str, market: LearnwareMarket, model_name="conv", - out_classes=10, epochs=35, batch_size=1024, device=None): + out_classes=10, epochs=200, batch_size=2048, device=None): device = choose_device(0) if device is None else device if name == "cifar10": @@ -56,22 +57,34 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", else: raise 
Exception("Not support", name) + cache_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'cache', 'learnware')) + if os.path.exists(cache_dir): + rmtree(cache_dir) + os.makedirs(cache_dir, exist_ok=True) + channel = train_set[0][0].shape[0] image_size = train_set[0][0].shape[1], train_set[0][0].shape[2] model = ConvModel(channel=channel, im_size=image_size, n_random_features=out_classes).to(device) + # if device.type == 'cuda': + # model = nn.DataParallel(model) + # model.benchmark = True + model.train() # SGD optimizer with learning rate 1e-2 - optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - + optimizer = optim.SGD(model.parameters(), lr=5e-2, momentum=0.9) + # Scheduler TODO: Use this + # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) # mean-squared error loss criterion = nn.CrossEntropyLoss() # Prepare DataLoader dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) + # valid loss + best_loss = 100000 # initially # Optimizing... 
- for epoch in tqdm.tqdm(range(epochs), total=epochs): + for epoch in range(epochs): running_loss = [] for i, (X, y) in enumerate(dataloader): X, y = X.to(device=device), y.to(device=device) @@ -82,13 +95,21 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", optimizer.step() running_loss.append(loss.item()) + valid_loss, valid_acc = evaluate(model, train_set, device=device) + if valid_loss < best_loss: + best_loss = valid_loss + if isinstance(model, nn.DataParallel): + model_to_save = model.module + else: + model_to_save = model + torch.save(model_to_save.state_dict(), os.path.join(cache_dir, "model.pth")) + print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) + if (epoch + 1) % 5 == 0: - valid_loss, valid_acc = evaluate(model, train_set, device=device) print('Epoch: {}, Train Average Loss: {:.3f}, Valid Average Loss: {:.3f}'.format( epoch+1, np.mean(running_loss), valid_loss)) - train_loss, train_acc = evaluate(model, train_set, device=device) - print("Train Loss: {:.3e}\tTrain Accuracy: {:.3e}".format(train_loss, train_acc)) + # scheduler.step() # build specification loader = DataLoader(spec_set, batch_size=3000, shuffle=True) @@ -96,11 +117,7 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", spec = generate_rkme_image_spec(sampled_X) # add to market - cache_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'cache', 'learnware')) - if os.path.exists(cache_dir): - rmtree(cache_dir) - os.makedirs(cache_dir, exist_ok=True) - model_dir = os.path.abspath(os.path.join(__file__, "models")) + model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) spec.save(os.path.join(cache_dir, "spec.json")) zip_file = os.path.join(cache_dir, "learnware.zip") @@ -108,15 +125,18 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", with zipfile.ZipFile(zip_file, "w") as zip_obj: for foldername, subfolders, filenames in 
os.walk(os.path.join(model_dir, model_name)): for filename in filenames: + if filename.endswith(".pyc"): + continue file_path = os.path.join(foldername, filename) zip_info = zipfile.ZipInfo(filename) zip_info.compress_type = zipfile.ZIP_STORED with open(file_path, "rb") as file: zip_obj.writestr(zip_info, file.read()) - for filename, filepath in zip(["spec.json", "config.yaml"], + for filename, file_path in zip(["spec.json", "model.pth", "learnware.yaml"], [os.path.join(cache_dir, "spec.json"), - os.path.join(model_dir, "config.yaml")]): + os.path.join(cache_dir, "model.pth"), + os.path.join(model_dir, "learnware.yaml")]): zip_info = zipfile.ZipInfo(filename) zip_info.compress_type = zipfile.ZIP_STORED with open(file_path, "rb") as file: @@ -125,12 +145,12 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", market.add_learnware(zip_file, semantic_spec=LearnwareClient.create_semantic_specification( self=None, name="learnware", - description="", + description="For Cifar Dataset Workflow", data_type="Image", task_type="Classification", library_type="PyTorch", scenarios=["Computer"], - output_description={str(i): "i" for i in range(out_classes)}) + output_description={"Dimension": out_classes, "Description": {str(i): "i" for i in range(out_classes)}}) ) return model @@ -138,21 +158,21 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", def build_specification(name: str, cache_id, sampled_size=3000): cache_path = os.path.abspath(os.path.join( - os.path.dirname( __file__ ), '..', '..', 'cache', "{}.json".format(cache_id))) - - if os.path.exists(cache_path): - spec = RKMEImageSpecification() - spec.load(cache_path) - return spec + os.path.dirname( __file__ ), '..', 'cache', "{}.json".format(cache_id))) if name == "cifar10": - dataset = cifar10(split="user") + dataset = user_data() else: raise Exception("Not support", name) + if os.path.exists(cache_path): + spec = RKMEImageSpecification() + spec.load(cache_path) + return 
spec, dataset + loader = DataLoader(dataset, batch_size=sampled_size, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X) + spec = generate_rkme_image_spec(sampled_X, steps=1) spec.save(cache_path) - return spec \ No newline at end of file + return spec, dataset \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index c4cf0a9..c45503e 100644 --- a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -2,8 +2,9 @@ import os import fire -from examples.dataset_cifar_workflow.benchmarks.utils import build_learnware, build_specification -from learnware.market import instantiate_learnware_market +from benchmarks.utils import build_learnware, build_specification, evaluate +from learnware.client import LearnwareClient +from learnware.market import instantiate_learnware_market, BaseUserInfo PROXY_IP = "172.24.57.111" os.environ["HTTP_PROXY"] = "http://"+PROXY_IP+":7890" @@ -18,15 +19,39 @@ class CifarDatasetWorkflow: market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=rebuild) for i in range(market_size - len(market)): + print("=" * 20 + "learnware {}".format(i) + "=" * 20) build_learnware("cifar10", market) print("Total Item:", len(market)) def evaluate(self, user_size=20): - market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=rebuild) - - # for i in range(user_size): - # build_specification() + market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=False) + + for i in range(user_size): + user_spec, dataset = build_specification("cifar10", i) + + user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( + self=None, + description="For Cifar Dataset Workflow", + data_type="Image", + task_type="Classification", + library_type="PyTorch", + scenarios=["Computer"], + 
output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), + stat_info={"RKMEImageSpecification": user_spec}) + + search_result = market.search_learnware(user_info) + single_result = search_result.get_single_results() + multiple_result = search_result.get_multiple_results() + + loss_list = [] + for single_item in single_result[:3]: + loss, acc = evaluate(single_item.learnware, dataset) + loss_list.append(loss) + + print( + f"Top1-score: {single_result[0].score}, learnware_id: {single_result[0].learnware.id}, loss: {loss_list[0]}" + ) if __name__ == "__main__": diff --git a/examples/dataset_cifar_workflow/run.bash b/examples/dataset_cifar_workflow/run.bash new file mode 100644 index 0000000..b6057ae --- /dev/null +++ b/examples/dataset_cifar_workflow/run.bash @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC1090 +source ~/anaconda3/etc/profile.d/conda.sh +conda activate dev + +export PYTHONPATH="${PYTHONPATH}:${HOME}/Lab/Learnware/" +echo ${PYTHONPATH} +token="$(date +%s)" +mkdir -p "./log" +echo "The output is redirected to log/${token}.log with token ${token}" + +# shellcheck disable=SC2086 +nohup python -u main.py prepare_learnware --rebuild=True > "./log/${token}.log" 2>&1 & +echo "With PID = $!" 
\ No newline at end of file From e36aab797562036a5c145c123e50da6b4a166499 Mon Sep 17 00:00:00 2001 From: shihy Date: Sat, 2 Dec 2023 16:57:06 +0800 Subject: [PATCH 04/25] [ENH] Able to run basic experiments --- .../benchmarks/dataset/data.py | 36 +++--- .../benchmarks/dataset/utils.py | 40 +++++-- .../benchmarks/utils.py | 72 ++++++------ .../{run.bash => evaluate.bash} | 3 +- examples/dataset_cifar_workflow/main.py | 65 ++++++++--- examples/dataset_cifar_workflow/mock.py | 106 ++++++++++++++++++ examples/dataset_cifar_workflow/prepare.bash | 16 +++ learnware/specification/module.py | 3 +- learnware/specification/regular/image/rkme.py | 3 + 9 files changed, 271 insertions(+), 73 deletions(-) rename examples/dataset_cifar_workflow/{run.bash => evaluate.bash} (71%) create mode 100644 examples/dataset_cifar_workflow/mock.py create mode 100644 examples/dataset_cifar_workflow/prepare.bash diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py index 6f007d9..6a87c8a 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py @@ -1,31 +1,39 @@ import os +import numpy as np import torch from torch.utils.data import random_split, Subset from torchvision import datasets +from torchvision.transforms import transforms -from examples.dataset_cifar_workflow.benchmarks.dataset.utils import build_transform, sample_by_labels, split_dataset - +from examples.dataset_cifar_workflow.benchmarks.dataset.utils import split_dataset, build_transforms cache_root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'cache')) -augment_transform, regular_transform = build_transform((32, 32)) +cifar_data = torch.stack([u[0] for u in datasets.CIFAR10(root="cache", download=True, + train=True, transform=transforms.ToTensor())]) +augment_transform, regular_transform, whiten_transform = build_transforms(cifar_data) + 
cifar_train_set_augment = datasets.CIFAR10(root="cache", download=True, - train=True, transform=augment_transform) -cifar_train_set = datasets.CIFAR10(root="cache", download=True, - train=True, transform=regular_transform) + train=True, transform=whiten_transform) cifar_test_set = datasets.CIFAR10(root="cache", download=True, - train=False, transform=regular_transform) - -def uploader_data(): - train_indices, order = split_dataset(torch.asarray(cifar_train_set_augment.targets), 12500, split="uploader") + train=False, transform=whiten_transform) +cifar_spec_train_set = datasets.CIFAR10(root="cache", download=True, + train=True, transform=whiten_transform) +cifar_spec_test_set = datasets.CIFAR10(root="cache", download=True, + train=False, transform=whiten_transform) + +def uploader_data(order=None): + train_indices, order = split_dataset(torch.asarray(cifar_train_set_augment.targets), 12500, split="uploader", order=order) valid_indices, _ = split_dataset(torch.asarray(cifar_test_set.targets), 2000, split="uploader", order=order) return (Subset(cifar_train_set_augment, train_indices), Subset(cifar_test_set, valid_indices), - Subset(cifar_train_set, train_indices)) + Subset(cifar_spec_train_set, train_indices), + order) -def user_data(): - test_indices, order = split_dataset(torch.asarray(cifar_test_set.targets), 3000, split="user") +def user_data(indices=None, order=None): + if indices is None: + indices, order = split_dataset(torch.asarray(cifar_spec_test_set.targets), 3000, split="user", order=order) - return Subset(cifar_test_set, test_indices) \ No newline at end of file + return Subset(cifar_test_set, indices), Subset(cifar_spec_test_set, indices), indices, order \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py index d4e7c7d..269eb04 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py +++ 
b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py @@ -45,22 +45,44 @@ def split_dataset(labels, size, split="uploader", order=None): return selected_data_indexes, order -def build_transform(size): +def build_zca_matrix(X, reg_coef=0.1): + X = (X - torch.mean(X, [0, 2, 3], keepdim=True)) / (torch.std(X, [0, 2, 3], keepdim=True)) + + X_flat = X.reshape(X.shape[0], -1) + cov = (X_flat.T @ X_flat) / X_flat.shape[0] + reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] + u, s, _ = torch.svd(cov.cuda() + reg_amount * torch.eye(cov.shape[0]).cuda()) + inv_sqrt_zca_eigs = s ** (-0.5) + whitening_transform = torch.einsum( + 'ij,j,kj->ik', u, inv_sqrt_zca_eigs, u) + + return whitening_transform.cpu() + +def build_transforms(train_X): + size = train_X.shape[2], train_X.shape[3] + whitening_matrix = build_zca_matrix(train_X) + + mean_vector = torch.mean(train_X, [0, 2, 3], keepdim=True).squeeze(0) + std_vector = torch.std(train_X, [0, 2, 3], keepdim=True).squeeze(0) + augment_transform = transforms.Compose([ transforms.Resize(size), - # transforms.RandomCrop(size, padding=4), - # transforms.RandomHorizontalFlip(), - v2.AutoAugment(), transforms.ToTensor(), - transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), - std=(0.2023, 0.1994, 0.2010)), + transforms.Normalize(mean=mean_vector, std=std_vector), ]) regular_transform = transforms.Compose([ transforms.Resize(size), transforms.ToTensor(), - transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), - std=(0.2023, 0.1994, 0.2010)), + transforms.Normalize(mean=mean_vector, std=std_vector), + ]) + + whiten_transform = transforms.Compose([ + transforms.Resize(size), + transforms.ToTensor(), + transforms.Normalize(mean=mean_vector, std=std_vector), + # transform_data + transforms.LinearTransformation(whitening_matrix, torch.zeros_like(train_X[0].reshape(-1))) ]) - return augment_transform, regular_transform \ No newline at end of file + return augment_transform, regular_transform, whiten_transform \ No newline at end of 
file diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_cifar_workflow/benchmarks/utils.py index 0e44b93..6458400 100644 --- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -1,3 +1,4 @@ +import json import os import zipfile from shutil import rmtree @@ -22,10 +23,8 @@ def evaluate(model, evaluate_set: Dataset, device=None): if isinstance(model, nn.Module): model.eval() mapping = lambda m, x: m(x) - elif isinstance(model, Learnware): + else: # For predict interface mapping = lambda m, x: m.predict(x) - else: - raise Exception("not support model type", model) criterion = nn.CrossEntropyLoss(reduction="sum") total, correct, loss = 0, 0, 0.0 @@ -33,6 +32,8 @@ def evaluate(model, evaluate_set: Dataset, device=None): for i, (X, y) in enumerate(dataloader): X, y = X.to(device), y.to(device) out = mapping(model, X) + if not torch.is_tensor(out): + out = torch.from_numpy(out).to(device) loss += criterion(out, y) _, predicted = torch.max(out.data, 1) @@ -48,12 +49,12 @@ def evaluate(model, evaluate_set: Dataset, device=None): return loss, acc -def build_learnware(name: str, market: LearnwareMarket, model_name="conv", - out_classes=10, epochs=200, batch_size=2048, device=None): +def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv", + out_classes=10, epochs=35, batch_size=128, device=None): device = choose_device(0) if device is None else device if name == "cifar10": - train_set, valid_set, spec_set = uploader_data() + train_set, valid_set, spec_set, order = uploader_data(order=order) else: raise Exception("Not support", name) @@ -67,15 +68,12 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", model = ConvModel(channel=channel, im_size=image_size, n_random_features=out_classes).to(device) - # if device.type == 'cuda': - # model = nn.DataParallel(model) - # model.benchmark = True model.train() # SGD optimizer with learning 
rate 1e-2 - optimizer = optim.SGD(model.parameters(), lr=5e-2, momentum=0.9) - # Scheduler TODO: Use this + optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) + # Scheduler # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) # mean-squared error loss criterion = nn.CrossEntropyLoss() @@ -86,6 +84,7 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", # Optimizing... for epoch in range(epochs): running_loss = [] + model.train() for i, (X, y) in enumerate(dataloader): X, y = X.to(device=device), y.to(device=device) optimizer.zero_grad() @@ -95,26 +94,27 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", optimizer.step() running_loss.append(loss.item()) - valid_loss, valid_acc = evaluate(model, train_set, device=device) + valid_loss, valid_acc = evaluate(model, valid_set, device=device) + train_loss, train_acc = evaluate(model, train_set, device=device) if valid_loss < best_loss: best_loss = valid_loss - if isinstance(model, nn.DataParallel): - model_to_save = model.module - else: - model_to_save = model - torch.save(model_to_save.state_dict(), os.path.join(cache_dir, "model.pth")) + + torch.save(model.state_dict(), os.path.join(cache_dir, "model.pth")) print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) + if valid_acc > 99.0: + print("Early Stopping at 99% !") + break if (epoch + 1) % 5 == 0: - print('Epoch: {}, Train Average Loss: {:.3f}, Valid Average Loss: {:.3f}'.format( - epoch+1, np.mean(running_loss), valid_loss)) + print('Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}'.format( + epoch+1, np.mean(running_loss), train_acc, valid_loss)) # scheduler.step() # build specification loader = DataLoader(spec_set, batch_size=3000, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X) + spec = generate_rkme_image_spec(sampled_X, whitening=False) # add to market 
model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) @@ -142,10 +142,11 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", with open(file_path, "rb") as file: zip_obj.writestr(zip_info, file.read()) + print(", ".join([str(o) for o in order])) market.add_learnware(zip_file, semantic_spec=LearnwareClient.create_semantic_specification( self=None, name="learnware", - description="For Cifar Dataset Workflow", + description=", ".join([str(o) for o in order]), data_type="Image", task_type="Classification", library_type="PyTorch", @@ -156,23 +157,24 @@ def build_learnware(name: str, market: LearnwareMarket, model_name="conv", return model -def build_specification(name: str, cache_id, sampled_size=3000): - cache_path = os.path.abspath(os.path.join( - os.path.dirname( __file__ ), '..', 'cache', "{}.json".format(cache_id))) - - if name == "cifar10": - dataset = user_data() - else: - raise Exception("Not support", name) +def build_specification(name: str, cache_id, order, sampled_size=3000): + cache_dir = os.path.abspath(os.path.join( + os.path.dirname( __file__ ), '..', 'cache', 'spec')) + os.makedirs(cache_dir, exist_ok=True) + cache_path = os.path.join(cache_dir, "spec_{}.json".format(cache_id)) if os.path.exists(cache_path): spec = RKMEImageSpecification() spec.load(cache_path) - return spec, dataset - loader = DataLoader(dataset, batch_size=sampled_size, shuffle=True) - sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, steps=1) + test_dataset, spec_dataset, _, _ = user_data(indices=torch.asarray(spec.msg)) + else: + test_dataset, spec_dataset, indices, _ = user_data(order=order) + loader = DataLoader(spec_dataset, batch_size=sampled_size, shuffle=True) + sampled_X, _ = next(iter(loader)) + spec = generate_rkme_image_spec(sampled_X, whitening=False) + + spec.msg = indices.tolist() + spec.save(cache_path) - spec.save(cache_path) - return spec, dataset \ No newline at end of file + return spec, 
test_dataset \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/run.bash b/examples/dataset_cifar_workflow/evaluate.bash similarity index 71% rename from examples/dataset_cifar_workflow/run.bash rename to examples/dataset_cifar_workflow/evaluate.bash index b6057ae..99cbc5a 100644 --- a/examples/dataset_cifar_workflow/run.bash +++ b/examples/dataset_cifar_workflow/evaluate.bash @@ -10,6 +10,7 @@ token="$(date +%s)" mkdir -p "./log" echo "The output is redirected to log/${token}.log with token ${token}" +export CUDA_VISIBLE_DEVICES=0 # shellcheck disable=SC2086 -nohup python -u main.py prepare_learnware --rebuild=True > "./log/${token}.log" 2>&1 & +nohup python -u main.py evaluate --market_id="momo" > "./log/${token}.log" 2>&1 & echo "With PID = $!" \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index c45503e..6550efd 100644 --- a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -1,10 +1,16 @@ import os import fire +import numpy as np +from numpy import mean +from torch.utils.data import DataLoader +import learnware from benchmarks.utils import build_learnware, build_specification, evaluate from learnware.client import LearnwareClient from learnware.market import instantiate_learnware_market, BaseUserInfo +from learnware.reuse import AveragingReuser +from learnware.specification import generate_rkme_image_spec PROXY_IP = "172.24.57.111" os.environ["HTTP_PROXY"] = "http://"+PROXY_IP+":7890" @@ -12,23 +18,38 @@ os.environ["HTTPS_PROXY"] = "http://"+PROXY_IP+":7890" class CifarDatasetWorkflow: - def prepare_learnware(self, market_size=30, rebuild=False): + def prepare_learnware(self, market_size=50, market_id=None, rebuild=False): """initialize learnware market""" - # learnware.init() + learnware.init() - market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=rebuild) + market_id = 
"dataset_cifar_workflow" if market_id is None else market_id + orders = np.stack([np.random.permutation(10) for _ in range(market_size)]) - for i in range(market_size - len(market)): + print("Using market_id", market_id) + market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=rebuild) + + for i, order in enumerate(orders[len(market):]): print("=" * 20 + "learnware {}".format(i) + "=" * 20) - build_learnware("cifar10", market) + print("order:", order) + build_learnware("cifar10", market, order) print("Total Item:", len(market)) - def evaluate(self, user_size=20): - market = instantiate_learnware_market(name="easy", market_id="dataset_cifar_workflow", rebuild=False) + def evaluate(self, user_size=100, market_id=None): + learnware.init() + + market_id = "dataset_cifar_workflow" if market_id is None else market_id + orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) - for i in range(user_size): - user_spec, dataset = build_specification("cifar10", i) + print("Using market_id", market_id) + market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) + + top_1_acc_record, ensemble_acc_record, best_acc_record, mean_acc_record = [], [], [], [] + top_1_loss_record, ensemble_loss_record, best_loss_record, mean_loss_record = [], [], [], [] + for i, order in enumerate(orders): + print("=" * 20 + "user {}".format(i) + "=" * 20) + print("order:", order) + user_spec, dataset = build_specification("cifar10", i, order) user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( self=None, @@ -44,13 +65,31 @@ class CifarDatasetWorkflow: single_result = search_result.get_single_results() multiple_result = search_result.get_multiple_results() - loss_list = [] - for single_item in single_result[:3]: - loss, acc = evaluate(single_item.learnware, dataset) + loss_list, acc_list = [], [] + for item in market.get_learnwares(): + loss, acc = evaluate(item, dataset) loss_list.append(loss) + 
acc_list.append(acc) + + best_acc_record.append(max(acc_list)) + mean_acc_record.append(mean(acc_list)) + print("Best Accuracy: {:.3f}% ({:.3f}%), Avg Accuracy: {:.3f}% ({:.3f}%)".format( + max(acc_list), mean(best_acc_record), mean(acc_list), mean(mean_acc_record))) + + top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) + top_1_acc_record.append(top_1_acc) + print( + "Top-1\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f}".format( + top_1_acc, mean(top_1_acc_record), loss_list[0]) + ) + # multiple_result[0].learnwares + reuse_ensemble = AveragingReuser(learnware_list=[item.learnware for item in single_result[:3]], mode="vote_by_prob") + ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) + ensemble_acc_record.append(ensemble_acc) print( - f"Top1-score: {single_result[0].score}, learnware_id: {single_result[0].learnware.id}, loss: {loss_list[0]}" + "Averaging Reuse\tAccuracy: {:.3f} ({:.3f}%), Loss: {:.3f}".format( + ensemble_acc, mean(ensemble_acc_record), ensemble_loss) ) diff --git a/examples/dataset_cifar_workflow/mock.py b/examples/dataset_cifar_workflow/mock.py new file mode 100644 index 0000000..554718f --- /dev/null +++ b/examples/dataset_cifar_workflow/mock.py @@ -0,0 +1,106 @@ +import os.path +import random + +import numpy as np +import torch +from torch.utils.data import DataLoader, TensorDataset +from torchvision import datasets +from torchvision.transforms import transforms + +import learnware +from examples.dataset_cifar_workflow.benchmarks.dataset import user_data, split_dataset +from examples.dataset_image_workflow.get_data import get_zca_matrix, transform_data +from learnware import setup_seed +from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification + + +def f(d): + return np.exp(-d / 0.00005) + +def get_spec(path, order=None): + if path is not None and os.path.exists(path): + spec = RKMEImageSpecification() + spec.load(path) + return spec, spec.msg + + test_user, spec_user, _, order = 
user_data(order=order) + loader = DataLoader(spec_user, batch_size=3000, shuffle=True) + sampled_X, _ = next(iter(loader)) + spec = generate_rkme_image_spec(sampled_X, whitening=False) + spec.msg = order + + if path is not None: + spec.save(path) + + return spec, order + +DATA_ROOT = "cache" +def get_cifar10(output_channels=3, image_size=32, z_score=True, order=None): + ds_train = datasets.CIFAR10(DATA_ROOT, train=True, download=True, transform=transforms.Compose( + [transforms.ToTensor(), transforms.Resize([image_size, image_size])])) + X_train = ds_train.data + y_train = ds_train.targets + ds_test = datasets.CIFAR10(DATA_ROOT, train=False, download=True, transform=transforms.Compose( + [transforms.ToTensor(), transforms.Resize([image_size, image_size])])) + + X_test = ds_test.data + y_test = ds_test.targets + + X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) + y_train = torch.Tensor(y_train).long() + X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) + y_test = torch.Tensor(y_test).long() + + if output_channels == 1: + X_train = torch.mean(X_train, 1, keepdim=True) + X_test = torch.mean(X_test, 1, keepdim=True) + + if z_score: + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / ( + torch.std(X_train, [0, 2, 3], keepdim=True)) + + whitening_mat = get_zca_matrix(X_train, reg_coef=0.1) + train_X = transform_data(X_train, whitening_mat) + test_X = transform_data(X_train, whitening_mat) + + selected_data_indexes, order = split_dataset(y_test, 3000, split="user", order=order) + + return TensorDataset(test_X[selected_data_indexes], y_test[selected_data_indexes]), order + + + + +if __name__ == "__main__": + # 3 5 + # learnware.init(deterministic=False) + # + # userset1, order = get_cifar10() + # print(order) + # loader = DataLoader(userset1, batch_size=3000, shuffle=True) + # sampled_X, _ = next(iter(loader)) + # spec = 
generate_rkme_image_spec(sampled_X, whitening=False) + # spec.msg = order + # spec.save("old1.json") + # old1 = spec + # + # # userset2 = userset1 + # userset2, order = get_cifar10() + # print(order) + # loader = DataLoader(userset2, batch_size=3000, shuffle=True) + # sampled_X, _ = next(iter(loader)) + # spec = generate_rkme_image_spec(sampled_X, whitening=False) + # spec.msg = order + # spec.save("old2.json") + # old2 = spec + # + # old1, order1 = get_spec("hope1.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + # old2, order2 = get_spec("hope2.json", order=[2, 3, 4, 5, 0, 1, 6, 7, 8, 9]) + # np.random.seed(0) + # random.seed(0) + old1, order1 = get_spec(None, order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + old2, order2 = get_spec(None, order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) + print(order1, order2) + print(f(old1.dist(old2))) + diff --git a/examples/dataset_cifar_workflow/prepare.bash b/examples/dataset_cifar_workflow/prepare.bash new file mode 100644 index 0000000..9d48246 --- /dev/null +++ b/examples/dataset_cifar_workflow/prepare.bash @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC1090 +source ~/anaconda3/etc/profile.d/conda.sh +conda activate dev + +export PYTHONPATH="${PYTHONPATH}:${HOME}/Lab/Learnware/" +echo ${PYTHONPATH} +token="$(date +%s)" +mkdir -p "./log" +echo "The output is redirected to log/${token}.log with token ${token}" + +export CUDA_VISIBLE_DEVICES=1 +# shellcheck disable=SC2086 +nohup python -u main.py prepare_learnware --market_id="momo" --rebuild > "./log/${token}.log" 2>&1 & +echo "With PID = $!" 
\ No newline at end of file diff --git a/learnware/specification/module.py b/learnware/specification/module.py index f17a76f..13624f9 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -77,6 +77,7 @@ def generate_rkme_image_spec( reduce: bool = True, verbose: bool = True, cuda_idx: int = None, + **kwargs ) -> RKMEImageSpecification: """ Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification for Image. @@ -119,7 +120,7 @@ def generate_rkme_image_spec( # Generate rkme spec rkme_image_spec = RKMEImageSpecification(cuda_idx=cuda_idx) rkme_image_spec.generate_stat_spec_from_data( - X, reduced_set_size, step_size, steps, resize, nonnegative_beta, reduce, verbose + X, reduced_set_size, step_size, steps, resize, nonnegative_beta, reduce, verbose, **kwargs ) return rkme_image_spec diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 81b68f7..420066b 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -34,6 +34,9 @@ class RKMEImageSpecification(RegularStatSpecification): """ self.RKME_IMAGE_VERSION = 1 # Please maintain backward compatibility. 
+ # TODO: remove this + self.msg=None + self.z = None self.beta = None self._cuda_idx = allocate_cuda_idx() if cuda_idx is None else cuda_idx From 320c77f2a20fa071f99d759471f6698081beeef6 Mon Sep 17 00:00:00 2001 From: shihy Date: Mon, 4 Dec 2023 10:31:11 +0800 Subject: [PATCH 05/25] [Fix] Fix RKME Image's Performance decline --- learnware/specification/regular/image/rkme.py | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 420066b..0d115b8 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -13,10 +13,12 @@ import torch from torch import nn from torch.utils.data import TensorDataset, DataLoader from tqdm import tqdm +from numpy.random import RandomState from . import cnn_gp from ..base import RegularStatSpecification from ..table.rkme import rkme_solve_qp +from .... import setup_seed from ....utils import choose_device, allocate_cuda_idx @@ -48,6 +50,7 @@ class RKMEImageSpecification(RegularStatSpecification): if "model_config" not in kwargs else kwargs["model_config"] ) + self._random_generator = None super(RKMEImageSpecification, self).__init__(type=self.__class__.__name__) @@ -55,13 +58,11 @@ class RKMEImageSpecification(RegularStatSpecification): def device(self): return self._device - def _generate_models(self, n_models: int, channel: int = 3, fixed_seed=None): + def _generate_models(self, n_models: int, channel: int = 3): model_class = functools.partial(_ConvNet_wide, channel=channel, **self.model_config) def __builder(i): - if fixed_seed is not None: - torch.manual_seed(fixed_seed[i]) - return model_class().to(self._device) + return model_class(random_generator=self._random_generator).to(self._device) return (__builder(m) for m in range(n_models)) @@ -152,8 +153,14 @@ class RKMEImageSpecification(RegularStatSpecification): self.beta = 
torch.from_numpy(self.beta).to(self._device) return + self._random_generator = RandomGenerator(0) + # crucial + setup_seed(0) + random_models = list(self._generate_models(n_models=self.n_models, channel=X.shape[1])) - self.z = torch.zeros(Z_shape).to(self._device).float().normal_(0, 1) + self.z = torch.zeros(Z_shape).to(self._device).float() + self._random_generator.normal_(self.z, 0, 1) + with torch.no_grad(): x_features = self._generate_random_feature(X_train, random_models=random_models) self._update_beta(x_features, nonnegative_beta, random_models=random_models) @@ -404,11 +411,21 @@ def _get_zca_matrix(X, reg_coef=0.1): return whitening_transform +class RandomGenerator: + + def __init__(self, seed=0): + self.state = RandomState(seed) + + def normal_(self, tensor: torch.Tensor, mean=0.0, std=1.0): + data = self.state.normal(mean, std, size=tensor.shape) + with torch.no_grad(): + tensor.copy_(torch.asarray(data, dtype=tensor.dtype)) + class _ConvNet_wide(nn.Module): - def __init__(self, channel, mu=None, sigma=None, k=2, net_width=128, net_depth=3, im_size=(32, 32)): + def __init__(self, channel, random_generator, mu=None, sigma=None, k=2, net_width=128, net_depth=3, im_size=(32, 32)): self.k = k super().__init__() - self.features, shape_feat = self._make_layers(channel, net_width, net_depth, im_size, mu, sigma) + self.features, shape_feat = self._make_layers(channel, net_width, net_depth, im_size, mu, sigma, random_generator) # self.aggregation = nn.AvgPool2d(kernel_size=shape_feat[1]) def forward(self, x): @@ -417,14 +434,14 @@ class _ConvNet_wide(nn.Module): # out = self.aggregation(out).reshape(out.size(0), -1) return out - def _make_layers(self, channel, net_width, net_depth, im_size, mu, sigma): + def _make_layers(self, channel, net_width, net_depth, im_size, mu, sigma, random_generator): k = self.k layers = [] in_channels = channel shape_feat = [in_channels, im_size[0], im_size[1]] for d in range(net_depth): - layers += 
[_build_conv2d_gaussian(in_channels, int(k * net_width), 3, 1, mean=mu, std=sigma)] + layers += [_build_conv2d_gaussian(in_channels, int(k * net_width), random_generator, 3, 1, mean=mu, std=sigma)] shape_feat[0] = int(k * net_width) layers += [nn.ReLU(inplace=True)] @@ -437,15 +454,15 @@ class _ConvNet_wide(nn.Module): return nn.Sequential(*layers), shape_feat -def _build_conv2d_gaussian(in_channels, out_channels, kernel=3, padding=1, mean=None, std=None): +def _build_conv2d_gaussian(in_channels, out_channels, random_generator: RandomGenerator, kernel=3, padding=1, mean=None, std=None): layer = nn.Conv2d(in_channels, out_channels, kernel, padding=padding) if mean is None: mean = 0 if std is None: std = np.sqrt(2) / np.sqrt(layer.weight.shape[1] * layer.weight.shape[2] * layer.weight.shape[3]) # print('Initializing Conv. Mean=%.2f, std=%.2f'%(mean, std)) - torch.nn.init.normal_(layer.weight, mean, std) - torch.nn.init.normal_(layer.bias, 0, 0.1) + random_generator.normal_(layer.weight, mean, std) + random_generator.normal_(layer.bias, 0, 0.1) return layer From 908f90918d0ea7564c1c2f573be2f09e66cafe2d Mon Sep 17 00:00:00 2001 From: shihy Date: Mon, 4 Dec 2023 20:34:57 +0800 Subject: [PATCH 06/25] [ENH] New Recorder for Cifar Workflow --- .../benchmarks/utils.py | 36 +++++++++++++++++-- examples/dataset_cifar_workflow/main.py | 35 +++++++++++++----- examples/dataset_cifar_workflow/prepare.bash | 2 +- learnware/specification/regular/image/rkme.py | 17 ++++++++- 4 files changed, 77 insertions(+), 13 deletions(-) diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_cifar_workflow/benchmarks/utils.py index 6458400..b1f42fd 100644 --- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -1,7 +1,9 @@ import json import os import zipfile +from collections import defaultdict from shutil import rmtree +from tabulate import tabulate import numpy as np import torch @@ -46,7 +48,7 @@ def 
evaluate(model, evaluate_set: Dataset, device=None): if isinstance(model, nn.Module): model.train() - return loss, acc + return loss.item(), acc def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv", @@ -177,4 +179,34 @@ def build_specification(name: str, cache_id, order, sampled_size=3000): spec.msg = indices.tolist() spec.save(cache_path) - return spec, test_dataset \ No newline at end of file + return spec, test_dataset + + +class Recorder: + + def __init__(self): + self.data = defaultdict(list) + + def record(self, name, accuracy, loss): + self.data[name].append((accuracy, loss)) + + def latest(self): + table = [] + + for name, values in self.data.items(): + value = values[-1] + table.append([name, "{:.3f}%".format(value[0]), "{:.3f}".format(value[1])]) + + return str(tabulate(table, headers=["Case", "Accuracy", "Loss"], tablefmt='orgtbl')) + + def accumulated(self): + table = [] + + for name, values in self.data.items(): + value_mean = [np.mean(v) for v in zip(*values)] + value_std = [np.std(v) for v in zip(*values)] + table.append([name, + "{:.3f}% ± {:.3f}%".format(value_mean[0], value_std[0]), + "{:.3f} ± {:.3f}" .format(value_mean[1], value_std[1])]) + + return str(tabulate(table, headers=["Case", "Accuracy", "Loss"], tablefmt='orgtbl')) \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index 6550efd..3842de2 100644 --- a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -6,7 +6,7 @@ from numpy import mean from torch.utils.data import DataLoader import learnware -from benchmarks.utils import build_learnware, build_specification, evaluate +from benchmarks.utils import build_learnware, build_specification, evaluate, Recorder from learnware.client import LearnwareClient from learnware.market import instantiate_learnware_market, BaseUserInfo from learnware.reuse import AveragingReuser @@ -21,6 +21,7 @@ class 
CifarDatasetWorkflow: def prepare_learnware(self, market_size=50, market_id=None, rebuild=False): """initialize learnware market""" learnware.init() + assert not rebuild market_id = "dataset_cifar_workflow" if market_id is None else market_id orders = np.stack([np.random.permutation(10) for _ in range(market_size)]) @@ -46,6 +47,8 @@ class CifarDatasetWorkflow: top_1_acc_record, ensemble_acc_record, best_acc_record, mean_acc_record = [], [], [], [] top_1_loss_record, ensemble_loss_record, best_loss_record, mean_loss_record = [], [], [], [] + + recorder = Recorder() for i, order in enumerate(orders): print("=" * 20 + "user {}".format(i) + "=" * 20) print("order:", order) @@ -72,25 +75,39 @@ class CifarDatasetWorkflow: acc_list.append(acc) best_acc_record.append(max(acc_list)) + best_loss_record.append(min(loss_list)) + print("Best Accuracy: {:.3f}% ({:.3f}%), Best Loss: {:.3f} ({:.3f})".format( + max(acc_list), mean(best_acc_record), min(loss_list), mean(best_loss_record))) + recorder.record("Best", accuracy=max(acc_list), loss=min(loss_list)) + mean_acc_record.append(mean(acc_list)) - print("Best Accuracy: {:.3f}% ({:.3f}%), Avg Accuracy: {:.3f}% ({:.3f}%)".format( - max(acc_list), mean(best_acc_record), mean(acc_list), mean(mean_acc_record))) + mean_loss_record.append(mean(loss_list)) + print("Avg Accuracy: {:.3f}% ({:.3f}%), Avg Loss: {:.3f} ({:.3f})".format( + mean(acc_list), mean(mean_acc_record), mean(loss_list), mean(mean_loss_record))) + recorder.record("Average", accuracy=mean(acc_list), loss=mean(loss_list)) top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) top_1_acc_record.append(top_1_acc) + top_1_loss_record.append(top_1_loss) print( - "Top-1\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f}".format( - top_1_acc, mean(top_1_acc_record), loss_list[0]) + "Top-1\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f}({:.3f})".format( + top_1_acc, mean(top_1_acc_record), top_1_loss, mean(top_1_loss_record)) ) + recorder.record("Top-1", accuracy=top_1_acc, 
loss=top_1_loss) - # multiple_result[0].learnwares - reuse_ensemble = AveragingReuser(learnware_list=[item.learnware for item in single_result[:3]], mode="vote_by_prob") + reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") + # reuse_ensemble = AveragingReuser(learnware_list=[item.learnware for item in single_result[:3]], mode="vote_by_prob") ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) ensemble_acc_record.append(ensemble_acc) + ensemble_loss_record.append(ensemble_loss) print( - "Averaging Reuse\tAccuracy: {:.3f} ({:.3f}%), Loss: {:.3f}".format( - ensemble_acc, mean(ensemble_acc_record), ensemble_loss) + "Averaging Reuse\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f} ({:.3f})".format( + ensemble_acc, mean(ensemble_acc_record), ensemble_loss, mean(ensemble_loss_record)) ) + recorder.record("Voting Reuse", accuracy=ensemble_acc, loss=ensemble_loss) + + print(recorder.latest()) + print(recorder.accumulated()) if __name__ == "__main__": diff --git a/examples/dataset_cifar_workflow/prepare.bash b/examples/dataset_cifar_workflow/prepare.bash index 9d48246..f967115 100644 --- a/examples/dataset_cifar_workflow/prepare.bash +++ b/examples/dataset_cifar_workflow/prepare.bash @@ -12,5 +12,5 @@ echo "The output is redirected to log/${token}.log with token ${token}" export CUDA_VISIBLE_DEVICES=1 # shellcheck disable=SC2086 -nohup python -u main.py prepare_learnware --market_id="momo" --rebuild > "./log/${token}.log" 2>&1 & +nohup python -u main.py prepare_learnware --market_id="momo" > "./log/${token}.log" 2>&1 & echo "With PID = $!" 
\ No newline at end of file diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 0d115b8..757aaf7 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -335,7 +335,22 @@ class RKMEImageSpecification(RegularStatSpecification): return K_12 def herding(self, T: int) -> np.ndarray: - raise NotImplementedError("The function herding hasn't been supported in Image RKME Specification.") + """Iteratively sample examples from an unknown distribution with the help of its RKME specification + + Parameters + ---------- + T : int + Total iteration number for sampling. + + Returns + ------- + np.ndarray + A collection of examples which approximate the unknown distribution. + """ + indices = torch.multinomial(self.beta, T, replacement=True) + mock = self.z[indices] + torch.randn_like(self.z[indices]) * 0.01 + + return mock.numpy() def _sampling_candidates(self, N: int) -> np.ndarray: raise NotImplementedError() From 5037339759742fdd7c0488038a896f1fa9417174 Mon Sep 17 00:00:00 2001 From: shihy Date: Mon, 4 Dec 2023 21:34:34 +0800 Subject: [PATCH 07/25] [MNT] Job Selector with a better tensor support --- learnware/reuse/job_selector.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/learnware/reuse/job_selector.py b/learnware/reuse/job_selector.py index 467e063..773c3c1 100644 --- a/learnware/reuse/job_selector.py +++ b/learnware/reuse/job_selector.py @@ -59,8 +59,11 @@ class JobSelectorReuser(BaseReuser): for idx in range(len(self.learnware_list)): data_idx_list = np.where(select_result == idx)[0] if len(data_idx_list) > 0: - # pred_y = self.learnware_list[idx].predict(raw_user_data[data_idx_list]) - pred_y = self.learnware_list[idx].predict([raw_user_data[i] for i in data_idx_list]) + if isinstance(raw_user_data, list): + pred_y = self.learnware_list[idx].predict([raw_user_data[i] for i in data_idx_list]) + else: + pred_y = 
self.learnware_list[idx].predict(raw_user_data[data_idx_list]) + if isinstance(pred_y, torch.Tensor): pred_y = pred_y.detach().cpu().numpy() # elif isinstance(pred_y, tf.Tensor): @@ -89,6 +92,9 @@ class JobSelectorReuser(BaseReuser): user_data : np.ndarray User's raw data. """ + if torch.is_tensor(user_data): + user_data = user_data.detach().cpu().numpy() + if len(self.learnware_list) == 1: # user_data_num = user_data.shape[0] user_data_num = len(user_data) From f07d92da718dcd31d87a683a1efe46c7f1c5c282 Mon Sep 17 00:00:00 2001 From: shihy Date: Mon, 4 Dec 2023 23:58:53 +0800 Subject: [PATCH 08/25] [ENH] Finish Unlabeled Benchmark for Image --- .../benchmarks/utils.py | 6 ++-- examples/dataset_cifar_workflow/evaluate.bash | 4 +-- examples/dataset_cifar_workflow/main.py | 35 +++++-------------- learnware/specification/regular/image/rkme.py | 14 +++++++- setup.py | 1 + 5 files changed, 27 insertions(+), 33 deletions(-) diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_cifar_workflow/benchmarks/utils.py index b1f42fd..66a8f0c 100644 --- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -116,7 +116,7 @@ def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv" # build specification loader = DataLoader(spec_set, batch_size=3000, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False) + spec = generate_rkme_image_spec(sampled_X, whitening=False, cross_platform=False) # add to market model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) @@ -174,7 +174,7 @@ def build_specification(name: str, cache_id, order, sampled_size=3000): test_dataset, spec_dataset, indices, _ = user_data(order=order) loader = DataLoader(spec_dataset, batch_size=sampled_size, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False) + spec = 
generate_rkme_image_spec(sampled_X, whitening=False, cross_platform=False) spec.msg = indices.tolist() spec.save(cache_path) @@ -199,7 +199,7 @@ class Recorder: return str(tabulate(table, headers=["Case", "Accuracy", "Loss"], tablefmt='orgtbl')) - def accumulated(self): + def summary(self): table = [] for name, values in self.data.items(): diff --git a/examples/dataset_cifar_workflow/evaluate.bash b/examples/dataset_cifar_workflow/evaluate.bash index 99cbc5a..bf70a96 100644 --- a/examples/dataset_cifar_workflow/evaluate.bash +++ b/examples/dataset_cifar_workflow/evaluate.bash @@ -10,7 +10,7 @@ token="$(date +%s)" mkdir -p "./log" echo "The output is redirected to log/${token}.log with token ${token}" -export CUDA_VISIBLE_DEVICES=0 +export CUDA_VISIBLE_DEVICES=5 # shellcheck disable=SC2086 -nohup python -u main.py evaluate --market_id="momo" > "./log/${token}.log" 2>&1 & +nohup python -u main.py evaluate_unlabeled --market_id="momo" > "./log/${token}.log" 2>&1 & echo "With PID = $!" \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index 3842de2..6fcbc0f 100644 --- a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -9,7 +9,7 @@ import learnware from benchmarks.utils import build_learnware, build_specification, evaluate, Recorder from learnware.client import LearnwareClient from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.reuse import AveragingReuser +from learnware.reuse import JobSelectorReuser, AveragingReuser from learnware.specification import generate_rkme_image_spec PROXY_IP = "172.24.57.111" @@ -36,7 +36,7 @@ class CifarDatasetWorkflow: print("Total Item:", len(market)) - def evaluate(self, user_size=100, market_id=None): + def evaluate_unlabeled(self, user_size=100, market_id=None): learnware.init() market_id = "dataset_cifar_workflow" if market_id is None else market_id @@ -73,41 +73,22 @@ class 
CifarDatasetWorkflow: loss, acc = evaluate(item, dataset) loss_list.append(loss) acc_list.append(acc) - - best_acc_record.append(max(acc_list)) - best_loss_record.append(min(loss_list)) - print("Best Accuracy: {:.3f}% ({:.3f}%), Best Loss: {:.3f} ({:.3f})".format( - max(acc_list), mean(best_acc_record), min(loss_list), mean(best_loss_record))) recorder.record("Best", accuracy=max(acc_list), loss=min(loss_list)) - - mean_acc_record.append(mean(acc_list)) - mean_loss_record.append(mean(loss_list)) - print("Avg Accuracy: {:.3f}% ({:.3f}%), Avg Loss: {:.3f} ({:.3f})".format( - mean(acc_list), mean(mean_acc_record), mean(loss_list), mean(mean_loss_record))) recorder.record("Average", accuracy=mean(acc_list), loss=mean(loss_list)) top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) - top_1_acc_record.append(top_1_acc) - top_1_loss_record.append(top_1_loss) - print( - "Top-1\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f}({:.3f})".format( - top_1_acc, mean(top_1_acc_record), top_1_loss, mean(top_1_loss_record)) - ) - recorder.record("Top-1", accuracy=top_1_acc, loss=top_1_loss) + recorder.record("Top-1 Learnware", accuracy=top_1_acc, loss=top_1_loss) reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") # reuse_ensemble = AveragingReuser(learnware_list=[item.learnware for item in single_result[:3]], mode="vote_by_prob") ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) - ensemble_acc_record.append(ensemble_acc) - ensemble_loss_record.append(ensemble_loss) - print( - "Averaging Reuse\tAccuracy: {:.3f}% ({:.3f}%), Loss: {:.3f} ({:.3f})".format( - ensemble_acc, mean(ensemble_acc_record), ensemble_loss, mean(ensemble_loss_record)) - ) recorder.record("Voting Reuse", accuracy=ensemble_acc, loss=ensemble_loss) - print(recorder.latest()) - print(recorder.accumulated()) + reuse_job_selector = JobSelectorReuser(learnware_list=multiple_result[0].learnwares, use_herding=False) + job_loss, job_acc = 
evaluate(reuse_job_selector, dataset) + recorder.record("Job Selector", accuracy=job_acc, loss=job_loss) + + print(recorder.summary()) if __name__ == "__main__": diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 757aaf7..a10948d 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -155,7 +155,13 @@ class RKMEImageSpecification(RegularStatSpecification): self._random_generator = RandomGenerator(0) # crucial - setup_seed(0) + torch.manual_seed(0) + torch.cuda.manual_seed_all(0) + torch.backends.cudnn.deterministic = True + if ("cross_platform" not in kwargs or kwargs["cross_platform"]) + torch.cuda.set_rng_state( + new_state=torch.cuda.get_rng_state(self._device.index), + device="cpu") random_models = list(self._generate_models(n_models=self.n_models, channel=X.shape[1])) self.z = torch.zeros(Z_shape).to(self._device).float() @@ -181,6 +187,12 @@ class RKMEImageSpecification(RegularStatSpecification): self._update_z(x_features, optimizer, random_models=random_models) self._update_beta(x_features, nonnegative_beta, random_models=random_models) + # Recovering Random Number Generation Settings + if ("cross-platform" not in kwargs or kwargs["cross-platform"]) + torch.cuda.set_rng_state( + new_state=torch.cuda.get_rng_state(self._device.index), + device="cuda") + @torch.no_grad() def _update_beta(self, x_features: Any, nonnegative_beta: bool = True, random_models=None): Z = self.z diff --git a/setup.py b/setup.py index 0c636b8..2ade5e3 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ REQUIRED = [ "pandas>=0.25.1", "scipy>=1.0.0", "tqdm>=4.65.0", + "tabulate", "scikit-learn>=0.22", "joblib>=1.2.0", "pyyaml>=6.0", From c4b718618a866a7139120417a6f67acfbf200aca Mon Sep 17 00:00:00 2001 From: shihy Date: Tue, 5 Dec 2023 00:03:53 +0800 Subject: [PATCH 09/25] [FIX] Fix typo --- learnware/specification/regular/image/rkme.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index a10948d..4a67289 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -158,7 +158,7 @@ class RKMEImageSpecification(RegularStatSpecification): torch.manual_seed(0) torch.cuda.manual_seed_all(0) torch.backends.cudnn.deterministic = True - if ("cross_platform" not in kwargs or kwargs["cross_platform"]) + if ("cross_platform" not in kwargs or kwargs["cross_platform"]): torch.cuda.set_rng_state( new_state=torch.cuda.get_rng_state(self._device.index), device="cpu") @@ -188,7 +188,7 @@ class RKMEImageSpecification(RegularStatSpecification): self._update_beta(x_features, nonnegative_beta, random_models=random_models) # Recovering Random Number Generation Settings - if ("cross-platform" not in kwargs or kwargs["cross-platform"]) + if ("cross_platform" not in kwargs or kwargs["cross_platform"]): torch.cuda.set_rng_state( new_state=torch.cuda.get_rng_state(self._device.index), device="cuda") From ed7aaa2a638cae6428c91896879143aba21007f1 Mon Sep 17 00:00:00 2001 From: shihy Date: Tue, 5 Dec 2023 12:59:28 +0800 Subject: [PATCH 10/25] [MNT] RKME Image Supports parameter sample size --- learnware/specification/module.py | 3 +- learnware/specification/regular/image/rkme.py | 76 ++++++++++++------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/learnware/specification/module.py b/learnware/specification/module.py index 13624f9..469f515 100644 --- a/learnware/specification/module.py +++ b/learnware/specification/module.py @@ -73,6 +73,7 @@ def generate_rkme_image_spec( step_size: float = 0.01, steps: int = 100, resize: bool = True, + sample_size: int = 5000, nonnegative_beta: bool = True, reduce: bool = True, verbose: bool = True, @@ -120,7 +121,7 @@ def generate_rkme_image_spec( # Generate rkme spec rkme_image_spec = RKMEImageSpecification(cuda_idx=cuda_idx) 
rkme_image_spec.generate_stat_spec_from_data( - X, reduced_set_size, step_size, steps, resize, nonnegative_beta, reduce, verbose, **kwargs + X, reduced_set_size, step_size, steps, resize, sample_size, nonnegative_beta, reduce, verbose, **kwargs ) return rkme_image_spec diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 4a67289..4c654f5 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -7,6 +7,7 @@ import json import os from typing import Any +from contextlib import contextmanager import numpy as np import torch @@ -73,6 +74,7 @@ class RKMEImageSpecification(RegularStatSpecification): step_size: float = 0.01, steps: int = 100, resize: bool = True, + sample_size: int = 5000, nonnegative_beta: bool = True, reduce: bool = True, verbose: bool = True, @@ -92,6 +94,8 @@ class RKMEImageSpecification(RegularStatSpecification): Total rounds in the iterative optimization. resize : bool Whether to scale the image to the requested size, by default True. + sample_size: int + Size of sampled set used to generate specification nonnegative_beta : bool, optional True if weights for the reduced set are intended to be kept non-negative, by default False. 
reduce : bool, optional @@ -153,45 +157,40 @@ class RKMEImageSpecification(RegularStatSpecification): self.beta = torch.from_numpy(self.beta).to(self._device) return - self._random_generator = RandomGenerator(0) - # crucial - torch.manual_seed(0) - torch.cuda.manual_seed_all(0) - torch.backends.cudnn.deterministic = True - if ("cross_platform" not in kwargs or kwargs["cross_platform"]): - torch.cuda.set_rng_state( - new_state=torch.cuda.get_rng_state(self._device.index), - device="cpu") - - random_models = list(self._generate_models(n_models=self.n_models, channel=X.shape[1])) - self.z = torch.zeros(Z_shape).to(self._device).float() - self._random_generator.normal_(self.z, 0, 1) + # auto sample + if len(X_train) > sample_size: + indices = np.random.choice(len(X_train), size=sample_size, replace=False) + X_train = X_train(indices) - with torch.no_grad(): - x_features = self._generate_random_feature(X_train, random_models=random_models) - self._update_beta(x_features, nonnegative_beta, random_models=random_models) - try: import torch_optimizer except ModuleNotFoundError: - raise ModuleNotFoundError(f"RKMEImageSpecification is not available because 'torch-optimizer' is not installed! Please install it manually.") - - optimizer = torch_optimizer.AdaBelief([{"params": [self.z]}], lr=step_size, eps=1e-16) + raise ModuleNotFoundError( + f"RKMEImageSpecification is not available because 'torch-optimizer' is not installed! 
Please install it manually.") + + cross_platform = "cross_platform" not in kwargs or kwargs["cross_platform"] + # crucial + with deterministic(cross_platform, self._device) as random_generator: + self._random_generator = random_generator - for _ in tqdm(range(steps)) if verbose else range(steps): - # Regenerate Random Models random_models = list(self._generate_models(n_models=self.n_models, channel=X.shape[1])) + self.z = torch.zeros(Z_shape).to(self._device).float() + self._random_generator.normal_(self.z, 0, 1) with torch.no_grad(): x_features = self._generate_random_feature(X_train, random_models=random_models) - self._update_z(x_features, optimizer, random_models=random_models) self._update_beta(x_features, nonnegative_beta, random_models=random_models) - # Recovering Random Number Generation Settings - if ("cross_platform" not in kwargs or kwargs["cross_platform"]): - torch.cuda.set_rng_state( - new_state=torch.cuda.get_rng_state(self._device.index), - device="cuda") + optimizer = torch_optimizer.AdaBelief([{"params": [self.z]}], lr=step_size, eps=1e-16) + + for _ in tqdm(range(steps)) if verbose else range(steps): + # Regenerate Random Models + random_models = list(self._generate_models(n_models=self.n_models, channel=X.shape[1])) + + with torch.no_grad(): + x_features = self._generate_random_feature(X_train, random_models=random_models) + self._update_z(x_features, optimizer, random_models=random_models) + self._update_beta(x_features, nonnegative_beta, random_models=random_models) @torch.no_grad() def _update_beta(self, x_features: Any, nonnegative_beta: bool = True, random_models=None): @@ -448,6 +447,27 @@ class RandomGenerator: with torch.no_grad(): tensor.copy_(torch.asarray(data, dtype=tensor.dtype)) + +@contextmanager +def deterministic(cross_platform, device): + torch.manual_seed(0) + torch.cuda.manual_seed_all(0) + deterministic_state = torch.backends.cudnn.deterministic + torch.backends.cudnn.deterministic = True + if cross_platform: + 
torch.cuda.set_rng_state( + new_state=torch.cuda.get_rng_state(device.index), + device="cpu") + + yield RandomGenerator(0) + + torch.backends.cudnn.deterministic = deterministic_state + if cross_platform: + torch.cuda.set_rng_state( + new_state=torch.cuda.get_rng_state(device.index), + device="cuda") + + class _ConvNet_wide(nn.Module): def __init__(self, channel, random_generator, mu=None, sigma=None, k=2, net_width=128, net_depth=3, im_size=(32, 32)): self.k = k From 1f108d88de42931365cedd322ae41f3387a683a3 Mon Sep 17 00:00:00 2001 From: shihy Date: Thu, 7 Dec 2023 00:16:09 +0800 Subject: [PATCH 11/25] [MNT] Faster Training --- .../benchmarks/dataset/data.py | 40 +++-- .../benchmarks/dataset/utils.py | 7 +- .../benchmarks/utils.py | 141 ++++++++++-------- learnware/specification/regular/image/rkme.py | 17 ++- 4 files changed, 118 insertions(+), 87 deletions(-) diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py index 6a87c8a..39dcc7e 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/data.py @@ -5,27 +5,37 @@ import torch from torch.utils.data import random_split, Subset from torchvision import datasets from torchvision.transforms import transforms +from torch.utils.data import TensorDataset +from .utils import cached from examples.dataset_cifar_workflow.benchmarks.dataset.utils import split_dataset, build_transforms cache_root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'cache')) -cifar_data = torch.stack([u[0] for u in datasets.CIFAR10(root="cache", download=True, - train=True, transform=transforms.ToTensor())]) -augment_transform, regular_transform, whiten_transform = build_transforms(cifar_data) - -cifar_train_set_augment = datasets.CIFAR10(root="cache", download=True, - train=True, transform=whiten_transform) -cifar_test_set = datasets.CIFAR10(root="cache", download=True, - 
train=False, transform=whiten_transform) -cifar_spec_train_set = datasets.CIFAR10(root="cache", download=True, - train=True, transform=whiten_transform) -cifar_spec_test_set = datasets.CIFAR10(root="cache", download=True, - train=False, transform=whiten_transform) +cifar_train = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=transforms.ToTensor()) +cifar_train_X = torch.stack([u[0] for u in cifar_train]) +augment_transform, regular_transform, whiten_transform = build_transforms(cifar_train_X) + +cifar_train_set_augment = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=whiten_transform) +cifar_test_set = datasets.CIFAR10(root=cache_root, download=True, train=False, transform=whiten_transform) +cifar_spec_train_set = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=whiten_transform) +cifar_spec_test_set = datasets.CIFAR10(root=cache_root, download=True, train=False, transform=whiten_transform) +train_targets = cifar_train_set_augment.targets +test_targets = cifar_test_set.targets + +def faster_train(device): + global cifar_train_set_augment + global cifar_test_set + global cifar_spec_train_set + global cifar_spec_test_set + cifar_train_set_augment = cached(cifar_train_set_augment, device=device) + cifar_test_set = cached(cifar_test_set, device=device) + cifar_spec_train_set = cached(cifar_spec_train_set, device=device) + cifar_spec_test_set = cached(cifar_spec_test_set, device=device) def uploader_data(order=None): - train_indices, order = split_dataset(torch.asarray(cifar_train_set_augment.targets), 12500, split="uploader", order=order) - valid_indices, _ = split_dataset(torch.asarray(cifar_test_set.targets), 2000, split="uploader", order=order) + train_indices, order = split_dataset(torch.asarray(train_targets), 12500, split="uploader", order=order) + valid_indices, _ = split_dataset(torch.asarray(test_targets), 2000, split="uploader", order=order) return (Subset(cifar_train_set_augment, 
train_indices), Subset(cifar_test_set, valid_indices), @@ -34,6 +44,6 @@ def uploader_data(order=None): def user_data(indices=None, order=None): if indices is None: - indices, order = split_dataset(torch.asarray(cifar_spec_test_set.targets), 3000, split="user", order=order) + indices, order = split_dataset(torch.asarray(test_targets), 3000, split="user", order=order) return Subset(cifar_test_set, indices), Subset(cifar_spec_test_set, indices), indices, order \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py index 269eb04..1708cea 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py @@ -4,6 +4,7 @@ from functools import reduce import numpy as np import torch import torchvision +from torch.utils.data import TensorDataset, Dataset, DataLoader torchvision.disable_beta_transforms_warning() from torchvision.transforms import transforms, v2 @@ -85,4 +86,8 @@ def build_transforms(train_X): transforms.LinearTransformation(whitening_matrix, torch.zeros_like(train_X[0].reshape(-1))) ]) - return augment_transform, regular_transform, whiten_transform \ No newline at end of file + return augment_transform, regular_transform, whiten_transform + +def cached(data: Dataset, device): + X, y = next(iter(DataLoader(data, batch_size=len(data)))) + return TensorDataset(X.to(device), y.to(device)) diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_cifar_workflow/benchmarks/utils.py index 66a8f0c..faedd04 100644 --- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -14,12 +14,15 @@ from learnware.client import LearnwareClient from learnware.learnware import Learnware from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification from .dataset import uploader_data, user_data 
+from .dataset.utils import cached from .models.conv import ConvModel from learnware.market import LearnwareMarket from learnware.utils import choose_device +from torch.profiler import profile, record_function, ProfilerActivity + @torch.no_grad() -def evaluate(model, evaluate_set: Dataset, device=None): +def evaluate(model, evaluate_set: Dataset, device=None, distribution=True): device = choose_device(0) if device is None else device if isinstance(model, nn.Module): @@ -29,16 +32,20 @@ def evaluate(model, evaluate_set: Dataset, device=None): mapping = lambda m, x: m.predict(x) criterion = nn.CrossEntropyLoss(reduction="sum") - total, correct, loss = 0, 0, 0.0 - dataloader = DataLoader(evaluate_set, batch_size=512, shuffle=True) + total, correct, loss = 0, 0, torch.as_tensor(0.0, dtype=torch.float32, device=device) + dataloader = DataLoader(evaluate_set, batch_size=1024, shuffle=True) for i, (X, y) in enumerate(dataloader): X, y = X.to(device), y.to(device) out = mapping(model, X) if not torch.is_tensor(out): out = torch.from_numpy(out).to(device) - loss += criterion(out, y) - _, predicted = torch.max(out.data, 1) + if distribution: + loss += criterion(out, y) + _, predicted = torch.max(out.data, 1) + else: + predicted = out + total += y.size(0) correct += (predicted == y).sum().item() @@ -67,56 +74,17 @@ def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv" channel = train_set[0][0].shape[0] image_size = train_set[0][0].shape[1], train_set[0][0].shape[2] - model = ConvModel(channel=channel, im_size=image_size, n_random_features=out_classes).to(device) - model.train() - - # SGD optimizer with learning rate 1e-2 - optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - # Scheduler - # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) - # mean-squared error loss - criterion = nn.CrossEntropyLoss() - # Prepare DataLoader - dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) - # valid 
loss - best_loss = 100000 # initially - # Optimizing... - for epoch in range(epochs): - running_loss = [] - model.train() - for i, (X, y) in enumerate(dataloader): - X, y = X.to(device=device), y.to(device=device) - optimizer.zero_grad() - out = model(X) - loss = criterion(out, y) - loss.backward() - optimizer.step() - running_loss.append(loss.item()) - - valid_loss, valid_acc = evaluate(model, valid_set, device=device) - train_loss, train_acc = evaluate(model, train_set, device=device) - if valid_loss < best_loss: - best_loss = valid_loss - - torch.save(model.state_dict(), os.path.join(cache_dir, "model.pth")) - print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) - if valid_acc > 99.0: - print("Early Stopping at 99% !") - break - - if (epoch + 1) % 5 == 0: - print('Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}'.format( - epoch+1, np.mean(running_loss), train_acc, valid_loss)) - - # scheduler.step() + # train model + save_path = os.path.join(cache_dir, "model.pth") + train_model(model, train_set, valid_set, save_path, epochs=epochs, batch_size=batch_size, device=device) # build specification loader = DataLoader(spec_set, batch_size=3000, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, cross_platform=False) + spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=True) # add to market model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) @@ -158,6 +126,49 @@ def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv" return model +def train_model(model: nn.Module, train_set: Dataset, valid_set: Dataset, + save_path: str, epochs=35, batch_size=128, device=None): + device = choose_device(0) if device is None else device + + model.train() + # SGD optimizer with learning rate 1e-2 + optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) + # Scheduler + # scheduler = 
torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) + # mean-squared error loss + criterion = nn.CrossEntropyLoss() + # Prepare DataLoader + dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) + # valid loss + best_loss = 100000 # initially + # Optimizing... + for epoch in range(epochs): + running_loss = [] + model.train() + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device=device), y.to(device=device) + optimizer.zero_grad() + out = model(X) + loss = criterion(out, y) + loss.backward() + optimizer.step() + running_loss.append(loss.item()) + + valid_loss, valid_acc = evaluate(model, valid_set, device=device) + train_loss, train_acc = evaluate(model, train_set, device=device) + if valid_loss < best_loss: + best_loss = valid_loss + + torch.save(model.state_dict(), save_path) + print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) + if valid_acc > 99.0: + print("Early Stopping at 99% !") + break + + if (epoch + 1) % 5 == 0: + print('Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}'.format( + epoch+1, np.mean(running_loss), train_acc, valid_loss)) + def build_specification(name: str, cache_id, order, sampled_size=3000): cache_dir = os.path.abspath(os.path.join( @@ -174,7 +185,7 @@ def build_specification(name: str, cache_id, order, sampled_size=3000): test_dataset, spec_dataset, indices, _ = user_data(order=order) loader = DataLoader(spec_dataset, batch_size=sampled_size, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, cross_platform=False) + spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=True) spec.msg = indices.tolist() spec.save(cache_path) @@ -184,20 +195,14 @@ def build_specification(name: str, cache_id, order, sampled_size=3000): class Recorder: - def __init__(self): + def __init__(self, headers, formats): + assert len(headers) == len(formats) self.data = 
defaultdict(list) + self.headers = headers + self.formats = formats - def record(self, name, accuracy, loss): - self.data[name].append((accuracy, loss)) - - def latest(self): - table = [] - - for name, values in self.data.items(): - value = values[-1] - table.append([name, "{:.3f}%".format(value[0]), "{:.3f}".format(value[1])]) - - return str(tabulate(table, headers=["Case", "Accuracy", "Loss"], tablefmt='orgtbl')) + def record(self, name, *args): + self.data[name].append(args) def summary(self): table = [] @@ -205,8 +210,14 @@ class Recorder: for name, values in self.data.items(): value_mean = [np.mean(v) for v in zip(*values)] value_std = [np.std(v) for v in zip(*values)] - table.append([name, - "{:.3f}% ± {:.3f}%".format(value_mean[0], value_std[0]), - "{:.3f} ± {:.3f}" .format(value_mean[1], value_std[1])]) + table.append([name] + [f.format(m, s) for f, m, s in zip(self.formats, value_mean, value_std)]) + + return str(tabulate(table, headers=["Case"] + self.headers, tablefmt='orgtbl')) + + def save(self, path): + with open(path, "w") as f: + json.dump(self.data, f) - return str(tabulate(table, headers=["Case", "Accuracy", "Loss"], tablefmt='orgtbl')) \ No newline at end of file + def load(self, path): + with open(path, "r") as f: + self.data = json.load(f) \ No newline at end of file diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 4c654f5..20838dc 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -168,7 +168,8 @@ class RKMEImageSpecification(RegularStatSpecification): raise ModuleNotFoundError( f"RKMEImageSpecification is not available because 'torch-optimizer' is not installed! Please install it manually.") - cross_platform = "cross_platform" not in kwargs or kwargs["cross_platform"] + # Cross-platform by default, unless the spec is specified to be generated specifically for local experiments. 
+ cross_platform = "experimental" not in kwargs or not kwargs["experimental"] # crucial with deterministic(cross_platform, self._device) as random_generator: self._random_generator = random_generator @@ -439,13 +440,17 @@ def _get_zca_matrix(X, reg_coef=0.1): class RandomGenerator: - def __init__(self, seed=0): + def __init__(self, seed=0, cross_platform=True): + self.cross_platform=cross_platform self.state = RandomState(seed) def normal_(self, tensor: torch.Tensor, mean=0.0, std=1.0): - data = self.state.normal(mean, std, size=tensor.shape) - with torch.no_grad(): - tensor.copy_(torch.asarray(data, dtype=tensor.dtype)) + if self.cross_platform: + data = self.state.normal(mean, std, size=tensor.shape) + with torch.no_grad(): + tensor.copy_(torch.asarray(data, dtype=tensor.dtype)) + else: + torch.nn.init.normal_(tensor, mean, std) @contextmanager @@ -459,7 +464,7 @@ def deterministic(cross_platform, device): new_state=torch.cuda.get_rng_state(device.index), device="cpu") - yield RandomGenerator(0) + yield RandomGenerator(seed=0, cross_platform=cross_platform) torch.backends.cudnn.deterministic = deterministic_state if cross_platform: From 8d7dda1201f3b0913385b31f18c2bcf849917793 Mon Sep 17 00:00:00 2001 From: shihy Date: Thu, 7 Dec 2023 00:16:23 +0800 Subject: [PATCH 12/25] [MNT] Better supports for Torch --- learnware/reuse/ensemble_pruning.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/learnware/reuse/ensemble_pruning.py b/learnware/reuse/ensemble_pruning.py index 49c65b5..cf1ffb7 100644 --- a/learnware/reuse/ensemble_pruning.py +++ b/learnware/reuse/ensemble_pruning.py @@ -148,7 +148,9 @@ class EnsemblePruningReuser(BaseReuser): import geatpy as ea except ModuleNotFoundError: raise ModuleNotFoundError(f"EnsemblePruningReuser is not available because 'geatpy' is not installed! 
Please install it manually (only support python_version<3.11).") - + + if torch.is_tensor(v_true): + v_true = v_true.detach().cpu().numpy() model_num = v_predict.shape[1] From de9ca1445f690a0ad0ad6a7c1ee02f1a472bf58d Mon Sep 17 00:00:00 2001 From: shihy Date: Fri, 8 Dec 2023 18:16:00 +0800 Subject: [PATCH 13/25] [ENH] Finish Image Workflow --- .../benchmarks/dataset/utils.py | 5 +- .../benchmarks/utils.py | 58 ++++++++- examples/dataset_cifar_workflow/main.py | 111 +++++++++++++----- examples/dataset_cifar_workflow/mock.py | 41 ++----- learnware/specification/regular/image/rkme.py | 4 +- 5 files changed, 148 insertions(+), 71 deletions(-) diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py index 1708cea..2c6231a 100644 --- a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py @@ -6,6 +6,8 @@ import torch import torchvision from torch.utils.data import TensorDataset, Dataset, DataLoader +from learnware.utils import choose_device + torchvision.disable_beta_transforms_warning() from torchvision.transforms import transforms, v2 @@ -49,10 +51,11 @@ def split_dataset(labels, size, split="uploader", order=None): def build_zca_matrix(X, reg_coef=0.1): X = (X - torch.mean(X, [0, 2, 3], keepdim=True)) / (torch.std(X, [0, 2, 3], keepdim=True)) + device = choose_device(0) X_flat = X.reshape(X.shape[0], -1) cov = (X_flat.T @ X_flat) / X_flat.shape[0] reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] - u, s, _ = torch.svd(cov.cuda() + reg_amount * torch.eye(cov.shape[0]).cuda()) + u, s, _ = torch.svd(cov.to(device) + reg_amount * torch.eye(cov.shape[0]).to(device)) inv_sqrt_zca_eigs = s ** (-0.5) whitening_transform = torch.einsum( 'ij,j,kj->ik', u, inv_sqrt_zca_eigs, u) diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_cifar_workflow/benchmarks/utils.py index faedd04..085258d 100644 
--- a/examples/dataset_cifar_workflow/benchmarks/utils.py +++ b/examples/dataset_cifar_workflow/benchmarks/utils.py @@ -3,6 +3,8 @@ import os import zipfile from collections import defaultdict from shutil import rmtree + +from matplotlib import pyplot as plt from tabulate import tabulate import numpy as np @@ -127,7 +129,8 @@ def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv" return model def train_model(model: nn.Module, train_set: Dataset, valid_set: Dataset, - save_path: str, epochs=35, batch_size=128, device=None): + save_path: str, epochs=35, batch_size=128, + device=None, verbose=True): device = choose_device(0) if device is None else device model.train() @@ -160,12 +163,14 @@ def train_model(model: nn.Module, train_set: Dataset, valid_set: Dataset, best_loss = valid_loss torch.save(model.state_dict(), save_path) - print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) + if verbose: + print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) if valid_acc > 99.0: - print("Early Stopping at 99% !") + if verbose: + print("Early Stopping at 99% !") break - if (epoch + 1) % 5 == 0: + if verbose and (epoch + 1) % 5 == 0: print('Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}'.format( epoch+1, np.mean(running_loss), train_acc, valid_loss)) @@ -214,10 +219,53 @@ class Recorder: return str(tabulate(table, headers=["Case"] + self.headers, tablefmt='orgtbl')) + def __getitem__(self, item): + return [[x[item] for x in v] for k, v in self.data.items()] + def save(self, path): with open(path, "w") as f: json.dump(self.data, f) def load(self, path): with open(path, "r") as f: - self.data = json.load(f) \ No newline at end of file + self.data = json.load(f) + + +def plot_labeled_performance_curves(name, user_mat, pruning_mat, n_labeled_list, save_path=None): + plt.figure(figsize=(10, 6)) + plt.xticks(range(len(n_labeled_list)), 
n_labeled_list) + + mats = [user_mat, pruning_mat] + + styles = [ + {"color": "navy", "linestyle": "-", "marker": "o"}, + {"color": "magenta", "linestyle": "-.", "marker": "d"}, + ] + + labels = [ + "User Model", + "Multiple Learnware Reuse (EnsemblePrune)", + ] + + for mat, style, label in zip(mats, styles, labels): + array_mat = 1 - np.asarray(mat) / 100 + mean_curve, std_curve = np.mean(array_mat, axis=1), np.std(array_mat, axis=1) + plt.plot(mean_curve, **style, label=label) + plt.fill_between( + range(len(n_labeled_list)), + mean_curve - 0.5 * std_curve, + mean_curve + 0.5 * std_curve, + color=style["color"], + alpha=0.2, + ) + + plt.xlabel("Labeled Data Size") + plt.ylabel("1 - Accuracy") + plt.title(f"{name} Homo Limited Labeled Data") + plt.legend() + plt.tight_layout() + if save_path: + plt.savefig( + save_path, bbox_inches="tight", dpi=600 + ) + plt.show() diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py index 6fcbc0f..1de6796 100644 --- a/examples/dataset_cifar_workflow/main.py +++ b/examples/dataset_cifar_workflow/main.py @@ -1,24 +1,30 @@ import os +from datetime import datetime import fire import numpy as np +import tqdm from numpy import mean -from torch.utils.data import DataLoader +import torch +from torch.utils.data import DataLoader, TensorDataset import learnware -from benchmarks.utils import build_learnware, build_specification, evaluate, Recorder +from benchmarks.utils import * +from benchmarks.dataset.data import faster_train, uploader_data +from benchmarks.models.conv import ConvModel from learnware.client import LearnwareClient from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.reuse import JobSelectorReuser, AveragingReuser -from learnware.specification import generate_rkme_image_spec +from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser +from learnware.utils import choose_device + +PROXY_IP = "172.27.138.61" 
+os.environ["HTTP_PROXY"] = "http://" + PROXY_IP + ":7890" +os.environ["HTTPS_PROXY"] = "http://" + PROXY_IP + ":7890" -PROXY_IP = "172.24.57.111" -os.environ["HTTP_PROXY"] = "http://"+PROXY_IP+":7890" -os.environ["HTTPS_PROXY"] = "http://"+PROXY_IP+":7890" class CifarDatasetWorkflow: - def prepare_learnware(self, market_size=50, market_id=None, rebuild=False): + def prepare(self, market_size=50, market_id=None, rebuild=False, faster=True): """initialize learnware market""" learnware.init() assert not rebuild @@ -29,14 +35,17 @@ class CifarDatasetWorkflow: print("Using market_id", market_id) market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=rebuild) + device = choose_device(0) + if faster: + faster_train(device) for i, order in enumerate(orders[len(market):]): - print("=" * 20 + "learnware {}".format(i) + "=" * 20) + print("=" * 20 + "learnware {}".format(len(market)) + "=" * 20) print("order:", order) - build_learnware("cifar10", market, order) + build_learnware("cifar10", market, order, device=device) print("Total Item:", len(market)) - def evaluate_unlabeled(self, user_size=100, market_id=None): + def evaluate(self, user_size=100, market_id=None, faster=True): learnware.init() market_id = "dataset_cifar_workflow" if market_id is None else market_id @@ -45,23 +54,24 @@ class CifarDatasetWorkflow: print("Using market_id", market_id) market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) - top_1_acc_record, ensemble_acc_record, best_acc_record, mean_acc_record = [], [], [], [] - top_1_loss_record, ensemble_loss_record, best_loss_record, mean_loss_record = [], [], [], [] - - recorder = Recorder() + device = choose_device(0) + if faster: + faster_train(device) + unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) + labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) for i, order in enumerate(orders): print("=" * 20 + "user {}".format(i) + 
"=" * 20) print("order:", order) user_spec, dataset = build_specification("cifar10", i, order) user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( - self=None, - description="For Cifar Dataset Workflow", - data_type="Image", - task_type="Classification", - library_type="PyTorch", - scenarios=["Computer"], - output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), + self=None, + description="For Cifar Dataset Workflow", + data_type="Image", + task_type="Classification", + library_type="PyTorch", + scenarios=["Computer"], + output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), stat_info={"RKMEImageSpecification": user_spec}) search_result = market.search_learnware(user_info) @@ -73,22 +83,61 @@ class CifarDatasetWorkflow: loss, acc = evaluate(item, dataset) loss_list.append(loss) acc_list.append(acc) - recorder.record("Best", accuracy=max(acc_list), loss=min(loss_list)) - recorder.record("Average", accuracy=mean(acc_list), loss=mean(loss_list)) + unlabeled.record("Best", max(acc_list), min(loss_list)) + unlabeled.record("Average", mean(acc_list), mean(loss_list)) - top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) - recorder.record("Top-1 Learnware", accuracy=top_1_acc, loss=top_1_loss) + top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) + unlabeled.record("Top-1 Learnware", top_1_acc, top_1_loss) reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") - # reuse_ensemble = AveragingReuser(learnware_list=[item.learnware for item in single_result[:3]], mode="vote_by_prob") ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) - recorder.record("Voting Reuse", accuracy=ensemble_acc, loss=ensemble_loss) + unlabeled.record("Voting Reuse", ensemble_acc, ensemble_loss) reuse_job_selector = JobSelectorReuser(learnware_list=multiple_result[0].learnwares, use_herding=False) job_loss, 
job_acc = evaluate(reuse_job_selector, dataset) - recorder.record("Job Selector", accuracy=job_acc, loss=job_loss) - - print(recorder.summary()) + unlabeled.record("Job Selector", job_acc, job_loss) + + train_set, valid_set, spec_set, order = uploader_data(order=order) + for labeled_size in tqdm.tqdm([100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]): + loader = DataLoader(train_set, batch_size=labeled_size, shuffle=True) + X, y = next(iter(loader)) + + sampled_dataset = TensorDataset(X, y) + mode_save_path = os.path.abspath(os.path.join(__file__, "..", "cache", "model.pth")) + model = ConvModel(channel=X.shape[1], im_size=(X.shape[2], X.shape[3]), + n_random_features=10).to(device) + train_model(model, sampled_dataset, sampled_dataset, mode_save_path, + epochs=35, batch_size=128, device=device, verbose=False) + model.load_state_dict(torch.load(mode_save_path)) + _, train_acc = evaluate(model, dataset, distribution=True) + + ensemble_pruning = EnsemblePruningReuser(learnware_list=multiple_result[0].learnwares) + ensemble_pruning.fit(val_X=X, val_y=y) + _, pruning_acc = evaluate(ensemble_pruning, dataset, distribution=False) + + labeled.record("{:d}".format(labeled_size), train_acc, pruning_acc) + + print(unlabeled.summary()) + print(labeled.summary()) + + # Save recorder + current_time = datetime.now() + formatted_time = current_time.strftime("%Y-%m-%d_%H-%M-%S") + log_dir = os.path.abspath(os.path.join(__file__, "..", "log", formatted_time)) + os.makedirs(log_dir, exist_ok=True) + unlabeled.save(os.path.join(log_dir, "unlabeled.json")) + labeled.save(os.path.join(log_dir, "labeled.json")) + + def plot(self, record_dir): + unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) + labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) + + unlabeled.load(os.path.join(record_dir, "unlabeled.json")) + labeled.load(os.path.join(record_dir, "labeled.json")) + + plot_labeled_performance_curves("Image", 
labeled[0], labeled[1], + [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000], + save_path=os.path.abspath(os.path.join(__file__, "..", "labeled.png"))) if __name__ == "__main__": diff --git a/examples/dataset_cifar_workflow/mock.py b/examples/dataset_cifar_workflow/mock.py index 554718f..e08199b 100644 --- a/examples/dataset_cifar_workflow/mock.py +++ b/examples/dataset_cifar_workflow/mock.py @@ -64,7 +64,7 @@ def get_cifar10(output_channels=3, image_size=32, z_score=True, order=None): train_X = transform_data(X_train, whitening_mat) test_X = transform_data(X_train, whitening_mat) - selected_data_indexes, order = split_dataset(y_test, 3000, split="user", order=order) + selected_data_indexes, order = split_dataset(y_test, 10000, split="user", order=order) return TensorDataset(test_X[selected_data_indexes], y_test[selected_data_indexes]), order @@ -72,35 +72,12 @@ def get_cifar10(output_channels=3, image_size=32, z_score=True, order=None): if __name__ == "__main__": - # 3 5 - # learnware.init(deterministic=False) - # - # userset1, order = get_cifar10() - # print(order) - # loader = DataLoader(userset1, batch_size=3000, shuffle=True) - # sampled_X, _ = next(iter(loader)) - # spec = generate_rkme_image_spec(sampled_X, whitening=False) - # spec.msg = order - # spec.save("old1.json") - # old1 = spec - # - # # userset2 = userset1 - # userset2, order = get_cifar10() - # print(order) - # loader = DataLoader(userset2, batch_size=3000, shuffle=True) - # sampled_X, _ = next(iter(loader)) - # spec = generate_rkme_image_spec(sampled_X, whitening=False) - # spec.msg = order - # spec.save("old2.json") - # old2 = spec - # - # old1, order1 = get_spec("hope1.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - # old2, order2 = get_spec("hope2.json", order=[2, 3, 4, 5, 0, 1, 6, 7, 8, 9]) - # np.random.seed(0) - # random.seed(0) - old1, order1 = get_spec(None, order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - - old2, order2 = get_spec(None, order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) - print(order1, 
order2) - print(f(old1.dist(old2))) + # old1, order1 = get_spec("spec_1_V100.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + # old2, order2 = get_spec("spec_2_A100.json", order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) + + old3, order3 = get_spec("spec_3_A100.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + old4, order4 = get_spec("spec_6_A100.json", order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) + + print(order3, order4) + print(f(old3.dist(old4))) diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 20838dc..f957fee 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -459,7 +459,7 @@ def deterministic(cross_platform, device): torch.cuda.manual_seed_all(0) deterministic_state = torch.backends.cudnn.deterministic torch.backends.cudnn.deterministic = True - if cross_platform: + if cross_platform and torch.cuda.is_available(): torch.cuda.set_rng_state( new_state=torch.cuda.get_rng_state(device.index), device="cpu") @@ -467,7 +467,7 @@ def deterministic(cross_platform, device): yield RandomGenerator(seed=0, cross_platform=cross_platform) torch.backends.cudnn.deterministic = deterministic_state - if cross_platform: + if cross_platform and torch.cuda.is_available(): torch.cuda.set_rng_state( new_state=torch.cuda.get_rng_state(device.index), device="cuda") From bae46ec9fa3aec21096eb256635078e9aae55664 Mon Sep 17 00:00:00 2001 From: shihy Date: Fri, 8 Dec 2023 18:17:39 +0800 Subject: [PATCH 14/25] [MNT] Clear bash --- examples/dataset_cifar_workflow/evaluate.bash | 16 ---------------- examples/dataset_cifar_workflow/prepare.bash | 16 ---------------- 2 files changed, 32 deletions(-) delete mode 100644 examples/dataset_cifar_workflow/evaluate.bash delete mode 100644 examples/dataset_cifar_workflow/prepare.bash diff --git a/examples/dataset_cifar_workflow/evaluate.bash b/examples/dataset_cifar_workflow/evaluate.bash deleted file mode 100644 index bf70a96..0000000 --- 
a/examples/dataset_cifar_workflow/evaluate.bash +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# shellcheck disable=SC1090 -source ~/anaconda3/etc/profile.d/conda.sh -conda activate dev - -export PYTHONPATH="${PYTHONPATH}:${HOME}/Lab/Learnware/" -echo ${PYTHONPATH} -token="$(date +%s)" -mkdir -p "./log" -echo "The output is redirected to log/${token}.log with token ${token}" - -export CUDA_VISIBLE_DEVICES=5. -# shellcheck disable=SC2086 -nohup python -u main.py evaluate_unlabeled --market_id="momo" > "./log/${token}.log" 2>&1 & -echo "With PID = $!" \ No newline at end of file diff --git a/examples/dataset_cifar_workflow/prepare.bash b/examples/dataset_cifar_workflow/prepare.bash deleted file mode 100644 index f967115..0000000 --- a/examples/dataset_cifar_workflow/prepare.bash +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -# shellcheck disable=SC1090 -source ~/anaconda3/etc/profile.d/conda.sh -conda activate dev - -export PYTHONPATH="${PYTHONPATH}:${HOME}/Lab/Learnware/" -echo ${PYTHONPATH} -token="$(date +%s)" -mkdir -p "./log" -echo "The output is redirected to log/${token}.log with token ${token}" - -export CUDA_VISIBLE_DEVICES=1 -# shellcheck disable=SC2086 -nohup python -u main.py prepare_learnware --market_id="momo" > "./log/${token}.log" 2>&1 & -echo "With PID = $!" 
\ No newline at end of file From 476c0e211bd88ea6dc7836081fcc94ebdebfb84a Mon Sep 17 00:00:00 2001 From: shihy Date: Fri, 8 Dec 2023 18:26:55 +0800 Subject: [PATCH 15/25] [MNT] Rename the folder --- examples/dataset_cifar_workflow/main.py | 144 -------- .../example_files/example_init.py | 0 .../example_files/example_yaml.yaml | 0 .../example_files/model.py | 0 .../get_data.py | 0 examples/dataset_image_workflow(old)/main.py | 216 +++++++++++ .../utils.py | 0 .../benchmarks/__init__.py | 0 .../benchmarks/dataset/__init__.py | 0 .../benchmarks/dataset/data.py | 0 .../benchmarks/dataset/utils.py | 0 .../benchmarks/models/__init__.py | 0 .../benchmarks/models/conv/__init__.py | 0 .../benchmarks/models/conv/model.py | 0 .../benchmarks/models/conv/requirements.txt | 0 .../benchmarks/models/learnware.yaml | 0 .../benchmarks/utils.py | 0 examples/dataset_image_workflow/main.py | 346 +++++++----------- .../mock.py | 0 19 files changed, 353 insertions(+), 353 deletions(-) delete mode 100644 examples/dataset_cifar_workflow/main.py rename examples/{dataset_image_workflow => dataset_image_workflow(old)}/example_files/example_init.py (100%) rename examples/{dataset_image_workflow => dataset_image_workflow(old)}/example_files/example_yaml.yaml (100%) rename examples/{dataset_image_workflow => dataset_image_workflow(old)}/example_files/model.py (100%) rename examples/{dataset_image_workflow => dataset_image_workflow(old)}/get_data.py (100%) create mode 100644 examples/dataset_image_workflow(old)/main.py rename examples/{dataset_image_workflow => dataset_image_workflow(old)}/utils.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/__init__.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/dataset/__init__.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/dataset/data.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/dataset/utils.py 
(100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/models/__init__.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/models/conv/__init__.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/models/conv/model.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/models/conv/requirements.txt (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/models/learnware.yaml (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/benchmarks/utils.py (100%) rename examples/{dataset_cifar_workflow => dataset_image_workflow}/mock.py (100%) diff --git a/examples/dataset_cifar_workflow/main.py b/examples/dataset_cifar_workflow/main.py deleted file mode 100644 index 1de6796..0000000 --- a/examples/dataset_cifar_workflow/main.py +++ /dev/null @@ -1,144 +0,0 @@ -import os -from datetime import datetime - -import fire -import numpy as np -import tqdm -from numpy import mean -import torch -from torch.utils.data import DataLoader, TensorDataset - -import learnware -from benchmarks.utils import * -from benchmarks.dataset.data import faster_train, uploader_data -from benchmarks.models.conv import ConvModel -from learnware.client import LearnwareClient -from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser -from learnware.utils import choose_device - -PROXY_IP = "172.27.138.61" -os.environ["HTTP_PROXY"] = "http://" + PROXY_IP + ":7890" -os.environ["HTTPS_PROXY"] = "http://" + PROXY_IP + ":7890" - - -class CifarDatasetWorkflow: - - def prepare(self, market_size=50, market_id=None, rebuild=False, faster=True): - """initialize learnware market""" - learnware.init() - assert not rebuild - - market_id = "dataset_cifar_workflow" if market_id is None else market_id - orders = 
np.stack([np.random.permutation(10) for _ in range(market_size)]) - - print("Using market_id", market_id) - market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=rebuild) - - device = choose_device(0) - if faster: - faster_train(device) - for i, order in enumerate(orders[len(market):]): - print("=" * 20 + "learnware {}".format(len(market)) + "=" * 20) - print("order:", order) - build_learnware("cifar10", market, order, device=device) - - print("Total Item:", len(market)) - - def evaluate(self, user_size=100, market_id=None, faster=True): - learnware.init() - - market_id = "dataset_cifar_workflow" if market_id is None else market_id - orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) - - print("Using market_id", market_id) - market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) - - device = choose_device(0) - if faster: - faster_train(device) - unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) - labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) - for i, order in enumerate(orders): - print("=" * 20 + "user {}".format(i) + "=" * 20) - print("order:", order) - user_spec, dataset = build_specification("cifar10", i, order) - - user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( - self=None, - description="For Cifar Dataset Workflow", - data_type="Image", - task_type="Classification", - library_type="PyTorch", - scenarios=["Computer"], - output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), - stat_info={"RKMEImageSpecification": user_spec}) - - search_result = market.search_learnware(user_info) - single_result = search_result.get_single_results() - multiple_result = search_result.get_multiple_results() - - loss_list, acc_list = [], [] - for item in market.get_learnwares(): - loss, acc = evaluate(item, dataset) - loss_list.append(loss) - 
acc_list.append(acc) - unlabeled.record("Best", max(acc_list), min(loss_list)) - unlabeled.record("Average", mean(acc_list), mean(loss_list)) - - top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) - unlabeled.record("Top-1 Learnware", top_1_acc, top_1_loss) - - reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") - ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) - unlabeled.record("Voting Reuse", ensemble_acc, ensemble_loss) - - reuse_job_selector = JobSelectorReuser(learnware_list=multiple_result[0].learnwares, use_herding=False) - job_loss, job_acc = evaluate(reuse_job_selector, dataset) - unlabeled.record("Job Selector", job_acc, job_loss) - - train_set, valid_set, spec_set, order = uploader_data(order=order) - for labeled_size in tqdm.tqdm([100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]): - loader = DataLoader(train_set, batch_size=labeled_size, shuffle=True) - X, y = next(iter(loader)) - - sampled_dataset = TensorDataset(X, y) - mode_save_path = os.path.abspath(os.path.join(__file__, "..", "cache", "model.pth")) - model = ConvModel(channel=X.shape[1], im_size=(X.shape[2], X.shape[3]), - n_random_features=10).to(device) - train_model(model, sampled_dataset, sampled_dataset, mode_save_path, - epochs=35, batch_size=128, device=device, verbose=False) - model.load_state_dict(torch.load(mode_save_path)) - _, train_acc = evaluate(model, dataset, distribution=True) - - ensemble_pruning = EnsemblePruningReuser(learnware_list=multiple_result[0].learnwares) - ensemble_pruning.fit(val_X=X, val_y=y) - _, pruning_acc = evaluate(ensemble_pruning, dataset, distribution=False) - - labeled.record("{:d}".format(labeled_size), train_acc, pruning_acc) - - print(unlabeled.summary()) - print(labeled.summary()) - - # Save recorder - current_time = datetime.now() - formatted_time = current_time.strftime("%Y-%m-%d_%H-%M-%S") - log_dir = os.path.abspath(os.path.join(__file__, "..", "log", 
formatted_time)) - os.makedirs(log_dir, exist_ok=True) - unlabeled.save(os.path.join(log_dir, "unlabeled.json")) - labeled.save(os.path.join(log_dir, "labeled.json")) - - def plot(self, record_dir): - unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) - labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) - - unlabeled.load(os.path.join(record_dir, "unlabeled.json")) - labeled.load(os.path.join(record_dir, "labeled.json")) - - plot_labeled_performance_curves("Image", labeled[0], labeled[1], - [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000], - save_path=os.path.abspath(os.path.join(__file__, "..", "labeled.png"))) - - -if __name__ == "__main__": - fire.Fire(CifarDatasetWorkflow) diff --git a/examples/dataset_image_workflow/example_files/example_init.py b/examples/dataset_image_workflow(old)/example_files/example_init.py similarity index 100% rename from examples/dataset_image_workflow/example_files/example_init.py rename to examples/dataset_image_workflow(old)/example_files/example_init.py diff --git a/examples/dataset_image_workflow/example_files/example_yaml.yaml b/examples/dataset_image_workflow(old)/example_files/example_yaml.yaml similarity index 100% rename from examples/dataset_image_workflow/example_files/example_yaml.yaml rename to examples/dataset_image_workflow(old)/example_files/example_yaml.yaml diff --git a/examples/dataset_image_workflow/example_files/model.py b/examples/dataset_image_workflow(old)/example_files/model.py similarity index 100% rename from examples/dataset_image_workflow/example_files/model.py rename to examples/dataset_image_workflow(old)/example_files/model.py diff --git a/examples/dataset_image_workflow/get_data.py b/examples/dataset_image_workflow(old)/get_data.py similarity index 100% rename from examples/dataset_image_workflow/get_data.py rename to examples/dataset_image_workflow(old)/get_data.py diff --git a/examples/dataset_image_workflow(old)/main.py 
b/examples/dataset_image_workflow(old)/main.py new file mode 100644 index 0000000..c91981c --- /dev/null +++ b/examples/dataset_image_workflow(old)/main.py @@ -0,0 +1,216 @@ +import numpy as np +import torch +from tqdm import tqdm + +from get_data import * +import os +import random + +from learnware.specification import RKMEImageSpecification +from learnware.reuse.averaging import AveragingReuser +from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction +from learnware.learnware import Learnware +import time + +from learnware.market import instantiate_learnware_market, BaseUserInfo +from learnware.market.easy import database_ops +from learnware.learnware import Learnware +import learnware.specification as specification +from learnware.logger import get_module_logger + +from shutil import copyfile, rmtree +import zipfile + +logger = get_module_logger("image_test", level="INFO") +origin_data_root = "./data/origin_data" +processed_data_root = "./data/processed_data" +tmp_dir = "./data/tmp" +learnware_pool_dir = "./data/learnware_pool" +dataset = "cifar10" +n_uploaders = 30 +n_users = 20 +n_classes = 10 +data_root = os.path.join(origin_data_root, dataset) +data_save_root = os.path.join(processed_data_root, dataset) +user_save_root = os.path.join(data_save_root, "user") +uploader_save_root = os.path.join(data_save_root, "uploader") +model_save_root = os.path.join(data_save_root, "uploader_model") +os.makedirs(data_root, exist_ok=True) +os.makedirs(user_save_root, exist_ok=True) +os.makedirs(uploader_save_root, exist_ok=True) +os.makedirs(model_save_root, exist_ok=True) + + +semantic_specs = [ + { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": {"Values": ["Classification"], "Type": "Class"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "String"}, + "Name": {"Values": "learnware_1", "Type": "String"}, + "Output": 
{"Dimension": 10}, + } +] + +user_semantic = { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": {"Values": ["Classification"], "Type": "Class"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "String"}, + "Name": {"Values": "", "Type": "String"}, +} + + +def prepare_data(): + if dataset == "cifar10": + X_train, y_train, X_test, y_test = get_cifar10(data_root) + elif dataset == "mnist": + X_train, y_train, X_test, y_test = get_mnist(data_root) + else: + return + generate_uploader(X_train, y_train, n_uploaders=n_uploaders, data_save_root=uploader_save_root) + generate_user(X_test, y_test, n_users=n_users, data_save_root=user_save_root) + + +def prepare_model(): + dataloader = ImageDataLoader(data_save_root, train=True) + for i in range(n_uploaders): + logger.info("Train on uploader: %d" % (i)) + X, y = dataloader.get_idx_data(i) + model = train(X, y, out_classes=n_classes) + model_save_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) + torch.save(model.state_dict(), model_save_path) + logger.info("Model saved to '%s'" % (model_save_path)) + + +def prepare_learnware(data_path, model_path, init_file_path, yaml_path, save_root, zip_name): + os.makedirs(save_root, exist_ok=True) + tmp_spec_path = os.path.join(save_root, "rkme.json") + tmp_model_path = os.path.join(save_root, "conv_model.pth") + tmp_yaml_path = os.path.join(save_root, "learnware.yaml") + tmp_init_path = os.path.join(save_root, "__init__.py") + tmp_model_file_path = os.path.join(save_root, "model.py") + mmodel_file_path = "./example_files/model.py" + + # Computing the specification from the whole dataset is too costly. 
+ X = np.load(data_path) + indices = np.random.choice(len(X), size=2000, replace=False) + X_sampled = X[indices] + + st = time.time() + user_spec = RKMEImageSpecification(cuda_idx=0) + user_spec.generate_stat_spec_from_data(X=X_sampled) + ed = time.time() + logger.info("Stat spec generated in %.3f s" % (ed - st)) + user_spec.save(tmp_spec_path) + copyfile(model_path, tmp_model_path) + copyfile(yaml_path, tmp_yaml_path) + copyfile(init_file_path, tmp_init_path) + copyfile(mmodel_file_path, tmp_model_file_path) + zip_file_name = os.path.join(learnware_pool_dir, "%s.zip" % (zip_name)) + with zipfile.ZipFile(zip_file_name, "w", compression=zipfile.ZIP_DEFLATED) as zip_obj: + zip_obj.write(tmp_spec_path, "rkme.json") + zip_obj.write(tmp_model_path, "conv_model.pth") + zip_obj.write(tmp_yaml_path, "learnware.yaml") + zip_obj.write(tmp_init_path, "__init__.py") + zip_obj.write(tmp_model_file_path, "model.py") + rmtree(save_root) + logger.info("New Learnware Saved to %s" % (zip_file_name)) + return zip_file_name + + +def prepare_market(): + image_market = instantiate_learnware_market(market_id="cifar10", name="easy", rebuild=True) + try: + rmtree(learnware_pool_dir) + except: + pass + os.makedirs(learnware_pool_dir, exist_ok=True) + for i in tqdm(range(n_uploaders), total=n_uploaders, desc="Preparing..."): + data_path = os.path.join(uploader_save_root, "uploader_%d_X.npy" % (i)) + model_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) + init_file_path = "./example_files/example_init.py" + yaml_file_path = "./example_files/example_yaml.yaml" + new_learnware_path = prepare_learnware( + data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) + ) + semantic_spec = semantic_specs[0] + semantic_spec["Name"]["Values"] = "learnware_%d" % (i) + semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) + image_market.add_learnware(new_learnware_path, semantic_spec) + + logger.info("Total Item: %d" % (len(image_market))) + 
curr_inds = image_market._get_ids() + logger.info("Available ids: " + str(curr_inds)) + + +def test_search(gamma=0.1, load_market=True): + if load_market: + image_market = instantiate_learnware_market(market_id="cifar10", name="easy") + else: + prepare_market() + image_market = instantiate_learnware_market(market_id="cifar10", name="easy") + logger.info("Number of items in the market: %d" % len(image_market)) + + select_list = [] + avg_list = [] + improve_list = [] + job_selector_score_list = [] + ensemble_score_list = [] + for i in tqdm(range(n_users), total=n_users, desc="Searching..."): + user_data_path = os.path.join(user_save_root, "user_%d_X.npy" % (i)) + user_label_path = os.path.join(user_save_root, "user_%d_y.npy" % (i)) + user_data = np.load(user_data_path) + user_label = np.load(user_label_path) + user_stat_spec = RKMEImageSpecification(cuda_idx=0) + user_stat_spec.generate_stat_spec_from_data(X=user_data, resize=False) + user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_stat_spec}) + logger.info("Searching Market for user: %d" % i) + search_result = image_market.search_learnware(user_info) + single_result = search_result.get_single_results() + acc_list = [] + for idx, single_item in enumerate(single_result[:5]): + pred_y = single_item.learnware.predict(user_data) + acc = eval_prediction(pred_y, user_label) + acc_list.append(acc) + logger.info("Search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, single_item.score, single_item.learnware.id, acc)) + + # test reuse (job selector) + # reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list, herding_num=100) + # reuse_predict = reuse_baseline.predict(user_data=user_data) + # reuse_score = eval_prediction(reuse_predict, user_label) + # job_selector_score_list.append(reuse_score) + # print(f"mixture reuse loss: {reuse_score}") + + # test reuse (ensemble) + single_learnware_list = [single_item.learnware for single_item in 
single_result] + reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list[:3], mode="vote_by_prob") + ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) + ensemble_score = eval_prediction(ensemble_predict_y, user_label) + ensemble_score_list.append(ensemble_score) + print(f"reuse accuracy (vote_by_prob): {ensemble_score}\n") + + select_list.append(acc_list[0]) + avg_list.append(np.mean(acc_list)) + improve_list.append((acc_list[0] - np.mean(acc_list)) / np.mean(acc_list)) + + logger.info( + "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f" + % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) + ) + logger.info( + "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) + ) + + +if __name__ == "__main__": + logger.info("=" * 40) + logger.info(f"n_uploaders:\t{n_uploaders}") + logger.info(f"n_users:\t{n_users}") + logger.info("=" * 40) + + prepare_data() + prepare_model() + test_search(load_market=False) diff --git a/examples/dataset_image_workflow/utils.py b/examples/dataset_image_workflow(old)/utils.py similarity index 100% rename from examples/dataset_image_workflow/utils.py rename to examples/dataset_image_workflow(old)/utils.py diff --git a/examples/dataset_cifar_workflow/benchmarks/__init__.py b/examples/dataset_image_workflow/benchmarks/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/__init__.py rename to examples/dataset_image_workflow/benchmarks/__init__.py diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py b/examples/dataset_image_workflow/benchmarks/dataset/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/dataset/__init__.py rename to examples/dataset_image_workflow/benchmarks/dataset/__init__.py diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/data.py 
b/examples/dataset_image_workflow/benchmarks/dataset/data.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/dataset/data.py rename to examples/dataset_image_workflow/benchmarks/dataset/data.py diff --git a/examples/dataset_cifar_workflow/benchmarks/dataset/utils.py b/examples/dataset_image_workflow/benchmarks/dataset/utils.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/dataset/utils.py rename to examples/dataset_image_workflow/benchmarks/dataset/utils.py diff --git a/examples/dataset_cifar_workflow/benchmarks/models/__init__.py b/examples/dataset_image_workflow/benchmarks/models/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/models/__init__.py rename to examples/dataset_image_workflow/benchmarks/models/__init__.py diff --git a/examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py b/examples/dataset_image_workflow/benchmarks/models/conv/__init__.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/models/conv/__init__.py rename to examples/dataset_image_workflow/benchmarks/models/conv/__init__.py diff --git a/examples/dataset_cifar_workflow/benchmarks/models/conv/model.py b/examples/dataset_image_workflow/benchmarks/models/conv/model.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/models/conv/model.py rename to examples/dataset_image_workflow/benchmarks/models/conv/model.py diff --git a/examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt b/examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/models/conv/requirements.txt rename to examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt diff --git a/examples/dataset_cifar_workflow/benchmarks/models/learnware.yaml b/examples/dataset_image_workflow/benchmarks/models/learnware.yaml similarity index 100% rename from 
examples/dataset_cifar_workflow/benchmarks/models/learnware.yaml rename to examples/dataset_image_workflow/benchmarks/models/learnware.yaml diff --git a/examples/dataset_cifar_workflow/benchmarks/utils.py b/examples/dataset_image_workflow/benchmarks/utils.py similarity index 100% rename from examples/dataset_cifar_workflow/benchmarks/utils.py rename to examples/dataset_image_workflow/benchmarks/utils.py diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py index c91981c..1de6796 100644 --- a/examples/dataset_image_workflow/main.py +++ b/examples/dataset_image_workflow/main.py @@ -1,216 +1,144 @@ -import numpy as np -import torch -from tqdm import tqdm - -from get_data import * import os -import random +from datetime import datetime -from learnware.specification import RKMEImageSpecification -from learnware.reuse.averaging import AveragingReuser -from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware -import time +import fire +import numpy as np +import tqdm +from numpy import mean +import torch +from torch.utils.data import DataLoader, TensorDataset +import learnware +from benchmarks.utils import * +from benchmarks.dataset.data import faster_train, uploader_data +from benchmarks.models.conv import ConvModel +from learnware.client import LearnwareClient from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.market.easy import database_ops -from learnware.learnware import Learnware -import learnware.specification as specification -from learnware.logger import get_module_logger - -from shutil import copyfile, rmtree -import zipfile - -logger = get_module_logger("image_test", level="INFO") -origin_data_root = "./data/origin_data" -processed_data_root = "./data/processed_data" -tmp_dir = "./data/tmp" -learnware_pool_dir = "./data/learnware_pool" -dataset = "cifar10" -n_uploaders = 30 -n_users = 20 -n_classes = 10 
-data_root = os.path.join(origin_data_root, dataset) -data_save_root = os.path.join(processed_data_root, dataset) -user_save_root = os.path.join(data_save_root, "user") -uploader_save_root = os.path.join(data_save_root, "uploader") -model_save_root = os.path.join(data_save_root, "uploader_model") -os.makedirs(data_root, exist_ok=True) -os.makedirs(user_save_root, exist_ok=True) -os.makedirs(uploader_save_root, exist_ok=True) -os.makedirs(model_save_root, exist_ok=True) - - -semantic_specs = [ - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - "Output": {"Dimension": 10}, - } -] - -user_semantic = { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "", "Type": "String"}, -} - - -def prepare_data(): - if dataset == "cifar10": - X_train, y_train, X_test, y_test = get_cifar10(data_root) - elif dataset == "mnist": - X_train, y_train, X_test, y_test = get_mnist(data_root) - else: - return - generate_uploader(X_train, y_train, n_uploaders=n_uploaders, data_save_root=uploader_save_root) - generate_user(X_test, y_test, n_users=n_users, data_save_root=user_save_root) - - -def prepare_model(): - dataloader = ImageDataLoader(data_save_root, train=True) - for i in range(n_uploaders): - logger.info("Train on uploader: %d" % (i)) - X, y = dataloader.get_idx_data(i) - model = train(X, y, out_classes=n_classes) - model_save_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) - torch.save(model.state_dict(), model_save_path) - logger.info("Model saved to 
'%s'" % (model_save_path)) - - -def prepare_learnware(data_path, model_path, init_file_path, yaml_path, save_root, zip_name): - os.makedirs(save_root, exist_ok=True) - tmp_spec_path = os.path.join(save_root, "rkme.json") - tmp_model_path = os.path.join(save_root, "conv_model.pth") - tmp_yaml_path = os.path.join(save_root, "learnware.yaml") - tmp_init_path = os.path.join(save_root, "__init__.py") - tmp_model_file_path = os.path.join(save_root, "model.py") - mmodel_file_path = "./example_files/model.py" - - # Computing the specification from the whole dataset is too costly. - X = np.load(data_path) - indices = np.random.choice(len(X), size=2000, replace=False) - X_sampled = X[indices] - - st = time.time() - user_spec = RKMEImageSpecification(cuda_idx=0) - user_spec.generate_stat_spec_from_data(X=X_sampled) - ed = time.time() - logger.info("Stat spec generated in %.3f s" % (ed - st)) - user_spec.save(tmp_spec_path) - copyfile(model_path, tmp_model_path) - copyfile(yaml_path, tmp_yaml_path) - copyfile(init_file_path, tmp_init_path) - copyfile(mmodel_file_path, tmp_model_file_path) - zip_file_name = os.path.join(learnware_pool_dir, "%s.zip" % (zip_name)) - with zipfile.ZipFile(zip_file_name, "w", compression=zipfile.ZIP_DEFLATED) as zip_obj: - zip_obj.write(tmp_spec_path, "rkme.json") - zip_obj.write(tmp_model_path, "conv_model.pth") - zip_obj.write(tmp_yaml_path, "learnware.yaml") - zip_obj.write(tmp_init_path, "__init__.py") - zip_obj.write(tmp_model_file_path, "model.py") - rmtree(save_root) - logger.info("New Learnware Saved to %s" % (zip_file_name)) - return zip_file_name - - -def prepare_market(): - image_market = instantiate_learnware_market(market_id="cifar10", name="easy", rebuild=True) - try: - rmtree(learnware_pool_dir) - except: - pass - os.makedirs(learnware_pool_dir, exist_ok=True) - for i in tqdm(range(n_uploaders), total=n_uploaders, desc="Preparing..."): - data_path = os.path.join(uploader_save_root, "uploader_%d_X.npy" % (i)) - model_path = 
os.path.join(model_save_root, "uploader_%d.pth" % (i)) - init_file_path = "./example_files/example_init.py" - yaml_file_path = "./example_files/example_yaml.yaml" - new_learnware_path = prepare_learnware( - data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) - ) - semantic_spec = semantic_specs[0] - semantic_spec["Name"]["Values"] = "learnware_%d" % (i) - semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) - image_market.add_learnware(new_learnware_path, semantic_spec) - - logger.info("Total Item: %d" % (len(image_market))) - curr_inds = image_market._get_ids() - logger.info("Available ids: " + str(curr_inds)) - - -def test_search(gamma=0.1, load_market=True): - if load_market: - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - else: - prepare_market() - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - logger.info("Number of items in the market: %d" % len(image_market)) - - select_list = [] - avg_list = [] - improve_list = [] - job_selector_score_list = [] - ensemble_score_list = [] - for i in tqdm(range(n_users), total=n_users, desc="Searching..."): - user_data_path = os.path.join(user_save_root, "user_%d_X.npy" % (i)) - user_label_path = os.path.join(user_save_root, "user_%d_y.npy" % (i)) - user_data = np.load(user_data_path) - user_label = np.load(user_label_path) - user_stat_spec = RKMEImageSpecification(cuda_idx=0) - user_stat_spec.generate_stat_spec_from_data(X=user_data, resize=False) - user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_stat_spec}) - logger.info("Searching Market for user: %d" % i) - search_result = image_market.search_learnware(user_info) - single_result = search_result.get_single_results() - acc_list = [] - for idx, single_item in enumerate(single_result[:5]): - pred_y = single_item.learnware.predict(user_data) - acc = eval_prediction(pred_y, user_label) - acc_list.append(acc) - 
logger.info("Search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, single_item.score, single_item.learnware.id, acc)) - - # test reuse (job selector) - # reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list, herding_num=100) - # reuse_predict = reuse_baseline.predict(user_data=user_data) - # reuse_score = eval_prediction(reuse_predict, user_label) - # job_selector_score_list.append(reuse_score) - # print(f"mixture reuse loss: {reuse_score}") - - # test reuse (ensemble) - single_learnware_list = [single_item.learnware for single_item in single_result] - reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list[:3], mode="vote_by_prob") - ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) - ensemble_score = eval_prediction(ensemble_predict_y, user_label) - ensemble_score_list.append(ensemble_score) - print(f"reuse accuracy (vote_by_prob): {ensemble_score}\n") - - select_list.append(acc_list[0]) - avg_list.append(np.mean(acc_list)) - improve_list.append((acc_list[0] - np.mean(acc_list)) / np.mean(acc_list)) - - logger.info( - "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f" - % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) - ) - logger.info( - "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) - ) +from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser +from learnware.utils import choose_device + +PROXY_IP = "172.27.138.61" +os.environ["HTTP_PROXY"] = "http://" + PROXY_IP + ":7890" +os.environ["HTTPS_PROXY"] = "http://" + PROXY_IP + ":7890" + + +class CifarDatasetWorkflow: + + def prepare(self, market_size=50, market_id=None, rebuild=False, faster=True): + """initialize learnware market""" + learnware.init() + assert not rebuild + + market_id = "dataset_cifar_workflow" if market_id is None else market_id + orders = np.stack([np.random.permutation(10) for _ 
in range(market_size)]) + + print("Using market_id", market_id) + market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=rebuild) + + device = choose_device(0) + if faster: + faster_train(device) + for i, order in enumerate(orders[len(market):]): + print("=" * 20 + "learnware {}".format(len(market)) + "=" * 20) + print("order:", order) + build_learnware("cifar10", market, order, device=device) + + print("Total Item:", len(market)) + + def evaluate(self, user_size=100, market_id=None, faster=True): + learnware.init() + + market_id = "dataset_cifar_workflow" if market_id is None else market_id + orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) + + print("Using market_id", market_id) + market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) + + device = choose_device(0) + if faster: + faster_train(device) + unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) + labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) + for i, order in enumerate(orders): + print("=" * 20 + "user {}".format(i) + "=" * 20) + print("order:", order) + user_spec, dataset = build_specification("cifar10", i, order) + + user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( + self=None, + description="For Cifar Dataset Workflow", + data_type="Image", + task_type="Classification", + library_type="PyTorch", + scenarios=["Computer"], + output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), + stat_info={"RKMEImageSpecification": user_spec}) + + search_result = market.search_learnware(user_info) + single_result = search_result.get_single_results() + multiple_result = search_result.get_multiple_results() + + loss_list, acc_list = [], [] + for item in market.get_learnwares(): + loss, acc = evaluate(item, dataset) + loss_list.append(loss) + acc_list.append(acc) + unlabeled.record("Best", 
max(acc_list), min(loss_list)) + unlabeled.record("Average", mean(acc_list), mean(loss_list)) + + top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) + unlabeled.record("Top-1 Learnware", top_1_acc, top_1_loss) + + reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") + ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) + unlabeled.record("Voting Reuse", ensemble_acc, ensemble_loss) + + reuse_job_selector = JobSelectorReuser(learnware_list=multiple_result[0].learnwares, use_herding=False) + job_loss, job_acc = evaluate(reuse_job_selector, dataset) + unlabeled.record("Job Selector", job_acc, job_loss) + + train_set, valid_set, spec_set, order = uploader_data(order=order) + for labeled_size in tqdm.tqdm([100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]): + loader = DataLoader(train_set, batch_size=labeled_size, shuffle=True) + X, y = next(iter(loader)) + + sampled_dataset = TensorDataset(X, y) + mode_save_path = os.path.abspath(os.path.join(__file__, "..", "cache", "model.pth")) + model = ConvModel(channel=X.shape[1], im_size=(X.shape[2], X.shape[3]), + n_random_features=10).to(device) + train_model(model, sampled_dataset, sampled_dataset, mode_save_path, + epochs=35, batch_size=128, device=device, verbose=False) + model.load_state_dict(torch.load(mode_save_path)) + _, train_acc = evaluate(model, dataset, distribution=True) + + ensemble_pruning = EnsemblePruningReuser(learnware_list=multiple_result[0].learnwares) + ensemble_pruning.fit(val_X=X, val_y=y) + _, pruning_acc = evaluate(ensemble_pruning, dataset, distribution=False) + + labeled.record("{:d}".format(labeled_size), train_acc, pruning_acc) + + print(unlabeled.summary()) + print(labeled.summary()) + + # Save recorder + current_time = datetime.now() + formatted_time = current_time.strftime("%Y-%m-%d_%H-%M-%S") + log_dir = os.path.abspath(os.path.join(__file__, "..", "log", formatted_time)) + os.makedirs(log_dir, exist_ok=True) + 
unlabeled.save(os.path.join(log_dir, "unlabeled.json")) + labeled.save(os.path.join(log_dir, "labeled.json")) + + def plot(self, record_dir): + unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) + labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) + + unlabeled.load(os.path.join(record_dir, "unlabeled.json")) + labeled.load(os.path.join(record_dir, "labeled.json")) + + plot_labeled_performance_curves("Image", labeled[0], labeled[1], + [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000], + save_path=os.path.abspath(os.path.join(__file__, "..", "labeled.png"))) if __name__ == "__main__": - logger.info("=" * 40) - logger.info(f"n_uploaders:\t{n_uploaders}") - logger.info(f"n_users:\t{n_users}") - logger.info("=" * 40) - - prepare_data() - prepare_model() - test_search(load_market=False) + fire.Fire(CifarDatasetWorkflow) diff --git a/examples/dataset_cifar_workflow/mock.py b/examples/dataset_image_workflow/mock.py similarity index 100% rename from examples/dataset_cifar_workflow/mock.py rename to examples/dataset_image_workflow/mock.py From 1b16d90ae8dfd0ec6c196b68a0107eebc5c24152 Mon Sep 17 00:00:00 2001 From: shihy Date: Fri, 8 Dec 2023 18:38:34 +0800 Subject: [PATCH 16/25] [MNT] Rename --- examples/dataset_image_workflow/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py index 1de6796..9d09fe4 100644 --- a/examples/dataset_image_workflow/main.py +++ b/examples/dataset_image_workflow/main.py @@ -22,7 +22,7 @@ os.environ["HTTP_PROXY"] = "http://" + PROXY_IP + ":7890" os.environ["HTTPS_PROXY"] = "http://" + PROXY_IP + ":7890" -class CifarDatasetWorkflow: +class ImageDatasetWorkflow: def prepare(self, market_size=50, market_id=None, rebuild=False, faster=True): """initialize learnware market""" @@ -141,4 +141,4 @@ class CifarDatasetWorkflow: if __name__ == "__main__": - 
fire.Fire(CifarDatasetWorkflow) + fire.Fire(ImageDatasetWorkflow) From 4ea360bc0e34e124ba93d1fd2fd571c78488b6c1 Mon Sep 17 00:00:00 2001 From: shihy Date: Sat, 9 Dec 2023 16:10:55 +0800 Subject: [PATCH 17/25] [FIX] Fix typo and remove mock.py --- .../benchmarks/dataset/data.py | 2 +- examples/dataset_image_workflow/main.py | 4 +- examples/dataset_image_workflow/mock.py | 83 ------------------- 3 files changed, 3 insertions(+), 86 deletions(-) delete mode 100644 examples/dataset_image_workflow/mock.py diff --git a/examples/dataset_image_workflow/benchmarks/dataset/data.py b/examples/dataset_image_workflow/benchmarks/dataset/data.py index 39dcc7e..81ed6e5 100644 --- a/examples/dataset_image_workflow/benchmarks/dataset/data.py +++ b/examples/dataset_image_workflow/benchmarks/dataset/data.py @@ -8,7 +8,7 @@ from torchvision.transforms import transforms from torch.utils.data import TensorDataset from .utils import cached -from examples.dataset_cifar_workflow.benchmarks.dataset.utils import split_dataset, build_transforms +from examples.dataset_image_workflow.benchmarks.dataset.utils import split_dataset, build_transforms cache_root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'cache')) diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py index 9d09fe4..ec2d81b 100644 --- a/examples/dataset_image_workflow/main.py +++ b/examples/dataset_image_workflow/main.py @@ -29,7 +29,7 @@ class ImageDatasetWorkflow: learnware.init() assert not rebuild - market_id = "dataset_cifar_workflow" if market_id is None else market_id + market_id = "dataset_image_workflow" if market_id is None else market_id orders = np.stack([np.random.permutation(10) for _ in range(market_size)]) print("Using market_id", market_id) @@ -48,7 +48,7 @@ class ImageDatasetWorkflow: def evaluate(self, user_size=100, market_id=None, faster=True): learnware.init() - market_id = "dataset_cifar_workflow" if market_id is None else market_id + 
market_id = "dataset_image_workflow" if market_id is None else market_id orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) print("Using market_id", market_id) diff --git a/examples/dataset_image_workflow/mock.py b/examples/dataset_image_workflow/mock.py deleted file mode 100644 index e08199b..0000000 --- a/examples/dataset_image_workflow/mock.py +++ /dev/null @@ -1,83 +0,0 @@ -import os.path -import random - -import numpy as np -import torch -from torch.utils.data import DataLoader, TensorDataset -from torchvision import datasets -from torchvision.transforms import transforms - -import learnware -from examples.dataset_cifar_workflow.benchmarks.dataset import user_data, split_dataset -from examples.dataset_image_workflow.get_data import get_zca_matrix, transform_data -from learnware import setup_seed -from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification - - -def f(d): - return np.exp(-d / 0.00005) - -def get_spec(path, order=None): - if path is not None and os.path.exists(path): - spec = RKMEImageSpecification() - spec.load(path) - return spec, spec.msg - - test_user, spec_user, _, order = user_data(order=order) - loader = DataLoader(spec_user, batch_size=3000, shuffle=True) - sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False) - spec.msg = order - - if path is not None: - spec.save(path) - - return spec, order - -DATA_ROOT = "cache" -def get_cifar10(output_channels=3, image_size=32, z_score=True, order=None): - ds_train = datasets.CIFAR10(DATA_ROOT, train=True, download=True, transform=transforms.Compose( - [transforms.ToTensor(), transforms.Resize([image_size, image_size])])) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.CIFAR10(DATA_ROOT, train=False, download=True, transform=transforms.Compose( - [transforms.ToTensor(), transforms.Resize([image_size, image_size])])) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = 
torch.Tensor(np.moveaxis(X_train, 3, 1)) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - if z_score: - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / ( - torch.std(X_train, [0, 2, 3], keepdim=True)) - - whitening_mat = get_zca_matrix(X_train, reg_coef=0.1) - train_X = transform_data(X_train, whitening_mat) - test_X = transform_data(X_train, whitening_mat) - - selected_data_indexes, order = split_dataset(y_test, 10000, split="user", order=order) - - return TensorDataset(test_X[selected_data_indexes], y_test[selected_data_indexes]), order - - - - -if __name__ == "__main__": - # old1, order1 = get_spec("spec_1_V100.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - # old2, order2 = get_spec("spec_2_A100.json", order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) - - old3, order3 = get_spec("spec_3_A100.json", order=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - old4, order4 = get_spec("spec_6_A100.json", order=[2, 3, 4, 5, 6, 7, 0, 1, 8, 9]) - - print(order3, order4) - print(f(old3.dist(old4))) - From be8b30e3eb8d40dbf1500170a99b26dac82576c1 Mon Sep 17 00:00:00 2001 From: shihy Date: Sat, 9 Dec 2023 16:26:52 +0800 Subject: [PATCH 18/25] [FIX] Fix a tiny bug --- learnware/specification/regular/image/rkme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 9afcdd7..6be65a8 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -164,7 +164,7 @@ class RKMEImageSpecification(RegularStatSpecification): # auto sample if len(X_train) > sample_size: indices = np.random.choice(len(X_train), 
size=sample_size, replace=False) - X_train = X_train(indices) + X_train = X_train[indices] try: import torch_optimizer From 575ef94f91978417b381f1d61534fbdefe1597df Mon Sep 17 00:00:00 2001 From: shihy Date: Mon, 18 Dec 2023 20:31:40 +0800 Subject: [PATCH 19/25] [FIX] Delete the requirement. --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 2ade5e3..0c636b8 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ REQUIRED = [ "pandas>=0.25.1", "scipy>=1.0.0", "tqdm>=4.65.0", - "tabulate", "scikit-learn>=0.22", "joblib>=1.2.0", "pyyaml>=6.0", From bb3686b6360bb45c27fe7ba09e44677451ab56de Mon Sep 17 00:00:00 2001 From: shihy Date: Sun, 24 Dec 2023 13:53:35 +0800 Subject: [PATCH 20/25] [MNT] Save test data copy to disk. --- .../benchmarks/utils.py | 23 ++++++++++++------- examples/dataset_image_workflow/main.py | 19 ++++++++++++++- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/examples/dataset_image_workflow/benchmarks/utils.py b/examples/dataset_image_workflow/benchmarks/utils.py index 085258d..134f80f 100644 --- a/examples/dataset_image_workflow/benchmarks/utils.py +++ b/examples/dataset_image_workflow/benchmarks/utils.py @@ -1,5 +1,6 @@ import json import os +import pickle import zipfile from collections import defaultdict from shutil import rmtree @@ -86,7 +87,7 @@ def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv" # build specification loader = DataLoader(spec_set, batch_size=3000, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=True) + spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=False) # add to market model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) @@ -177,24 +178,30 @@ def train_model(model: nn.Module, train_set: Dataset, valid_set: Dataset, def build_specification(name: str, cache_id, order, sampled_size=3000): cache_dir = os.path.abspath(os.path.join( - 
os.path.dirname( __file__ ), '..', 'cache', 'spec')) + os.path.dirname(__file__), '..', 'cache')) os.makedirs(cache_dir, exist_ok=True) - cache_path = os.path.join(cache_dir, "spec_{}.json".format(cache_id)) + spec_cache_path = os.path.join(cache_dir, 'spec', "spec_{}.json".format(cache_id)) - if os.path.exists(cache_path): + if os.path.exists(spec_cache_path): spec = RKMEImageSpecification() - spec.load(cache_path) + spec.load(spec_cache_path) test_dataset, spec_dataset, _, _ = user_data(indices=torch.asarray(spec.msg)) else: test_dataset, spec_dataset, indices, _ = user_data(order=order) loader = DataLoader(spec_dataset, batch_size=sampled_size, shuffle=True) sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=True) + spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=False) spec.msg = indices.tolist() - spec.save(cache_path) - + spec.save(spec_cache_path) + + # Save test_dataset to disk, spec_dataset is same as test_dataset for now + X, y = next(iter(DataLoader(test_dataset, batch_size=len(test_dataset)))) + with open(os.path.join(cache_dir, 'test_data', "user{}_X.pkl".format(cache_id)), "wb") as f: + pickle.dump(X.detach().cpu().numpy(), f) + with open(os.path.join(cache_dir, 'test_data', "user{}_y.pkl".format(cache_id)), "wb") as f: + pickle.dump(y.detach().cpu().numpy(), f) return spec, test_dataset diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py index ec2d81b..6bf661c 100644 --- a/examples/dataset_image_workflow/main.py +++ b/examples/dataset_image_workflow/main.py @@ -1,4 +1,5 @@ import os +import random from datetime import datetime import fire @@ -29,6 +30,8 @@ class ImageDatasetWorkflow: learnware.init() assert not rebuild + np.random.seed(0) + random.seed(0) market_id = "dataset_image_workflow" if market_id is None else market_id orders = np.stack([np.random.permutation(10) for _ in range(market_size)]) @@ -48,12 +51,20 @@ class 
ImageDatasetWorkflow: def evaluate(self, user_size=100, market_id=None, faster=True): learnware.init() + np.random.seed(1) + random.seed(1) market_id = "dataset_image_workflow" if market_id is None else market_id orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) print("Using market_id", market_id) market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) + # Create Folder to save data + train_data_cache_folder = os.path.abspath(os.path.join(__file__, '..', "cache", "train_data")) + test_data_cache_folder = os.path.abspath(os.path.join(__file__, '..', "cache", "test_data")) + os.makedirs(train_data_cache_folder, exist_ok=True) + os.makedirs(test_data_cache_folder, exist_ok=True) + device = choose_device(0) if faster: faster_train(device) @@ -97,7 +108,13 @@ class ImageDatasetWorkflow: job_loss, job_acc = evaluate(reuse_job_selector, dataset) unlabeled.record("Job Selector", job_acc, job_loss) - train_set, valid_set, spec_set, order = uploader_data(order=order) + train_set, _, _, _ = uploader_data(order=order) + X, y = next(iter(DataLoader(train_set, batch_size=len(train_set)))) + with open(os.path.join(train_data_cache_folder, "user{}_X.pkl".format(i)), "wb") as f: + pickle.dump(X.detach().cpu().numpy(), f) + with open(os.path.join(train_data_cache_folder, "user{}_y.pkl".format(i)), "wb") as f: + pickle.dump(y.detach().cpu().numpy(), f) + for labeled_size in tqdm.tqdm([100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]): loader = DataLoader(train_set, batch_size=labeled_size, shuffle=True) X, y = next(iter(loader)) From ef592d7b9174879afb7d41f9589fa0f8f70e4473 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 26 Dec 2023 00:46:45 +0800 Subject: [PATCH 21/25] [FIX] fix bugs in herding --- learnware/reuse/job_selector.py | 10 ++++++---- learnware/specification/regular/image/rkme.py | 2 +- learnware/specification/regular/table/rkme.py | 6 ++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git 
a/learnware/reuse/job_selector.py b/learnware/reuse/job_selector.py index 773c3c1..c6bb82f 100644 --- a/learnware/reuse/job_selector.py +++ b/learnware/reuse/job_selector.py @@ -124,9 +124,9 @@ class JobSelectorReuser(BaseReuser): task_spec = learnware_rkme_spec_list[i] if self.use_herding: task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) - herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() + herding_X_i = task_spec.herding(task_herding_num) else: - herding_X_i = task_spec.z.detach().cpu().numpy() + herding_X_i = task_spec.get_z() task_herding_num = herding_X_i.shape[0] task_val_num = task_herding_num // 5 @@ -229,8 +229,10 @@ class JobSelectorReuser(BaseReuser): try: from lightgbm import LGBMClassifier, early_stopping except ModuleNotFoundError: - raise ModuleNotFoundError(f"JobSelectorReuser is not available because 'lightgbm' is not installed! Please install it manually.") - + raise ModuleNotFoundError( + f"JobSelectorReuser is not available because 'lightgbm' is not installed! Please install it manually." 
+ ) + score_best = -1 learning_rate = [0.01] max_depth = [66] diff --git a/learnware/specification/regular/image/rkme.py b/learnware/specification/regular/image/rkme.py index 3ce9ad5..f89014a 100644 --- a/learnware/specification/regular/image/rkme.py +++ b/learnware/specification/regular/image/rkme.py @@ -366,7 +366,7 @@ class RKMEImageSpecification(RegularStatSpecification): indices = torch.multinomial(self.beta, T, replacement=True) mock = self.z[indices] + torch.randn_like(self.z[indices]) * 0.01 - return mock.numpy() + return mock.detach().cpu().numpy() def _sampling_candidates(self, N: int) -> np.ndarray: raise NotImplementedError() diff --git a/learnware/specification/regular/table/rkme.py b/learnware/specification/regular/table/rkme.py index abecf6f..ade034d 100644 --- a/learnware/specification/regular/table/rkme.py +++ b/learnware/specification/regular/table/rkme.py @@ -411,7 +411,7 @@ class RKMETableSpecification(RegularStatSpecification): S_shape = tuple([S.shape[0]] + list(Z_shape)[1:]) S = S.reshape(S_shape) - return S + return S.detach().cpu().numpy() def save(self, filepath: str): """Save the computed RKME specification to a specified path in JSON format. @@ -457,7 +457,9 @@ class RKMETableSpecification(RegularStatSpecification): for d in self.get_states(): if d in rkme_load.keys(): if d == "type" and rkme_load[d] != self.type: - raise TypeError(f"The type of loaded RKME ({rkme_load[d]}) is different from the expected type ({self.type})!") + raise TypeError( + f"The type of loaded RKME ({rkme_load[d]}) is different from the expected type ({self.type})!" 
+ ) setattr(self, d, rkme_load[d]) From 7be5a5373bd209d9f0891d43b69d12f7cf7b2d38 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 26 Dec 2023 00:47:56 +0800 Subject: [PATCH 22/25] [MNT] Initialize image example with LearnwareBenchmark --- .../example_files/example_init.py | 26 -- .../example_files/example_yaml.yaml | 8 - .../example_files/model.py | 183 ----------- .../dataset_image_workflow(old)/get_data.py | 283 ------------------ examples/dataset_image_workflow(old)/main.py | 216 ------------- examples/dataset_image_workflow(old)/utils.py | 174 ----------- examples/dataset_image_workflow/config.py | 62 ++++ examples/dataset_image_workflow/model.py | 82 +++++ examples/dataset_image_workflow/utils.py | 100 +++++++ examples/dataset_image_workflow/workflow.py | 246 +++++++++++++++ 10 files changed, 490 insertions(+), 890 deletions(-) delete mode 100644 examples/dataset_image_workflow(old)/example_files/example_init.py delete mode 100644 examples/dataset_image_workflow(old)/example_files/example_yaml.yaml delete mode 100644 examples/dataset_image_workflow(old)/example_files/model.py delete mode 100644 examples/dataset_image_workflow(old)/get_data.py delete mode 100644 examples/dataset_image_workflow(old)/main.py delete mode 100644 examples/dataset_image_workflow(old)/utils.py create mode 100644 examples/dataset_image_workflow/config.py create mode 100644 examples/dataset_image_workflow/model.py create mode 100644 examples/dataset_image_workflow/utils.py create mode 100644 examples/dataset_image_workflow/workflow.py diff --git a/examples/dataset_image_workflow(old)/example_files/example_init.py b/examples/dataset_image_workflow(old)/example_files/example_init.py deleted file mode 100644 index b318ee8..0000000 --- a/examples/dataset_image_workflow(old)/example_files/example_init.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import joblib -import numpy as np -from learnware.model import BaseModel -from .model import ConvModel -import torch - - -class Model(BaseModel): - 
def __init__(self): - super().__init__(input_shape=(3, 32, 32), output_shape=(10,)) - dir_path = os.path.dirname(os.path.abspath(__file__)) - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.model = ConvModel(channel=3, n_random_features=10).to(self.device) - self.model.load_state_dict(torch.load(os.path.join(dir_path, "conv_model.pth"))) - self.model.eval() - - def fit(self, X: np.ndarray, y: np.ndarray): - pass - - def predict(self, X: np.ndarray) -> np.ndarray: - X = torch.Tensor(X).to(self.device) - return self.model(X) - - def finetune(self, X: np.ndarray, y: np.ndarray): - pass diff --git a/examples/dataset_image_workflow(old)/example_files/example_yaml.yaml b/examples/dataset_image_workflow(old)/example_files/example_yaml.yaml deleted file mode 100644 index 9aaf820..0000000 --- a/examples/dataset_image_workflow(old)/example_files/example_yaml.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: - class_name: Model - kwargs: {} -stat_specifications: - - module_path: learnware.specification - class_name: RKMEImageSpecification - file_name: rkme.json - kwargs: {} \ No newline at end of file diff --git a/examples/dataset_image_workflow(old)/example_files/model.py b/examples/dataset_image_workflow(old)/example_files/model.py deleted file mode 100644 index 3281416..0000000 --- a/examples/dataset_image_workflow(old)/example_files/model.py +++ /dev/null @@ -1,183 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np - - -class Linear(nn.Module): - def __init__(self, input_feature=256, num_classes=10): - super().__init__() - self.linear_1 = nn.Linear(input_feature, 128) - self.dropout_1 = nn.Dropout(p=0.5) - self.linear_2 = nn.Linear(128, 128) - self.dropout_2 = nn.Dropout(p=0.5) - self.linear_3 = nn.Linear(128, num_classes) - - def forward(self, x): - out1 = F.relu(self.dropout_1(self.linear_1(x))) - out2 = F.relu(self.dropout_2(self.linear_2(out1))) - out = self.linear_3(out2) - return out - - 
-class OriginModel(nn.Module): - def __init__(self, last_layer_feature=256): - super().__init__() - self.linear_1 = nn.Linear(last_layer_feature, 128) - self.linear_2 = nn.Linear(128, 128) - self.linear_3 = nn.Linear(128, 10) - - def forward(self, x): - out = F.relu(self.linear_1(x)) - out = F.relu(self.linear_2(out)) - out = self.linear_3(out) - return out - - -class ConvModel(nn.Module): - def __init__( - self, - channel, - n_random_features, - net_width=64, - net_depth=3, - net_act="relu", - net_norm="batchnorm", - net_pooling="avgpooling", - im_size=(32, 32), - ): - super().__init__() - # print('Building Conv Model') - self.features, shape_feat = self._make_layers( - channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size - ) - num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2] - self.classifier = GaussianLinear(num_feat, n_random_features) - - def forward(self, x): - out = self.features(x) - out = out.reshape(out.size(0), -1) - out = self.classifier(out) - return out - - def _get_activation(self, net_act): - if net_act == "sigmoid": - return nn.Sigmoid() - elif net_act == "relu": - return nn.ReLU(inplace=True) - elif net_act == "leakyrelu": - return nn.LeakyReLU(negative_slope=0.01) - elif net_act == "gelu": - return nn.SiLU() - else: - exit("unknown activation function: %s" % net_act) - - def _get_pooling(self, net_pooling): - if net_pooling == "maxpooling": - return nn.MaxPool2d(kernel_size=2, stride=2) - elif net_pooling == "avgpooling": - return nn.AvgPool2d(kernel_size=2, stride=2) - elif net_pooling == "none": - return None - else: - exit("unknown net_pooling: %s" % net_pooling) - - def _get_normlayer(self, net_norm, shape_feat): - # shape_feat = (c*h*w) - if net_norm == "batchnorm": - return nn.BatchNorm2d(shape_feat[0], affine=True) - elif net_norm == "layernorm": - return nn.LayerNorm(shape_feat, elementwise_affine=True) - elif net_norm == "instancenorm": - return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) - elif 
net_norm == "groupnorm": - return nn.GroupNorm(4, shape_feat[0], affine=True) - elif net_norm == "none": - return None - else: - exit("unknown net_norm: %s" % net_norm) - - def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): - layers = [] - in_channels = channel - # if im_size[0] == 28: - # im_size = (32, 32) - shape_feat = [in_channels, im_size[0], im_size[1]] - for d in range(net_depth): - # print(shape_feat) - layers += [Conv2d_gaussian(in_channels, net_width, kernel_size=3, padding=1)] - # layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding='same')] - shape_feat[0] = net_width - if net_norm != "none": - layers += [self._get_normlayer(net_norm, shape_feat)] - layers += [self._get_activation(net_act)] - in_channels = net_width - if net_pooling != "none": - layers += [self._get_pooling(net_pooling)] - shape_feat[1] //= 2 - shape_feat[2] //= 2 - - return nn.Sequential(*layers), shape_feat - - -class Conv2d_gaussian(torch.nn.Conv2d): - def reset_parameters(self) -> None: - # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with - # uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size) - # For more details see: https://github.com/pytorch/pytorch/issues/15314#issuecomment-477448573 - # torch.nn.init.kaiming_normal_(self.weight, a= math.sqrt(5)) - # W has shape out, in, h, w - torch.nn.init.normal_( - self.weight, 0, np.sqrt(2) / np.sqrt(self.weight.shape[1] * self.weight.shape[2] * self.weight.shape[3]) - ) - if self.bias is not None: - fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) - # print(fan_in) - if fan_in != 0: - # bound = 0 * 1 / math.sqrt(fan_in) - # torch.nn.init.uniform_(self.bias, -bound, bound) - # torch.nn.init.uniform_(self.bias, -bound, bound) - torch.nn.init.normal_(self.bias, 0, 0.1) - - -class GaussianLinear(torch.nn.Module): - __constants__ = ["in_features", "out_features"] - in_features: int - out_features: int - weight: 
torch.Tensor - - def __init__( - self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None, funny=False - ) -> None: - factory_kwargs = {"device": device, "dtype": dtype} - super(GaussianLinear, self).__init__() - self.funny = funny - self.in_features = in_features - self.out_features = out_features - self.weight = torch.nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs)) - if bias: - self.bias = torch.nn.Parameter(torch.empty(out_features, **factory_kwargs)) - else: - self.register_parameter("bias", None) - self.reset_parameters() - - def reset_parameters(self) -> None: - # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with - # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see - # https://github.com/pytorch/pytorch/issues/57109 - # torch.nn.init.kaiming_normal_(self.weight, a=1 * np.sqrt(5)) - torch.nn.init.normal_(self.weight, 0, np.sqrt(2) / np.sqrt(self.in_features)) - # torch.nn.init.normal_(self.weight, 0, 3/np.sqrt(self.in_features)) - if self.bias is not None: - fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) - bound = 1 / np.sqrt(fan_in) if fan_in > 0 else 0 - # torch.nn.init.uniform_(self.bias, -bound, bound) - torch.nn.init.normal_(self.bias, 0, 0.1) - - def forward(self, input: torch.Tensor) -> torch.Tensor: - return torch.nn.functional.linear(input, self.weight, self.bias) - - def extra_repr(self) -> str: - return "in_features={}, out_features={}, bias={}".format( - self.in_features, self.out_features, self.bias is not None - ) diff --git a/examples/dataset_image_workflow(old)/get_data.py b/examples/dataset_image_workflow(old)/get_data.py deleted file mode 100644 index c3af534..0000000 --- a/examples/dataset_image_workflow(old)/get_data.py +++ /dev/null @@ -1,283 +0,0 @@ -import torch -from torchvision import datasets, transforms -import torch.nn.functional as F -from scipy.ndimage.interpolation import rotate as scipyrotate - -import numpy as np 
- - -def get_fashion_mnist(data_root="./data", output_channels=1, image_size=28): - ds_train = datasets.FashionMNIST( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.FashionMNIST( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = X_train[:, None, :, :].float() - X_test = X_test[:, None, :, :].float() - - if output_channels > 1: - X_train = torch.cat([X_train for i in range(output_channels)], 1) - X_test = torch.cat([X_test for i in range(output_channels)], 1) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_mnist(data_root="./data/", output_channels=1, image_size=28): - ds_train = datasets.MNIST( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = [] - - for x, _ in ds_train: - X_train.append(x) - X_train = torch.stack(X_train) - - y_train = ds_train.targets - ds_test = datasets.MNIST( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = [] - - for x, _ in ds_test: - X_test.append(x) - X_test = torch.stack(X_test) - - y_test = ds_test.targets - - if output_channels > 1: - X_train = torch.cat([X_train for i in range(output_channels)], 1) - X_test = torch.cat([X_test for i in range(output_channels)], 1) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / 
(torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_cifar10(data_root="./data/", output_channels=3, image_size=32): - ds_train = datasets.CIFAR10( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.CIFAR10( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_svhn(output_channels=1, image_size=32): - ds_train = datasets.SVHN( - "./data/", - split="train", - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.labels - ds_test = datasets.SVHN( - "./data/", - split="test", - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.labels - - X_train = torch.Tensor(X_train) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(X_test) - y_test = 
torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_cifar100(data_root="./data/", output_channels=3, image_size=32): - ds_train = datasets.CIFAR100( - data_root, - train=True, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - X_train = ds_train.data - y_train = ds_train.targets - ds_test = datasets.CIFAR100( - data_root, - train=False, - download=True, - transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), - ) - - X_test = ds_test.data - y_test = ds_test.targets - - X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) - y_train = torch.Tensor(y_train).long() - X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) - y_test = torch.Tensor(y_test).long() - - if output_channels == 1: - X_train = torch.mean(X_train, 1, keepdim=True) - X_test = torch.mean(X_test, 1, keepdim=True) - - X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) - - return X_train, y_train, X_test, y_test - - -def get_zca_matrix(X, reg_coef=0.1): - X_flat = X.reshape(X.shape[0], -1) - cov = (X_flat.T @ X_flat) / X_flat.shape[0] - reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] - u, s, _ = torch.svd(cov.cuda() + reg_amount * torch.eye(cov.shape[0]).cuda()) - inv_sqrt_zca_eigs = s ** (-0.5) - whitening_transform = torch.einsum("ij,j,kj->ik", u, inv_sqrt_zca_eigs, u) - - return whitening_transform.cpu() - - -def 
layernorm_data(X): - X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True) - X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True)) - - return X_processed - - -def transform_data(X, whitening_transform): - if len(whitening_transform.shape) == 2: - X_flat = X.reshape(X.shape[0], -1) - X_flat = X_flat @ whitening_transform - return X_flat.view(*X.shape) - else: - X_flat = X.reshape(X.shape[0], -1) - X_flat = torch.einsum("nd, ndi->ni", X_flat, whitening_transform) - return X_flat.view(*X.shape) - - -def scale_to_zero_one(X): - mins = torch.min(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) - maxes = torch.max(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) - return (X - mins) / (maxes - mins) - - -def augment(images, dc_aug_param, device): - # This can be sped up in the future. - - if dc_aug_param != None and dc_aug_param["strategy"] != "none": - scale = dc_aug_param["scale"] - crop = dc_aug_param["crop"] - rotate = dc_aug_param["rotate"] - noise = dc_aug_param["noise"] - strategy = dc_aug_param["strategy"] - - shape = images.shape - mean = [] - for c in range(shape[1]): - mean.append(float(torch.mean(images[:, c]))) - - def cropfun(i): - im_ = torch.zeros(shape[1], shape[2] + crop * 2, shape[3] + crop * 2, dtype=torch.float, device=device) - for c in range(shape[1]): - im_[c] = mean[c] - im_[:, crop : crop + shape[2], crop : crop + shape[3]] = images[i] - r, c = np.random.permutation(crop * 2)[0], np.random.permutation(crop * 2)[0] - images[i] = im_[:, r : r + shape[2], c : c + shape[3]] - - def scalefun(i): - h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) - w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) - tmp = F.interpolate( - images[i : i + 1], - [h, w], - )[0] - mhw = max(h, w, shape[2], shape[3]) - im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device) - r = int((mhw - h) / 2) - c = int((mhw - w) / 2) - im_[:, r : r + h, c : c + w] = tmp - r = int((mhw - shape[2]) / 2) - c = 
int((mhw - shape[3]) / 2) - images[i] = im_[:, r : r + shape[2], c : c + shape[3]] - - def rotatefun(i): - im_ = scipyrotate( - images[i].cpu().data.numpy(), - angle=np.random.randint(-rotate, rotate), - axes=(-2, -1), - cval=np.mean(mean), - ) - r = int((im_.shape[-2] - shape[-2]) / 2) - c = int((im_.shape[-1] - shape[-1]) / 2) - images[i] = torch.tensor(im_[:, r : r + shape[-2], c : c + shape[-1]], dtype=torch.float, device=device) - - def noisefun(i): - images[i] = images[i] + noise * torch.randn(shape[1:], dtype=torch.float, device=device) - - augs = strategy.split("_") - - for i in range(shape[0]): - choice = np.random.permutation(augs)[0] # randomly implement one augmentation - if choice == "crop": - cropfun(i) - elif choice == "scale": - scalefun(i) - elif choice == "rotate": - rotatefun(i) - elif choice == "noise": - noisefun(i) - - return images diff --git a/examples/dataset_image_workflow(old)/main.py b/examples/dataset_image_workflow(old)/main.py deleted file mode 100644 index c91981c..0000000 --- a/examples/dataset_image_workflow(old)/main.py +++ /dev/null @@ -1,216 +0,0 @@ -import numpy as np -import torch -from tqdm import tqdm - -from get_data import * -import os -import random - -from learnware.specification import RKMEImageSpecification -from learnware.reuse.averaging import AveragingReuser -from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware -import time - -from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.market.easy import database_ops -from learnware.learnware import Learnware -import learnware.specification as specification -from learnware.logger import get_module_logger - -from shutil import copyfile, rmtree -import zipfile - -logger = get_module_logger("image_test", level="INFO") -origin_data_root = "./data/origin_data" -processed_data_root = "./data/processed_data" -tmp_dir = "./data/tmp" -learnware_pool_dir = 
"./data/learnware_pool" -dataset = "cifar10" -n_uploaders = 30 -n_users = 20 -n_classes = 10 -data_root = os.path.join(origin_data_root, dataset) -data_save_root = os.path.join(processed_data_root, dataset) -user_save_root = os.path.join(data_save_root, "user") -uploader_save_root = os.path.join(data_save_root, "uploader") -model_save_root = os.path.join(data_save_root, "uploader_model") -os.makedirs(data_root, exist_ok=True) -os.makedirs(user_save_root, exist_ok=True) -os.makedirs(uploader_save_root, exist_ok=True) -os.makedirs(model_save_root, exist_ok=True) - - -semantic_specs = [ - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - "Output": {"Dimension": 10}, - } -] - -user_semantic = { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Class"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "", "Type": "String"}, -} - - -def prepare_data(): - if dataset == "cifar10": - X_train, y_train, X_test, y_test = get_cifar10(data_root) - elif dataset == "mnist": - X_train, y_train, X_test, y_test = get_mnist(data_root) - else: - return - generate_uploader(X_train, y_train, n_uploaders=n_uploaders, data_save_root=uploader_save_root) - generate_user(X_test, y_test, n_users=n_users, data_save_root=user_save_root) - - -def prepare_model(): - dataloader = ImageDataLoader(data_save_root, train=True) - for i in range(n_uploaders): - logger.info("Train on uploader: %d" % (i)) - X, y = dataloader.get_idx_data(i) - model = train(X, y, out_classes=n_classes) - model_save_path = os.path.join(model_save_root, 
"uploader_%d.pth" % (i)) - torch.save(model.state_dict(), model_save_path) - logger.info("Model saved to '%s'" % (model_save_path)) - - -def prepare_learnware(data_path, model_path, init_file_path, yaml_path, save_root, zip_name): - os.makedirs(save_root, exist_ok=True) - tmp_spec_path = os.path.join(save_root, "rkme.json") - tmp_model_path = os.path.join(save_root, "conv_model.pth") - tmp_yaml_path = os.path.join(save_root, "learnware.yaml") - tmp_init_path = os.path.join(save_root, "__init__.py") - tmp_model_file_path = os.path.join(save_root, "model.py") - mmodel_file_path = "./example_files/model.py" - - # Computing the specification from the whole dataset is too costly. - X = np.load(data_path) - indices = np.random.choice(len(X), size=2000, replace=False) - X_sampled = X[indices] - - st = time.time() - user_spec = RKMEImageSpecification(cuda_idx=0) - user_spec.generate_stat_spec_from_data(X=X_sampled) - ed = time.time() - logger.info("Stat spec generated in %.3f s" % (ed - st)) - user_spec.save(tmp_spec_path) - copyfile(model_path, tmp_model_path) - copyfile(yaml_path, tmp_yaml_path) - copyfile(init_file_path, tmp_init_path) - copyfile(mmodel_file_path, tmp_model_file_path) - zip_file_name = os.path.join(learnware_pool_dir, "%s.zip" % (zip_name)) - with zipfile.ZipFile(zip_file_name, "w", compression=zipfile.ZIP_DEFLATED) as zip_obj: - zip_obj.write(tmp_spec_path, "rkme.json") - zip_obj.write(tmp_model_path, "conv_model.pth") - zip_obj.write(tmp_yaml_path, "learnware.yaml") - zip_obj.write(tmp_init_path, "__init__.py") - zip_obj.write(tmp_model_file_path, "model.py") - rmtree(save_root) - logger.info("New Learnware Saved to %s" % (zip_file_name)) - return zip_file_name - - -def prepare_market(): - image_market = instantiate_learnware_market(market_id="cifar10", name="easy", rebuild=True) - try: - rmtree(learnware_pool_dir) - except: - pass - os.makedirs(learnware_pool_dir, exist_ok=True) - for i in tqdm(range(n_uploaders), total=n_uploaders, 
desc="Preparing..."): - data_path = os.path.join(uploader_save_root, "uploader_%d_X.npy" % (i)) - model_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) - init_file_path = "./example_files/example_init.py" - yaml_file_path = "./example_files/example_yaml.yaml" - new_learnware_path = prepare_learnware( - data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) - ) - semantic_spec = semantic_specs[0] - semantic_spec["Name"]["Values"] = "learnware_%d" % (i) - semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) - image_market.add_learnware(new_learnware_path, semantic_spec) - - logger.info("Total Item: %d" % (len(image_market))) - curr_inds = image_market._get_ids() - logger.info("Available ids: " + str(curr_inds)) - - -def test_search(gamma=0.1, load_market=True): - if load_market: - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - else: - prepare_market() - image_market = instantiate_learnware_market(market_id="cifar10", name="easy") - logger.info("Number of items in the market: %d" % len(image_market)) - - select_list = [] - avg_list = [] - improve_list = [] - job_selector_score_list = [] - ensemble_score_list = [] - for i in tqdm(range(n_users), total=n_users, desc="Searching..."): - user_data_path = os.path.join(user_save_root, "user_%d_X.npy" % (i)) - user_label_path = os.path.join(user_save_root, "user_%d_y.npy" % (i)) - user_data = np.load(user_data_path) - user_label = np.load(user_label_path) - user_stat_spec = RKMEImageSpecification(cuda_idx=0) - user_stat_spec.generate_stat_spec_from_data(X=user_data, resize=False) - user_info = BaseUserInfo(semantic_spec=user_semantic, stat_info={"RKMETableSpecification": user_stat_spec}) - logger.info("Searching Market for user: %d" % i) - search_result = image_market.search_learnware(user_info) - single_result = search_result.get_single_results() - acc_list = [] - for idx, single_item in enumerate(single_result[:5]): - pred_y 
= single_item.learnware.predict(user_data) - acc = eval_prediction(pred_y, user_label) - acc_list.append(acc) - logger.info("Search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, single_item.score, single_item.learnware.id, acc)) - - # test reuse (job selector) - # reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list, herding_num=100) - # reuse_predict = reuse_baseline.predict(user_data=user_data) - # reuse_score = eval_prediction(reuse_predict, user_label) - # job_selector_score_list.append(reuse_score) - # print(f"mixture reuse loss: {reuse_score}") - - # test reuse (ensemble) - single_learnware_list = [single_item.learnware for single_item in single_result] - reuse_ensemble = AveragingReuser(learnware_list=single_learnware_list[:3], mode="vote_by_prob") - ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) - ensemble_score = eval_prediction(ensemble_predict_y, user_label) - ensemble_score_list.append(ensemble_score) - print(f"reuse accuracy (vote_by_prob): {ensemble_score}\n") - - select_list.append(acc_list[0]) - avg_list.append(np.mean(acc_list)) - improve_list.append((acc_list[0] - np.mean(acc_list)) / np.mean(acc_list)) - - logger.info( - "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f" - % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) - ) - logger.info( - "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) - ) - - -if __name__ == "__main__": - logger.info("=" * 40) - logger.info(f"n_uploaders:\t{n_uploaders}") - logger.info(f"n_users:\t{n_users}") - logger.info("=" * 40) - - prepare_data() - prepare_model() - test_search(load_market=False) diff --git a/examples/dataset_image_workflow(old)/utils.py b/examples/dataset_image_workflow(old)/utils.py deleted file mode 100644 index b7f5056..0000000 --- a/examples/dataset_image_workflow(old)/utils.py +++ /dev/null @@ -1,174 +0,0 @@ -import os 
-import numpy as np -import random -import math - -import torch -import torch.nn as nn -import torch.optim as optim - -from example_files.model import ConvModel - - -class ImageDataLoader: - def __init__(self, data_root, train: bool = True): - self.data_root = data_root - self.train = train - - def get_idx_data(self, idx=0): - if self.train: - X_path = os.path.join(self.data_root, "uploader", "uploader_%d_X.npy" % (idx)) - y_path = os.path.join(self.data_root, "uploader", "uploader_%d_y.npy" % (idx)) - if not (os.path.exists(X_path) and os.path.exists(y_path)): - raise Exception("Index Error") - X = np.load(X_path) - y = np.load(y_path) - else: - X_path = os.path.join(self.data_root, "user", "user_%d_X.npy" % (idx)) - y_path = os.path.join(self.data_root, "user", "user_%d_y.npy" % (idx)) - if not (os.path.exists(X_path) and os.path.exists(y_path)): - raise Exception("Index Error") - X = np.load(X_path) - y = np.load(y_path) - return X, y - - -def generate_uploader(data_x, data_y, n_uploaders=50, data_save_root=None): - if data_save_root is None: - return - os.makedirs(data_save_root, exist_ok=True) - for i in range(n_uploaders): - random_class_num = random.randint(6, 10) - cls_indx = list(range(10)) - random.shuffle(cls_indx) - selected_cls_indx = cls_indx[:random_class_num] - rest_cls_indx = cls_indx[random_class_num:] - selected_data_indx = [] - for cls in selected_cls_indx: - data_indx = list(torch.where(data_y == cls)[0]) - # print(type(data_indx)) - random.shuffle(data_indx) - data_num = random.randint(800, 2000) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - for cls in rest_cls_indx: - flag = random.randint(0, 1) - if flag == 0: - continue - data_indx = list(torch.where(data_y == cls)[0]) - random.shuffle(data_indx) - data_num = random.randint(20, 80) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - selected_X = data_x[selected_data_indx].numpy() - selected_y 
= data_y[selected_data_indx].numpy() - print(selected_X.dtype, selected_y.dtype) - print(selected_X.shape, selected_y.shape) - X_save_dir = os.path.join(data_save_root, "uploader_%d_X.npy" % (i)) - y_save_dir = os.path.join(data_save_root, "uploader_%d_y.npy" % (i)) - np.save(X_save_dir, selected_X) - np.save(y_save_dir, selected_y) - print("Saving to %s" % (X_save_dir)) - - -def generate_user(data_x, data_y, n_users=50, data_save_root=None): - if data_save_root is None: - return - os.makedirs(data_save_root, exist_ok=True) - for i in range(n_users): - random_class_num = random.randint(3, 6) - cls_indx = list(range(10)) - random.shuffle(cls_indx) - selected_cls_indx = cls_indx[:random_class_num] - selected_data_indx = [] - for cls in selected_cls_indx: - data_indx = list(torch.where(data_y == cls)[0]) - # print(type(data_indx)) - random.shuffle(data_indx) - data_num = random.randint(150, 350) - selected_indx = data_indx[:data_num] - selected_data_indx = selected_data_indx + selected_indx - # print('Total Index:', len(selected_data_indx)) - selected_X = data_x[selected_data_indx].numpy() - selected_y = data_y[selected_data_indx].numpy() - print(selected_X.shape, selected_y.shape) - X_save_dir = os.path.join(data_save_root, "user_%d_X.npy" % (i)) - y_save_dir = os.path.join(data_save_root, "user_%d_y.npy" % (i)) - np.save(X_save_dir, selected_X) - np.save(y_save_dir, selected_y) - print("Saving to %s" % (X_save_dir)) - - -# Train Uploaders' models -def train(X, y, out_classes, epochs=35, batch_size=128): - print(X.shape, y.shape) - input_feature = X.shape[1] - data_size = X.shape[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model = ConvModel(channel=input_feature, n_random_features=out_classes).to(device) - model.train() - - # Adam optimizer with learning rate 1e-3 - # optimizer = optim.Adam(model.parameters(), lr=1e-3) - - # SGD optimizer with learning rate 1e-2 - optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - - # 
mean-squared error loss - criterion = nn.CrossEntropyLoss() - - for epoch in range(epochs): - running_loss = [] - indx = list(range(data_size)) - random.shuffle(indx) - curr_X = X[indx] - curr_y = y[indx] - for i in range(math.floor(data_size / batch_size)): - inputs, annos = curr_X[i * batch_size : (i + 1) * batch_size], curr_y[i * batch_size : (i + 1) * batch_size] - inputs = torch.from_numpy(inputs).to(device) - annos = torch.from_numpy(annos).to(device) - # print(inputs.dtype, annos.dtype) - out = model(inputs) - optimizer.zero_grad() - loss = criterion(out, annos) - loss.backward() - optimizer.step() - running_loss.append(loss.item()) - # print('Epoch: %d, Average Loss: %.3f'%(epoch+1, np.mean(running_loss))) - - # Train Accuracy - acc = test(X, y, model) - model.train() - return model - - -def test(test_X, test_y, model, batch_size=128): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model.eval() - total, correct = 0, 0 - data_size = test_X.shape[0] - for i in range(math.ceil(data_size / batch_size)): - inputs, annos = test_X[i * batch_size : (i + 1) * batch_size], test_y[i * batch_size : (i + 1) * batch_size] - inputs = torch.Tensor(inputs).to(device) - annos = torch.Tensor(annos).to(device) - out = model(inputs) - _, predicted = torch.max(out.data, 1) - total += annos.size(0) - correct += (predicted == annos).sum().item() - acc = correct / total * 100 - print("Accuracy: %.2f" % (acc)) - return acc - - -def eval_prediction(pred_y, target_y): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if not isinstance(pred_y, np.ndarray): - pred_y = pred_y.detach().cpu().numpy() - predicted = np.argmax(pred_y, 1) - # print(predicted) - # annos = torch.from_numpy(target_y).to(device) - annos = target_y - total = annos.shape[0] - correct = (predicted == annos).sum().item() - criterion = nn.CrossEntropyLoss() - return correct / total diff --git a/examples/dataset_image_workflow/config.py 
b/examples/dataset_image_workflow/config.py new file mode 100644 index 0000000..ea199a5 --- /dev/null +++ b/examples/dataset_image_workflow/config.py @@ -0,0 +1,62 @@ +from learnware.tests.benchmarks import BenchmarkConfig + + +image_benchmark_config = BenchmarkConfig( + name="CIFAR-10", + user_num=100, + learnware_ids=[ + "00002207", + "00002208", + "00002209", + "00002210", + "00002211", + "00002212", + "00002213", + "00002214", + "00002215", + "00002216", + "00002217", + "00002218", + "00002219", + "00002220", + "00002221", + "00002222", + "00002223", + "00002224", + "00002225", + "00002226", + "00002227", + "00002228", + "00002229", + "00002230", + "00002231", + "00002232", + "00002233", + "00002234", + "00002235", + "00002236", + "00002237", + "00002238", + "00002239", + "00002240", + "00002241", + "00002242", + "00002243", + "00002244", + "00002245", + "00002246", + "00002247", + "00002248", + "00002249", + "00002250", + "00002251", + "00002252", + "00002253", + "00002254", + "00002255", + "00002256", + ], + test_data_path="CIFAR-10/test_data.zip", + train_data_path="CIFAR-10/train_data.zip", + extra_info_path="CIFAR-10/extra_info.zip", +) diff --git a/examples/dataset_image_workflow/model.py b/examples/dataset_image_workflow/model.py new file mode 100644 index 0000000..c1415c7 --- /dev/null +++ b/examples/dataset_image_workflow/model.py @@ -0,0 +1,82 @@ +from torch import nn + + +class ConvModel(nn.Module): + def __init__( + self, + channel, + n_random_features, + net_width=64, + net_depth=3, + net_act="relu", + net_norm="batchnorm", + net_pooling="avgpooling", + im_size=(32, 32), + ): + super().__init__() + self.features, shape_feat = self._make_layers( + channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size + ) + num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2] + self.classifier = nn.Linear(num_feat, n_random_features) + + def forward(self, x): + out = self.features(x) + out = out.reshape(out.size(0), -1) + out = 
self.classifier(out) + return out + + def _get_activation(self, net_act): + if net_act == "sigmoid": + return nn.Sigmoid() + elif net_act == "relu": + return nn.ReLU(inplace=True) + elif net_act == "leakyrelu": + return nn.LeakyReLU(negative_slope=0.01) + elif net_act == "gelu": + return nn.SiLU() + else: + raise Exception("unknown activation function: %s" % net_act) + + def _get_pooling(self, net_pooling): + if net_pooling == "maxpooling": + return nn.MaxPool2d(kernel_size=2, stride=2) + elif net_pooling == "avgpooling": + return nn.AvgPool2d(kernel_size=2, stride=2) + elif net_pooling == "none": + return None + else: + raise Exception("unknown net_pooling: %s" % net_pooling) + + def _get_normlayer(self, net_norm, shape_feat): + if net_norm == "batchnorm": + return nn.BatchNorm2d(shape_feat[0], affine=True) + elif net_norm == "layernorm": + return nn.LayerNorm(shape_feat, elementwise_affine=True) + elif net_norm == "instancenorm": + return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) + elif net_norm == "groupnorm": + return nn.GroupNorm(4, shape_feat[0], affine=True) + elif net_norm == "none": + return None + else: + raise Exception("unknown net_norm: %s" % net_norm) + + def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): + layers = [] + in_channels = channel + shape_feat = [in_channels, im_size[0], im_size[1]] + for d in range(net_depth): + layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding="same")] + + shape_feat[0] = net_width + if net_norm != "none": + layers += [self._get_normlayer(net_norm, shape_feat)] + layers += [self._get_activation(net_act)] + in_channels = net_width + if net_pooling != "none": + layers += [self._get_pooling(net_pooling)] + shape_feat[1] //= 2 + shape_feat[2] //= 2 + + return nn.Sequential(*layers), shape_feat diff --git a/examples/dataset_image_workflow/utils.py b/examples/dataset_image_workflow/utils.py new file mode 100644 index 0000000..1ee200b --- /dev/null +++ 
b/examples/dataset_image_workflow/utils.py @@ -0,0 +1,100 @@ +import torch +import numpy as np +from torch import optim, nn +from torch.utils.data import DataLoader, Dataset + +from learnware.utils import choose_device + + +@torch.no_grad() +def evaluate(model, evaluate_set: Dataset, device=None, distribution=True): + device = choose_device(0) if device is None else device + + if isinstance(model, nn.Module): + model.eval() + mapping = lambda m, x: m(x) + else: # For predict interface + mapping = lambda m, x: m.predict(x) + + criterion = nn.CrossEntropyLoss(reduction="sum") + total, correct, loss = 0, 0, torch.as_tensor(0.0, dtype=torch.float32, device=device) + dataloader = DataLoader(evaluate_set, batch_size=1024, shuffle=True) + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + out = mapping(model, X) + if not torch.is_tensor(out): + out = torch.from_numpy(out).to(device) + + if distribution: + loss += criterion(out, y) + _, predicted = torch.max(out.data, 1) + else: + predicted = out + + total += y.size(0) + correct += (predicted == y).sum().item() + + acc = correct / total * 100 + loss = loss / total + + if isinstance(model, nn.Module): + model.train() + + return loss.item(), acc + + +def train_model( + model: nn.Module, + train_set: Dataset, + valid_set: Dataset, + save_path: str, + epochs=35, + batch_size=128, + device=None, + verbose=True, +): + device = choose_device(0) if device is None else device + + model.train() + # SGD optimizer with learning rate 1e-2 + optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) + # Scheduler + # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) + # mean-squared error loss + criterion = nn.CrossEntropyLoss() + # Prepare DataLoader + dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) + # valid loss + best_loss = 100000 # initially + # Optimizing... 
+ for epoch in range(epochs): + running_loss = [] + model.train() + for i, (X, y) in enumerate(dataloader): + X, y = X.to(device=device), y.to(device=device) + optimizer.zero_grad() + out = model(X) + loss = criterion(out, y) + loss.backward() + optimizer.step() + running_loss.append(loss.item()) + + valid_loss, valid_acc = evaluate(model, valid_set, device=device) + train_loss, train_acc = evaluate(model, train_set, device=device) + if valid_loss < best_loss: + best_loss = valid_loss + + torch.save(model.state_dict(), save_path) + if verbose: + print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch + 1, valid_acc, valid_loss)) + if valid_acc > 99.0: + if verbose: + print("Early Stopping at 99% !") + break + + if verbose and (epoch + 1) % 5 == 0: + print( + "Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}".format( + epoch + 1, np.mean(running_loss), train_acc, valid_loss + ) + ) diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py new file mode 100644 index 0000000..b039446 --- /dev/null +++ b/examples/dataset_image_workflow/workflow.py @@ -0,0 +1,246 @@ +import os +import fire +import time +import torch +import pickle +import random +import tempfile +import numpy as np +import matplotlib.pyplot as plt +from torch.utils.data import TensorDataset + +from learnware.utils import choose_device +from learnware.client import LearnwareClient +from learnware.logger import get_module_logger +from learnware.specification import generate_stat_spec +from learnware.tests.benchmarks import LearnwareBenchmark +from learnware.market import instantiate_learnware_market, BaseUserInfo +from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser +from model import ConvModel +from utils import train_model, evaluate +from config import image_benchmark_config + +logger = get_module_logger("image_workflow", level="INFO") + + +class ImageDatasetWorkflow: + def 
_plot_labeled_peformance_curves(self, all_user_curves_data): + plt.figure(figsize=(10, 6)) + plt.xticks(range(len(self.n_labeled_list)), self.n_labeled_list) + + styles = [ + {"color": "navy", "linestyle": "-", "marker": "o"}, + {"color": "magenta", "linestyle": "-.", "marker": "d"}, + ] + labels = ["User Model", "Multiple Learnware Reuse (EnsemblePrune)"] + + user_mat, pruning_mat = all_user_curves_data + user_mat, pruning_mat = np.array(user_mat), np.array(pruning_mat) + for mat, style, label in zip([user_mat, pruning_mat], styles, labels): + mean_curve, std_curve = 1 - np.mean(mat, axis=0), np.std(mat, axis=0) + plt.plot(mean_curve, **style, label=label) + plt.fill_between( + range(len(mean_curve)), + mean_curve - 0.5 * std_curve, + mean_curve + 0.5 * std_curve, + color=style["color"], + alpha=0.2, + ) + + plt.xlabel("Labeled Data Size") + plt.ylabel("1 - Accuracy") + plt.title(f"Text Limited Labeled Data") + plt.legend() + plt.tight_layout() + plt.savefig(os.path.join(self.fig_path, "image_labeled_curves.png"), bbox_inches="tight", dpi=700) + + def _prepare_market(self, rebuild=False): + client = LearnwareClient() + self.image_benchmark = LearnwareBenchmark().get_benchmark(image_benchmark_config) + self.image_market = instantiate_learnware_market(market_id=self.image_benchmark.name, rebuild=rebuild) + self.user_semantic = client.get_semantic_specification(self.image_benchmark.learnware_ids[0]) + self.user_semantic["Name"]["Values"] = "" + + if len(self.image_market) == 0 or rebuild == True: + for learnware_id in self.image_benchmark.learnware_ids: + with tempfile.TemporaryDirectory(prefix="image_benchmark_") as tempdir: + zip_path = os.path.join(tempdir, f"{learnware_id}.zip") + for i in range(20): + try: + semantic_spec = client.get_semantic_specification(learnware_id) + client.download_learnware(learnware_id, zip_path) + self.image_market.add_learnware(zip_path, semantic_spec) + break + except: + time.sleep(1) + continue + + logger.info("Total Item: %d" % 
(len(self.image_market))) + + def image_example(self, rebuild=False): + np.random.seed(1) + random.seed(1) + self._prepare_market(rebuild) + self.n_labeled_list = [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000] + self.repeated_list = [10, 10, 10, 3, 3, 3, 3, 3, 3] + device = choose_device(0) + + self.root_path = os.path.dirname(os.path.abspath(__file__)) + self.fig_path = os.path.join(self.root_path, "figs") + self.curve_path = os.path.join(self.root_path, "curves") + self.model_path = os.path.join(self.root_path, "models") + os.makedirs(self.fig_path, exist_ok=True) + os.makedirs(self.curve_path, exist_ok=True) + os.makedirs(self.model_path, exist_ok=True) + + select_list = [] + avg_list = [] + best_list = [] + improve_list = [] + job_selector_score_list = [] + ensemble_score_list = [] + all_learnwares = self.image_market.get_learnwares() + + for i in range(self.image_benchmark.user_num): + test_x, test_y = self.image_benchmark.get_test_data(user_ids=i) + train_x, train_y = self.image_benchmark.get_train_data(user_ids=i) + + test_x = torch.from_numpy(test_x) + test_y = torch.from_numpy(test_y) + test_dataset = TensorDataset(test_x, test_y) + + user_stat_spec = generate_stat_spec(type="image", X=test_x, whitening=False) + user_info = BaseUserInfo(semantic_spec=self.user_semantic, stat_info={user_stat_spec.type: user_stat_spec}) + logger.info("Searching Market for user: %d" % (i)) + + search_result = self.image_market.search_learnware(user_info) + single_result = search_result.get_single_results() + multiple_result = search_result.get_multiple_results() + + print(f"search result of user{i}:") + print( + f"single model num: {len(single_result)}, max_score: {single_result[0].score}, min_score: {single_result[-1].score}" + ) + + acc_list = [] + for idx in range(len(all_learnwares)): + learnware = all_learnwares[idx] + loss, acc = evaluate(learnware, test_dataset) + acc_list.append(acc) + + learnware = single_result[0].learnware + best_loss, best_acc = 
evaluate(learnware, test_dataset) + best_list.append(np.max(acc_list)) + select_list.append(best_acc) + avg_list.append(np.mean(acc_list)) + improve_list.append((best_acc - np.mean(acc_list)) / np.mean(acc_list)) + print(f"market mean accuracy: {np.mean(acc_list)}, market best accuracy: {np.max(acc_list)}") + print( + f"Top1-score: {single_result[0].score}, learnware_id: {single_result[0].learnware.id}, acc: {best_acc}" + ) + + if len(multiple_result) > 0: + mixture_id = " ".join([learnware.id for learnware in multiple_result[0].learnwares]) + print(f"mixture_score: {multiple_result[0].score}, mixture_learnware: {mixture_id}") + mixture_learnware_list = multiple_result[0].learnwares + else: + mixture_learnware_list = [single_result[0].learnware] + + # test reuse (job selector) + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) + job_loss, job_acc = evaluate(reuse_job_selector, test_dataset) + job_selector_score_list.append(job_acc) + print(f"mixture reuse accuracy (job selector): {job_acc}") + + # test reuse (ensemble) + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote_by_prob") + ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, test_dataset) + ensemble_score_list.append(ensemble_acc) + print(f"mixture reuse accuracy (ensemble): {ensemble_acc}\n") + + user_model_score_mat = [] + pruning_score_mat = [] + single_score_mat = [] + + for n_label, repeated in zip(self.n_labeled_list, self.repeated_list): + user_model_score_list, reuse_pruning_score_list = [], [] + if n_label > len(train_x): + n_label = len(train_x) + for _ in range(repeated): + x_train, y_train = zip(*random.sample(list(zip(train_x, train_y)), k=n_label)) + x_train = np.array(list(x_train)) + y_train = np.array(list(y_train)) + + x_train = torch.from_numpy(x_train) + y_train = torch.from_numpy(y_train) + sampled_dataset = TensorDataset(x_train, y_train) + + mode_save_path = 
os.path.abspath(os.path.join(self.model_path, "model.pth")) + model = ConvModel( + channel=x_train.shape[1], im_size=(x_train.shape[2], x_train.shape[3]), n_random_features=10 + ).to(device) + train_model( + model, + sampled_dataset, + sampled_dataset, + mode_save_path, + epochs=35, + batch_size=128, + device=device, + verbose=False, + ) + model.load_state_dict(torch.load(mode_save_path)) + _, user_model_acc = evaluate(model, test_dataset, distribution=True) + user_model_score_list.append(user_model_acc) + + reuse_pruning = EnsemblePruningReuser(learnware_list=mixture_learnware_list, mode="classification") + reuse_pruning.fit(x_train, y_train) + _, pruning_acc = evaluate(reuse_pruning, test_dataset, distribution=False) + reuse_pruning_score_list.append(pruning_acc) + + single_score_mat.append([best_acc] * repeated) + user_model_score_mat.append(user_model_score_list) + pruning_score_mat.append(reuse_pruning_score_list) + print(n_label, np.mean(user_model_score_mat[-1]), np.mean(pruning_score_mat[-1])) + + logger.info(f"Saving Curves for User_{i}") + user_curves_data = (single_score_mat, user_model_score_mat, pruning_score_mat) + with open(os.path.join(self.curve_path, f"curve{str(i)}.pkl"), "wb") as f: + pickle.dump(user_curves_data, f) + + logger.info( + "Accuracy of selected learnware: %.3f +/- %.3f, Average performance: %.3f +/- %.3f, Best performance: %.3f +/- %.3f" + % ( + np.mean(select_list), + np.std(select_list), + np.mean(avg_list), + np.std(avg_list), + np.mean(best_list), + np.std(best_list), + ) + ) + logger.info("Average performance improvement: %.3f" % (np.mean(improve_list))) + logger.info( + "Average Job Selector Reuse Performance: %.3f +/- %.3f" + % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) + ) + logger.info( + "Averaging Ensemble Reuse Performance: %.3f +/- %.3f" + % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) + ) + + pruning_curves_data, user_model_curves_data = [], [] + for i in 
range(self.image_benchmark.user_num): + with open(os.path.join(self.curve_path, f"curve{str(i)}.pkl"), "rb") as f: + user_curves_data = pickle.load(f) + (single_score_mat, user_model_score_mat, pruning_score_mat) = user_curves_data + for i in range(len(single_score_mat)): + user_model_score_mat[i] = np.mean(user_model_score_mat[i]) + pruning_score_mat[i] = np.mean(pruning_score_mat[i]) + user_model_curves_data.append(user_model_score_mat) + pruning_curves_data.append(pruning_score_mat) + self._plot_labeled_peformance_curves([user_model_curves_data, pruning_curves_data]) + + +if __name__ == "__main__": + fire.Fire(ImageDatasetWorkflow) From f916e467570c3c2222b1b3ff2eb1944bf55be092 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 26 Dec 2023 10:20:58 +0800 Subject: [PATCH 23/25] [MNT] merge label and unlabel tests --- examples/dataset_image_workflow/workflow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py index b039446..736bc35 100644 --- a/examples/dataset_image_workflow/workflow.py +++ b/examples/dataset_image_workflow/workflow.py @@ -201,7 +201,9 @@ class ImageDatasetWorkflow: single_score_mat.append([best_acc] * repeated) user_model_score_mat.append(user_model_score_list) pruning_score_mat.append(reuse_pruning_score_list) - print(n_label, np.mean(user_model_score_mat[-1]), np.mean(pruning_score_mat[-1])) + print( + f"user_label_num: {n_label}, user_acc: {np.mean(user_model_score_mat[-1])}, pruning_acc: {np.mean(pruning_score_mat[-1])}" + ) logger.info(f"Saving Curves for User_{i}") user_curves_data = (single_score_mat, user_model_score_mat, pruning_score_mat) From 3f4ab7155cfabe2ebe32114fc833e32c43666a9c Mon Sep 17 00:00:00 2001 From: Gene Date: Wed, 27 Dec 2023 16:04:49 +0800 Subject: [PATCH 24/25] [MNT] unify example format --- examples/dataset_image_workflow/README.md | 31 ++ .../benchmarks/__init__.py | 0 .../benchmarks/dataset/__init__.py | 1 - 
.../benchmarks/dataset/data.py | 49 --- .../benchmarks/dataset/utils.py | 96 ------ .../benchmarks/models/__init__.py | 0 .../benchmarks/models/conv/__init__.py | 29 -- .../benchmarks/models/conv/model.py | 71 ----- .../benchmarks/models/conv/requirements.txt | 3 - .../benchmarks/models/learnware.yaml | 8 - .../benchmarks/utils.py | 278 ------------------ examples/dataset_image_workflow/main.py | 161 ---------- examples/dataset_image_workflow/utils.py | 12 +- examples/dataset_image_workflow/workflow.py | 50 ++-- 14 files changed, 64 insertions(+), 725 deletions(-) create mode 100644 examples/dataset_image_workflow/README.md delete mode 100644 examples/dataset_image_workflow/benchmarks/__init__.py delete mode 100644 examples/dataset_image_workflow/benchmarks/dataset/__init__.py delete mode 100644 examples/dataset_image_workflow/benchmarks/dataset/data.py delete mode 100644 examples/dataset_image_workflow/benchmarks/dataset/utils.py delete mode 100644 examples/dataset_image_workflow/benchmarks/models/__init__.py delete mode 100644 examples/dataset_image_workflow/benchmarks/models/conv/__init__.py delete mode 100644 examples/dataset_image_workflow/benchmarks/models/conv/model.py delete mode 100644 examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt delete mode 100644 examples/dataset_image_workflow/benchmarks/models/learnware.yaml delete mode 100644 examples/dataset_image_workflow/benchmarks/utils.py delete mode 100644 examples/dataset_image_workflow/main.py diff --git a/examples/dataset_image_workflow/README.md b/examples/dataset_image_workflow/README.md new file mode 100644 index 0000000..061800f --- /dev/null +++ b/examples/dataset_image_workflow/README.md @@ -0,0 +1,31 @@ +# Image Dataset Workflow Example + +## Introduction + +For the CIFAR-10 dataset, we sampled the training set unevenly by category and constructed unbalanced training datasets for the 50 learnwares that contained only some of the categories. 
This makes it unlikely that there exists any learnware in the learnware market that can accurately handle all categories of data; only the learnware whose training data is closest to the data distribution of the target task is likely to perform well on the target task. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with a non-zero probability of sampling on only 4 categories, and the sampling ratio is 0.4: 0.4: 0.1: 0.1. Ultimately, the training set for each learnware contains 12,000 samples covering the data of 4 categories in CIFAR-10. + +We constructed 50 target tasks using data from the test set of CIFAR-10. Similar to constructing the training set for the learnwares, in order to allow for some variation between tasks, we sampled the test set unevenly. Specifically, the probability of each category being sampled obeys a random multinomial distribution, with non-zero sampling probability on 6 categories, and the sampling ratio is 0.3: 0.3: 0.1: 0.1: 0.1: 0.1. Ultimately, each target task contains 3000 samples covering the data of 6 categories in CIFAR-10. + +With this experimental setup, we evaluated the performance of RKME Image by calculating the mean accuracy across all users. + +| Metric | Value | +|--------------------------------------|---------------------| +| Mean in Market (Single) | 0.346 | +| Best in Market (Single) | 0.688 | +| Top-1 Reuse (Single) | 0.534 | +| Job Selector Reuse (Multiple) | 0.534 | +| Average Ensemble Reuse (Multiple) | 0.676 | + +In some specific settings, the user will have a small number of labeled samples. In such settings, learning the weight of selected learnwares on a limited number of labeled samples can result in a better performance than training directly on a limited number of labeled samples. + +
+ Image Limited Labeled Data +
+ +## Run the code + +Run the following command to start the ``image_example``. + +```bash +python workflow.py image_example +``` \ No newline at end of file diff --git a/examples/dataset_image_workflow/benchmarks/__init__.py b/examples/dataset_image_workflow/benchmarks/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/examples/dataset_image_workflow/benchmarks/dataset/__init__.py b/examples/dataset_image_workflow/benchmarks/dataset/__init__.py deleted file mode 100644 index 8d16d00..0000000 --- a/examples/dataset_image_workflow/benchmarks/dataset/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .data import * \ No newline at end of file diff --git a/examples/dataset_image_workflow/benchmarks/dataset/data.py b/examples/dataset_image_workflow/benchmarks/dataset/data.py deleted file mode 100644 index 81ed6e5..0000000 --- a/examples/dataset_image_workflow/benchmarks/dataset/data.py +++ /dev/null @@ -1,49 +0,0 @@ -import os - -import numpy as np -import torch -from torch.utils.data import random_split, Subset -from torchvision import datasets -from torchvision.transforms import transforms -from torch.utils.data import TensorDataset - -from .utils import cached -from examples.dataset_image_workflow.benchmarks.dataset.utils import split_dataset, build_transforms - -cache_root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'cache')) - -cifar_train = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=transforms.ToTensor()) -cifar_train_X = torch.stack([u[0] for u in cifar_train]) -augment_transform, regular_transform, whiten_transform = build_transforms(cifar_train_X) - -cifar_train_set_augment = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=whiten_transform) -cifar_test_set = datasets.CIFAR10(root=cache_root, download=True, train=False, transform=whiten_transform) -cifar_spec_train_set = datasets.CIFAR10(root=cache_root, download=True, train=True, transform=whiten_transform) 
-cifar_spec_test_set = datasets.CIFAR10(root=cache_root, download=True, train=False, transform=whiten_transform) -train_targets = cifar_train_set_augment.targets -test_targets = cifar_test_set.targets - -def faster_train(device): - global cifar_train_set_augment - global cifar_test_set - global cifar_spec_train_set - global cifar_spec_test_set - cifar_train_set_augment = cached(cifar_train_set_augment, device=device) - cifar_test_set = cached(cifar_test_set, device=device) - cifar_spec_train_set = cached(cifar_spec_train_set, device=device) - cifar_spec_test_set = cached(cifar_spec_test_set, device=device) - -def uploader_data(order=None): - train_indices, order = split_dataset(torch.asarray(train_targets), 12500, split="uploader", order=order) - valid_indices, _ = split_dataset(torch.asarray(test_targets), 2000, split="uploader", order=order) - - return (Subset(cifar_train_set_augment, train_indices), - Subset(cifar_test_set, valid_indices), - Subset(cifar_spec_train_set, train_indices), - order) - -def user_data(indices=None, order=None): - if indices is None: - indices, order = split_dataset(torch.asarray(test_targets), 3000, split="user", order=order) - - return Subset(cifar_test_set, indices), Subset(cifar_spec_test_set, indices), indices, order \ No newline at end of file diff --git a/examples/dataset_image_workflow/benchmarks/dataset/utils.py b/examples/dataset_image_workflow/benchmarks/dataset/utils.py deleted file mode 100644 index 2c6231a..0000000 --- a/examples/dataset_image_workflow/benchmarks/dataset/utils.py +++ /dev/null @@ -1,96 +0,0 @@ -import random -from functools import reduce - -import numpy as np -import torch -import torchvision -from torch.utils.data import TensorDataset, Dataset, DataLoader - -from learnware.utils import choose_device - -torchvision.disable_beta_transforms_warning() -from torchvision.transforms import transforms, v2 - - -def sample_by_labels(labels: torch.Tensor, weights, total_num): - weights = np.asarray(weights) - - 
norm_factor = np.sum(weights) - last_non_zero = np.argwhere(weights > 0)[-1].item() - category_nums = [int(w * total_num / norm_factor) for w in weights[:last_non_zero]] - category_nums += [total_num - sum(category_nums)] - category_nums += [0] * (weights.shape[0] - last_non_zero - 1) - - selected_cls_indexes = [ - random.sample(list(torch.where(labels == c)[0]), k=n) - for c, n in enumerate(category_nums) - ] - - return selected_cls_indexes - - -USER_WEIGHTS = [3, 3, 1, 1, 1, 1, 0, 0, 0, 0] -UPLOADER_WEIGHTS = [4, 4, 1, 1, 0, 0, 0, 0, 0, 0] -def split_dataset(labels, size, split="uploader", order=None): - if split == "uploader": - weights = np.asarray(UPLOADER_WEIGHTS) - elif split == "user": - weights = np.asarray(USER_WEIGHTS) - else: - raise Exception(split) - - if order is None: - order = list(range(len(weights))) - random.shuffle(order) - - selected_data_indexes = reduce(lambda x, y: x+y, sample_by_labels(labels, weights[order], size)) - selected_data_indexes = torch.stack(selected_data_indexes) - - return selected_data_indexes, order - -def build_zca_matrix(X, reg_coef=0.1): - X = (X - torch.mean(X, [0, 2, 3], keepdim=True)) / (torch.std(X, [0, 2, 3], keepdim=True)) - - device = choose_device(0) - X_flat = X.reshape(X.shape[0], -1) - cov = (X_flat.T @ X_flat) / X_flat.shape[0] - reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] - u, s, _ = torch.svd(cov.to(device) + reg_amount * torch.eye(cov.shape[0]).to(device)) - inv_sqrt_zca_eigs = s ** (-0.5) - whitening_transform = torch.einsum( - 'ij,j,kj->ik', u, inv_sqrt_zca_eigs, u) - - return whitening_transform.cpu() - -def build_transforms(train_X): - size = train_X.shape[2], train_X.shape[3] - whitening_matrix = build_zca_matrix(train_X) - - mean_vector = torch.mean(train_X, [0, 2, 3], keepdim=True).squeeze(0) - std_vector = torch.std(train_X, [0, 2, 3], keepdim=True).squeeze(0) - - augment_transform = transforms.Compose([ - transforms.Resize(size), - transforms.ToTensor(), - 
transforms.Normalize(mean=mean_vector, std=std_vector), - ]) - - regular_transform = transforms.Compose([ - transforms.Resize(size), - transforms.ToTensor(), - transforms.Normalize(mean=mean_vector, std=std_vector), - ]) - - whiten_transform = transforms.Compose([ - transforms.Resize(size), - transforms.ToTensor(), - transforms.Normalize(mean=mean_vector, std=std_vector), - # transform_data - transforms.LinearTransformation(whitening_matrix, torch.zeros_like(train_X[0].reshape(-1))) - ]) - - return augment_transform, regular_transform, whiten_transform - -def cached(data: Dataset, device): - X, y = next(iter(DataLoader(data, batch_size=len(data)))) - return TensorDataset(X.to(device), y.to(device)) diff --git a/examples/dataset_image_workflow/benchmarks/models/__init__.py b/examples/dataset_image_workflow/benchmarks/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/examples/dataset_image_workflow/benchmarks/models/conv/__init__.py b/examples/dataset_image_workflow/benchmarks/models/conv/__init__.py deleted file mode 100644 index f8fc5a9..0000000 --- a/examples/dataset_image_workflow/benchmarks/models/conv/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -import os - -import torch -import numpy as np -from learnware.model import BaseModel - -from .model import ConvModel - - -class Model(BaseModel): - def __init__(self, device="cuda", input_channel=3): - super(Model, self).__init__(input_shape=(input_channel, 32, 32), output_shape=(10,)) - dir_path = os.path.dirname(os.path.abspath(__file__)) - self.device =device - self.model = ConvModel(channel=input_channel, n_random_features=10) - self.model.load_state_dict(torch.load(os.path.join(dir_path, "model.pth"))) - self.model.to(device).eval() - - def fit(self, X: np.ndarray, y: np.ndarray): - raise NotImplementedError() - - def predict(self, X: np.ndarray) -> np.ndarray: - return self.model(torch.asarray(X, dtype=torch.float32, device=self.device)) - - def __call__(self, *args, **kwargs): - 
self.predict(*args, **kwargs) - - def finetune(self, X: np.ndarray, y: np.ndarray): - raise NotImplementedError() diff --git a/examples/dataset_image_workflow/benchmarks/models/conv/model.py b/examples/dataset_image_workflow/benchmarks/models/conv/model.py deleted file mode 100644 index b9e04a1..0000000 --- a/examples/dataset_image_workflow/benchmarks/models/conv/model.py +++ /dev/null @@ -1,71 +0,0 @@ -from torch import nn - - -class ConvModel(nn.Module): - def __init__(self, channel, n_random_features, net_width = 64, net_depth = 3, net_act = 'relu', - net_norm = 'batchnorm', net_pooling = 'avgpooling', im_size = (32,32)): - super().__init__() - self.features, shape_feat = self._make_layers(channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size) - num_feat = shape_feat[0]*shape_feat[1]*shape_feat[2] - self.classifier = nn.Linear(num_feat, n_random_features) - - def forward(self, x): - out = self.features(x) - out = out.reshape(out.size(0), -1) - out = self.classifier(out) - return out - - def _get_activation(self, net_act): - if net_act == 'sigmoid': - return nn.Sigmoid() - elif net_act == 'relu': - return nn.ReLU(inplace=True) - elif net_act == 'leakyrelu': - return nn.LeakyReLU(negative_slope=0.01) - elif net_act == 'gelu': - return nn.SiLU() - else: - raise Exception('unknown activation function: %s'%net_act) - - def _get_pooling(self, net_pooling): - if net_pooling == 'maxpooling': - return nn.MaxPool2d(kernel_size=2, stride=2) - elif net_pooling == 'avgpooling': - return nn.AvgPool2d(kernel_size=2, stride=2) - elif net_pooling == 'none': - return None - else: - raise Exception('unknown net_pooling: %s'%net_pooling) - - def _get_normlayer(self, net_norm, shape_feat): - if net_norm == 'batchnorm': - return nn.BatchNorm2d(shape_feat[0], affine=True) - elif net_norm == 'layernorm': - return nn.LayerNorm(shape_feat, elementwise_affine=True) - elif net_norm == 'instancenorm': - return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) - elif 
net_norm == 'groupnorm': - return nn.GroupNorm(4, shape_feat[0], affine=True) - elif net_norm == 'none': - return None - else: - raise Exception('unknown net_norm: %s'%net_norm) - - def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): - layers = [] - in_channels = channel - shape_feat = [in_channels, im_size[0], im_size[1]] - for d in range(net_depth): - layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding='same')] - - shape_feat[0] = net_width - if net_norm != 'none': - layers += [self._get_normlayer(net_norm, shape_feat)] - layers += [self._get_activation(net_act)] - in_channels = net_width - if net_pooling != 'none': - layers += [self._get_pooling(net_pooling)] - shape_feat[1] //= 2 - shape_feat[2] //= 2 - - return nn.Sequential(*layers), shape_feat \ No newline at end of file diff --git a/examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt b/examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt deleted file mode 100644 index c1bb5f6..0000000 --- a/examples/dataset_image_workflow/benchmarks/models/conv/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -torch>2.0.0 -torchvision diff --git a/examples/dataset_image_workflow/benchmarks/models/learnware.yaml b/examples/dataset_image_workflow/benchmarks/models/learnware.yaml deleted file mode 100644 index 0bc6c97..0000000 --- a/examples/dataset_image_workflow/benchmarks/models/learnware.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model: - class_name: Model - kwargs: {} -stat_specifications: - - module_path: learnware.specification - class_name: RKMEImageSpecification - file_name: spec.json - kwargs: {} \ No newline at end of file diff --git a/examples/dataset_image_workflow/benchmarks/utils.py b/examples/dataset_image_workflow/benchmarks/utils.py deleted file mode 100644 index 134f80f..0000000 --- a/examples/dataset_image_workflow/benchmarks/utils.py +++ /dev/null @@ -1,278 +0,0 @@ -import json -import os -import pickle -import 
zipfile -from collections import defaultdict -from shutil import rmtree - -from matplotlib import pyplot as plt -from tabulate import tabulate - -import numpy as np -import torch -from torch import optim, nn -from torch.utils.data import DataLoader, Dataset - -from learnware.client import LearnwareClient -from learnware.learnware import Learnware -from learnware.specification import generate_rkme_image_spec, RKMEImageSpecification -from .dataset import uploader_data, user_data -from .dataset.utils import cached -from .models.conv import ConvModel -from learnware.market import LearnwareMarket -from learnware.utils import choose_device - -from torch.profiler import profile, record_function, ProfilerActivity - -@torch.no_grad() -def evaluate(model, evaluate_set: Dataset, device=None, distribution=True): - device = choose_device(0) if device is None else device - - if isinstance(model, nn.Module): - model.eval() - mapping = lambda m, x: m(x) - else: # For predict interface - mapping = lambda m, x: m.predict(x) - - criterion = nn.CrossEntropyLoss(reduction="sum") - total, correct, loss = 0, 0, torch.as_tensor(0.0, dtype=torch.float32, device=device) - dataloader = DataLoader(evaluate_set, batch_size=1024, shuffle=True) - for i, (X, y) in enumerate(dataloader): - X, y = X.to(device), y.to(device) - out = mapping(model, X) - if not torch.is_tensor(out): - out = torch.from_numpy(out).to(device) - - if distribution: - loss += criterion(out, y) - _, predicted = torch.max(out.data, 1) - else: - predicted = out - - total += y.size(0) - correct += (predicted == y).sum().item() - - acc = correct / total * 100 - loss = loss / total - - if isinstance(model, nn.Module): - model.train() - - return loss.item(), acc - - -def build_learnware(name: str, market: LearnwareMarket, order, model_name="conv", - out_classes=10, epochs=35, batch_size=128, device=None): - device = choose_device(0) if device is None else device - - if name == "cifar10": - train_set, valid_set, spec_set, order = 
uploader_data(order=order) - else: - raise Exception("Not support", name) - - cache_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'cache', 'learnware')) - if os.path.exists(cache_dir): - rmtree(cache_dir) - os.makedirs(cache_dir, exist_ok=True) - - channel = train_set[0][0].shape[0] - image_size = train_set[0][0].shape[1], train_set[0][0].shape[2] - model = ConvModel(channel=channel, im_size=image_size, - n_random_features=out_classes).to(device) - - # train model - save_path = os.path.join(cache_dir, "model.pth") - train_model(model, train_set, valid_set, save_path, epochs=epochs, batch_size=batch_size, device=device) - - # build specification - loader = DataLoader(spec_set, batch_size=3000, shuffle=True) - sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=False) - - # add to market - model_dir = os.path.abspath(os.path.join(__file__, "..", "models")) - spec.save(os.path.join(cache_dir, "spec.json")) - - zip_file = os.path.join(cache_dir, "learnware.zip") - # zip -q -r -j zip_file dir_path - with zipfile.ZipFile(zip_file, "w") as zip_obj: - for foldername, subfolders, filenames in os.walk(os.path.join(model_dir, model_name)): - for filename in filenames: - if filename.endswith(".pyc"): - continue - file_path = os.path.join(foldername, filename) - zip_info = zipfile.ZipInfo(filename) - zip_info.compress_type = zipfile.ZIP_STORED - with open(file_path, "rb") as file: - zip_obj.writestr(zip_info, file.read()) - - for filename, file_path in zip(["spec.json", "model.pth", "learnware.yaml"], - [os.path.join(cache_dir, "spec.json"), - os.path.join(cache_dir, "model.pth"), - os.path.join(model_dir, "learnware.yaml")]): - zip_info = zipfile.ZipInfo(filename) - zip_info.compress_type = zipfile.ZIP_STORED - with open(file_path, "rb") as file: - zip_obj.writestr(zip_info, file.read()) - - print(", ".join([str(o) for o in order])) - market.add_learnware(zip_file, 
semantic_spec=LearnwareClient.create_semantic_specification( - self=None, - name="learnware", - description=", ".join([str(o) for o in order]), - data_type="Image", - task_type="Classification", - library_type="PyTorch", - scenarios=["Computer"], - output_description={"Dimension": out_classes, "Description": {str(i): "i" for i in range(out_classes)}}) - ) - - return model - -def train_model(model: nn.Module, train_set: Dataset, valid_set: Dataset, - save_path: str, epochs=35, batch_size=128, - device=None, verbose=True): - device = choose_device(0) if device is None else device - - model.train() - # SGD optimizer with learning rate 1e-2 - optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - # Scheduler - # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) - # mean-squared error loss - criterion = nn.CrossEntropyLoss() - # Prepare DataLoader - dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) - # valid loss - best_loss = 100000 # initially - # Optimizing... 
- for epoch in range(epochs): - running_loss = [] - model.train() - for i, (X, y) in enumerate(dataloader): - X, y = X.to(device=device), y.to(device=device) - optimizer.zero_grad() - out = model(X) - loss = criterion(out, y) - loss.backward() - optimizer.step() - running_loss.append(loss.item()) - - valid_loss, valid_acc = evaluate(model, valid_set, device=device) - train_loss, train_acc = evaluate(model, train_set, device=device) - if valid_loss < best_loss: - best_loss = valid_loss - - torch.save(model.state_dict(), save_path) - if verbose: - print("Epoch: {}, Valid Best Accuracy: {:.3f}% ({:.3f})".format(epoch+1, valid_acc, valid_loss)) - if valid_acc > 99.0: - if verbose: - print("Early Stopping at 99% !") - break - - if verbose and (epoch + 1) % 5 == 0: - print('Epoch: {}, Train Average Loss: {:.3f}, Accuracy {:.3f}%, Valid Average Loss: {:.3f}'.format( - epoch+1, np.mean(running_loss), train_acc, valid_loss)) - - -def build_specification(name: str, cache_id, order, sampled_size=3000): - cache_dir = os.path.abspath(os.path.join( - os.path.dirname(__file__), '..', 'cache')) - os.makedirs(cache_dir, exist_ok=True) - spec_cache_path = os.path.join(cache_dir, 'spec', "spec_{}.json".format(cache_id)) - - if os.path.exists(spec_cache_path): - spec = RKMEImageSpecification() - spec.load(spec_cache_path) - - test_dataset, spec_dataset, _, _ = user_data(indices=torch.asarray(spec.msg)) - else: - test_dataset, spec_dataset, indices, _ = user_data(order=order) - loader = DataLoader(spec_dataset, batch_size=sampled_size, shuffle=True) - sampled_X, _ = next(iter(loader)) - spec = generate_rkme_image_spec(sampled_X, whitening=False, experimental=False) - - spec.msg = indices.tolist() - spec.save(spec_cache_path) - - # Save test_dataset to disk, spec_dataset is same as test_dataset for now - X, y = next(iter(DataLoader(test_dataset, batch_size=len(test_dataset)))) - with open(os.path.join(cache_dir, 'test_data', "user{}_X.pkl".format(cache_id)), "wb") as f: - 
pickle.dump(X.detach().cpu().numpy(), f) - with open(os.path.join(cache_dir, 'test_data', "user{}_y.pkl".format(cache_id)), "wb") as f: - pickle.dump(y.detach().cpu().numpy(), f) - return spec, test_dataset - - -class Recorder: - - def __init__(self, headers, formats): - assert len(headers) == len(formats) - self.data = defaultdict(list) - self.headers = headers - self.formats = formats - - def record(self, name, *args): - self.data[name].append(args) - - def summary(self): - table = [] - - for name, values in self.data.items(): - value_mean = [np.mean(v) for v in zip(*values)] - value_std = [np.std(v) for v in zip(*values)] - table.append([name] + [f.format(m, s) for f, m, s in zip(self.formats, value_mean, value_std)]) - - return str(tabulate(table, headers=["Case"] + self.headers, tablefmt='orgtbl')) - - def __getitem__(self, item): - return [[x[item] for x in v] for k, v in self.data.items()] - - def save(self, path): - with open(path, "w") as f: - json.dump(self.data, f) - - def load(self, path): - with open(path, "r") as f: - self.data = json.load(f) - - -def plot_labeled_performance_curves(name, user_mat, pruning_mat, n_labeled_list, save_path=None): - plt.figure(figsize=(10, 6)) - plt.xticks(range(len(n_labeled_list)), n_labeled_list) - - mats = [user_mat, pruning_mat] - - styles = [ - {"color": "navy", "linestyle": "-", "marker": "o"}, - {"color": "magenta", "linestyle": "-.", "marker": "d"}, - ] - - labels = [ - "User Model", - "Multiple Learnware Reuse (EnsemblePrune)", - ] - - for mat, style, label in zip(mats, styles, labels): - array_mat = 1 - np.asarray(mat) / 100 - mean_curve, std_curve = np.mean(array_mat, axis=1), np.std(array_mat, axis=1) - plt.plot(mean_curve, **style, label=label) - plt.fill_between( - range(len(n_labeled_list)), - mean_curve - 0.5 * std_curve, - mean_curve + 0.5 * std_curve, - color=style["color"], - alpha=0.2, - ) - - plt.xlabel("Labeled Data Size") - plt.ylabel("1 - Accuracy") - plt.title(f"{name} Homo Limited Labeled Data") 
- plt.legend() - plt.tight_layout() - if save_path: - plt.savefig( - save_path, bbox_inches="tight", dpi=600 - ) - plt.show() diff --git a/examples/dataset_image_workflow/main.py b/examples/dataset_image_workflow/main.py deleted file mode 100644 index 6bf661c..0000000 --- a/examples/dataset_image_workflow/main.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -import random -from datetime import datetime - -import fire -import numpy as np -import tqdm -from numpy import mean -import torch -from torch.utils.data import DataLoader, TensorDataset - -import learnware -from benchmarks.utils import * -from benchmarks.dataset.data import faster_train, uploader_data -from benchmarks.models.conv import ConvModel -from learnware.client import LearnwareClient -from learnware.market import instantiate_learnware_market, BaseUserInfo -from learnware.reuse import JobSelectorReuser, AveragingReuser, EnsemblePruningReuser -from learnware.utils import choose_device - -PROXY_IP = "172.27.138.61" -os.environ["HTTP_PROXY"] = "http://" + PROXY_IP + ":7890" -os.environ["HTTPS_PROXY"] = "http://" + PROXY_IP + ":7890" - - -class ImageDatasetWorkflow: - - def prepare(self, market_size=50, market_id=None, rebuild=False, faster=True): - """initialize learnware market""" - learnware.init() - assert not rebuild - - np.random.seed(0) - random.seed(0) - market_id = "dataset_image_workflow" if market_id is None else market_id - orders = np.stack([np.random.permutation(10) for _ in range(market_size)]) - - print("Using market_id", market_id) - market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=rebuild) - - device = choose_device(0) - if faster: - faster_train(device) - for i, order in enumerate(orders[len(market):]): - print("=" * 20 + "learnware {}".format(len(market)) + "=" * 20) - print("order:", order) - build_learnware("cifar10", market, order, device=device) - - print("Total Item:", len(market)) - - def evaluate(self, user_size=100, market_id=None, faster=True): - 
learnware.init() - - np.random.seed(1) - random.seed(1) - market_id = "dataset_image_workflow" if market_id is None else market_id - orders = np.stack([np.random.permutation(10) for _ in range(user_size)]) - - print("Using market_id", market_id) - market = instantiate_learnware_market(name="easy", market_id=market_id, rebuild=False) - - # Create Folder to save data - train_data_cache_folder = os.path.abspath(os.path.join(__file__, '..', "cache", "train_data")) - test_data_cache_folder = os.path.abspath(os.path.join(__file__, '..', "cache", "test_data")) - os.makedirs(train_data_cache_folder, exist_ok=True) - os.makedirs(test_data_cache_folder, exist_ok=True) - - device = choose_device(0) - if faster: - faster_train(device) - unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) - labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) - for i, order in enumerate(orders): - print("=" * 20 + "user {}".format(i) + "=" * 20) - print("order:", order) - user_spec, dataset = build_specification("cifar10", i, order) - - user_info = BaseUserInfo(semantic_spec=LearnwareClient.create_semantic_specification( - self=None, - description="For Cifar Dataset Workflow", - data_type="Image", - task_type="Classification", - library_type="PyTorch", - scenarios=["Computer"], - output_description={"Dimension": 10, "Description": {str(i): "i" for i in range(10)}}), - stat_info={"RKMEImageSpecification": user_spec}) - - search_result = market.search_learnware(user_info) - single_result = search_result.get_single_results() - multiple_result = search_result.get_multiple_results() - - loss_list, acc_list = [], [] - for item in market.get_learnwares(): - loss, acc = evaluate(item, dataset) - loss_list.append(loss) - acc_list.append(acc) - unlabeled.record("Best", max(acc_list), min(loss_list)) - unlabeled.record("Average", mean(acc_list), mean(loss_list)) - - top_1_loss, top_1_acc = evaluate(single_result[0].learnware, dataset) - 
unlabeled.record("Top-1 Learnware", top_1_acc, top_1_loss) - - reuse_ensemble = AveragingReuser(learnware_list=multiple_result[0].learnwares, mode="vote_by_prob") - ensemble_loss, ensemble_acc = evaluate(reuse_ensemble, dataset) - unlabeled.record("Voting Reuse", ensemble_acc, ensemble_loss) - - reuse_job_selector = JobSelectorReuser(learnware_list=multiple_result[0].learnwares, use_herding=False) - job_loss, job_acc = evaluate(reuse_job_selector, dataset) - unlabeled.record("Job Selector", job_acc, job_loss) - - train_set, _, _, _ = uploader_data(order=order) - X, y = next(iter(DataLoader(train_set, batch_size=len(train_set)))) - with open(os.path.join(train_data_cache_folder, "user{}_X.pkl".format(i)), "wb") as f: - pickle.dump(X.detach().cpu().numpy(), f) - with open(os.path.join(train_data_cache_folder, "user{}_y.pkl".format(i)), "wb") as f: - pickle.dump(y.detach().cpu().numpy(), f) - - for labeled_size in tqdm.tqdm([100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]): - loader = DataLoader(train_set, batch_size=labeled_size, shuffle=True) - X, y = next(iter(loader)) - - sampled_dataset = TensorDataset(X, y) - mode_save_path = os.path.abspath(os.path.join(__file__, "..", "cache", "model.pth")) - model = ConvModel(channel=X.shape[1], im_size=(X.shape[2], X.shape[3]), - n_random_features=10).to(device) - train_model(model, sampled_dataset, sampled_dataset, mode_save_path, - epochs=35, batch_size=128, device=device, verbose=False) - model.load_state_dict(torch.load(mode_save_path)) - _, train_acc = evaluate(model, dataset, distribution=True) - - ensemble_pruning = EnsemblePruningReuser(learnware_list=multiple_result[0].learnwares) - ensemble_pruning.fit(val_X=X, val_y=y) - _, pruning_acc = evaluate(ensemble_pruning, dataset, distribution=False) - - labeled.record("{:d}".format(labeled_size), train_acc, pruning_acc) - - print(unlabeled.summary()) - print(labeled.summary()) - - # Save recorder - current_time = datetime.now() - formatted_time = 
current_time.strftime("%Y-%m-%d_%H-%M-%S") - log_dir = os.path.abspath(os.path.join(__file__, "..", "log", formatted_time)) - os.makedirs(log_dir, exist_ok=True) - unlabeled.save(os.path.join(log_dir, "unlabeled.json")) - labeled.save(os.path.join(log_dir, "labeled.json")) - - def plot(self, record_dir): - unlabeled = Recorder(["Accuracy", "Loss"], ["{:.3f}% ± {:.3f}%", "{:.3f} ± {:.3f}"]) - labeled = Recorder(["Training", "Pruning"], ["{:.3f}% ± {:.3f}%", "{:.3f}% ± {:.3f}%"]) - - unlabeled.load(os.path.join(record_dir, "unlabeled.json")) - labeled.load(os.path.join(record_dir, "labeled.json")) - - plot_labeled_performance_curves("Image", labeled[0], labeled[1], - [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000], - save_path=os.path.abspath(os.path.join(__file__, "..", "labeled.png"))) - - -if __name__ == "__main__": - fire.Fire(ImageDatasetWorkflow) diff --git a/examples/dataset_image_workflow/utils.py b/examples/dataset_image_workflow/utils.py index 1ee200b..1c9625d 100644 --- a/examples/dataset_image_workflow/utils.py +++ b/examples/dataset_image_workflow/utils.py @@ -13,7 +13,7 @@ def evaluate(model, evaluate_set: Dataset, device=None, distribution=True): if isinstance(model, nn.Module): model.eval() mapping = lambda m, x: m(x) - else: # For predict interface + else: mapping = lambda m, x: m.predict(x) criterion = nn.CrossEntropyLoss(reduction="sum") @@ -56,17 +56,11 @@ def train_model( device = choose_device(0) if device is None else device model.train() - # SGD optimizer with learning rate 1e-2 optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) - # Scheduler - # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) - # mean-squared error loss criterion = nn.CrossEntropyLoss() - # Prepare DataLoader dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True) - # valid loss - best_loss = 100000 # initially - # Optimizing... 
+ best_loss = 100000 + for epoch in range(epochs): running_loss = [] model.train() diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py index 736bc35..5c86ba9 100644 --- a/examples/dataset_image_workflow/workflow.py +++ b/examples/dataset_image_workflow/workflow.py @@ -34,25 +34,25 @@ class ImageDatasetWorkflow: ] labels = ["User Model", "Multiple Learnware Reuse (EnsemblePrune)"] - user_mat, pruning_mat = all_user_curves_data - user_mat, pruning_mat = np.array(user_mat), np.array(pruning_mat) - for mat, style, label in zip([user_mat, pruning_mat], styles, labels): - mean_curve, std_curve = 1 - np.mean(mat, axis=0), np.std(mat, axis=0) + user_array, pruning_array = all_user_curves_data + for array, style, label in zip([user_array, pruning_array], styles, labels): + mean_curve = np.array([item[0] for item in array]) + std_curve = np.array([item[1] for item in array]) plt.plot(mean_curve, **style, label=label) plt.fill_between( range(len(mean_curve)), - mean_curve - 0.5 * std_curve, - mean_curve + 0.5 * std_curve, + mean_curve - std_curve, + mean_curve + std_curve, color=style["color"], alpha=0.2, ) - plt.xlabel("Labeled Data Size") - plt.ylabel("1 - Accuracy") - plt.title(f"Text Limited Labeled Data") - plt.legend() + plt.xlabel("Labeled Data Size", fontsize=14) + plt.ylabel("1 - Accuracy", fontsize=14) + plt.title(f"Image Limited Labeled Data", fontsize=16) + plt.legend(fontsize=14) plt.tight_layout() - plt.savefig(os.path.join(self.fig_path, "image_labeled_curves.png"), bbox_inches="tight", dpi=700) + plt.savefig(os.path.join(self.fig_path, "image_labeled_curves.svg"), bbox_inches="tight", dpi=700) def _prepare_market(self, rebuild=False): client = LearnwareClient() @@ -81,8 +81,8 @@ class ImageDatasetWorkflow: np.random.seed(1) random.seed(1) self._prepare_market(rebuild) - self.n_labeled_list = [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000] - self.repeated_list = [10, 10, 10, 3, 3, 3, 3, 3, 3] + 
self.n_labeled_list = [100, 200, 500, 1000, 2000, 4000] + self.repeated_list = [10, 10, 10, 3, 3, 3] device = choose_device(0) self.root_path = os.path.dirname(os.path.abspath(__file__)) @@ -232,15 +232,25 @@ class ImageDatasetWorkflow: ) pruning_curves_data, user_model_curves_data = [], [] - for i in range(self.image_benchmark.user_num): - with open(os.path.join(self.curve_path, f"curve{str(i)}.pkl"), "rb") as f: + total_user_model_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + total_pruning_score_mat = [np.zeros(self.repeated_list[i]) for i in range(len(self.n_labeled_list))] + for user_idx in range(self.image_benchmark.user_num): + with open(os.path.join(self.curve_path, f"curve{str(user_idx)}.pkl"), "rb") as f: user_curves_data = pickle.load(f) (single_score_mat, user_model_score_mat, pruning_score_mat) = user_curves_data - for i in range(len(single_score_mat)): - user_model_score_mat[i] = np.mean(user_model_score_mat[i]) - pruning_score_mat[i] = np.mean(pruning_score_mat[i]) - user_model_curves_data.append(user_model_score_mat) - pruning_curves_data.append(pruning_score_mat) + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] += 1 - np.array(user_model_score_mat[i]) / 100 + total_pruning_score_mat[i] += 1 - np.array(pruning_score_mat[i]) / 100 + + for i in range(len(self.n_labeled_list)): + total_user_model_score_mat[i] /= self.image_benchmark.user_num + total_pruning_score_mat[i] /= self.image_benchmark.user_num + user_model_curves_data.append( + (np.mean(total_user_model_score_mat[i]), np.std(total_user_model_score_mat[i])) + ) + pruning_curves_data.append((np.mean(total_pruning_score_mat[i]), np.std(total_pruning_score_mat[i]))) + self._plot_labeled_peformance_curves([user_model_curves_data, pruning_curves_data]) From ac2503d348b89b51a586e6822a5ccef059345e75 Mon Sep 17 00:00:00 2001 From: Gene Date: Wed, 27 Dec 2023 16:41:34 +0800 Subject: [PATCH 25/25] [MNT] unify figure format --- 
examples/dataset_image_workflow/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dataset_image_workflow/workflow.py b/examples/dataset_image_workflow/workflow.py index 5c86ba9..a2d3f9b 100644 --- a/examples/dataset_image_workflow/workflow.py +++ b/examples/dataset_image_workflow/workflow.py @@ -47,9 +47,9 @@ class ImageDatasetWorkflow: alpha=0.2, ) - plt.xlabel("Labeled Data Size", fontsize=14) + plt.xlabel("Amout of Labeled User Data", fontsize=14) plt.ylabel("1 - Accuracy", fontsize=14) - plt.title(f"Image Limited Labeled Data", fontsize=16) + plt.title(f"Results on Image Experimental Scenario", fontsize=16) plt.legend(fontsize=14) plt.tight_layout() plt.savefig(os.path.join(self.fig_path, "image_labeled_curves.svg"), bbox_inches="tight", dpi=700)