From 5cea67c28ecee2dc80160dea74526211b8a5ac43 Mon Sep 17 00:00:00 2001 From: chenzx Date: Tue, 18 Apr 2023 09:50:54 +0800 Subject: [PATCH] [ENH | MNT] add image example, fix setup conflict --- .gitignore | 1 + examples/example_image/example_init.py | 24 +++ examples/example_image/get_data.py | 283 +++++++++++++++++++++++++ examples/example_image/main.py | 114 ++++++++++ examples/example_image/model.py | 183 ++++++++++++++++ examples/example_image/utils.py | 160 ++++++++++++++ setup.py | 2 +- 7 files changed, 766 insertions(+), 1 deletion(-) create mode 100644 examples/example_image/example_init.py create mode 100644 examples/example_image/get_data.py create mode 100644 examples/example_image/main.py create mode 100644 examples/example_image/model.py create mode 100644 examples/example_image/utils.py diff --git a/.gitignore b/.gitignore index ec190db..ab2e7bb 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ cache/ tmp/ learnware_pool/ PFS/ +data/ diff --git a/examples/example_image/example_init.py b/examples/example_image/example_init.py new file mode 100644 index 0000000..d4a176d --- /dev/null +++ b/examples/example_image/example_init.py @@ -0,0 +1,24 @@ +import os +import joblib +import numpy as np +from learnware.model import BaseModel +from model import ConvModel +import torch + + +class Model(BaseModel): + def __init__(self): + dir_path = os.path.dirname(os.path.abspath(__file__)) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = ConvModel(channel=3, n_random_features=10).to(device) + self.model.load_state_dict(torch.load(os.path.join(dir_path, "conv_model.pth"))) + self.model.eval() + + def fit(self, X: np.ndarray, y: np.ndarray): + pass + + def predict(self, X: np.ndarray) -> np.ndarray: + return self.model(X) + + def finetune(self, X: np.ndarray, y: np.ndarray): + pass diff --git a/examples/example_image/get_data.py b/examples/example_image/get_data.py new file mode 100644 index 0000000..c3af534 --- /dev/null +++ b/examples/example_image/get_data.py @@ -0,0 +1,283 @@ +import torch +from torchvision import datasets, transforms +import torch.nn.functional as F +from scipy.ndimage.interpolation import rotate as scipyrotate + +import numpy as np + + +def get_fashion_mnist(data_root="./data", output_channels=1, image_size=28): + ds_train = datasets.FashionMNIST( + data_root, + train=True, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + X_train = ds_train.data + y_train = ds_train.targets + ds_test = datasets.FashionMNIST( + data_root, + train=False, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + + X_test = ds_test.data + y_test = ds_test.targets + + X_train = X_train[:, None, :, :].float() + X_test = X_test[:, None, :, :].float() + + if output_channels > 1: + X_train = torch.cat([X_train for i in range(output_channels)], 1) + X_test = torch.cat([X_test for i in range(output_channels)], 1) + + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + + return X_train, y_train, X_test, y_test + + +def get_mnist(data_root="./data/", output_channels=1, image_size=28): + ds_train = datasets.MNIST( + data_root, + train=True, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + X_train = [] + + for x, _ in ds_train: + X_train.append(x) + X_train = torch.stack(X_train) + + y_train = ds_train.targets + ds_test = datasets.MNIST( + data_root, + train=False, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + + X_test = [] + + for x, _ in ds_test: + X_test.append(x) + X_test = torch.stack(X_test) + + y_test = ds_test.targets + + if output_channels > 1: + X_train = torch.cat([X_train for i in range(output_channels)], 1) + X_test = torch.cat([X_test for i in range(output_channels)], 1) + + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + + return X_train, y_train, X_test, y_test + + +def get_cifar10(data_root="./data/", output_channels=3, image_size=32): + ds_train = datasets.CIFAR10( + data_root, + train=True, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + X_train = ds_train.data + y_train = ds_train.targets + ds_test = datasets.CIFAR10( + data_root, + train=False, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + + X_test = ds_test.data + y_test = ds_test.targets + + X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) + y_train = torch.Tensor(y_train).long() + X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) + y_test = torch.Tensor(y_test).long() + + if output_channels == 1: + X_train = torch.mean(X_train, 1, keepdim=True) + X_test = torch.mean(X_test, 1, keepdim=True) + + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + + return X_train, y_train, X_test, y_test + + +def get_svhn(output_channels=1, image_size=32): + ds_train = datasets.SVHN( + "./data/", + split="train", + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + X_train = ds_train.data + y_train = ds_train.labels + ds_test = datasets.SVHN( + "./data/", + split="test", + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + + X_test = ds_test.data + y_test = ds_test.labels + + X_train = torch.Tensor(X_train) + y_train = torch.Tensor(y_train).long() + X_test = torch.Tensor(X_test) + y_test = torch.Tensor(y_test).long() + + if output_channels == 1: + X_train = torch.mean(X_train, 1, keepdim=True) + X_test = torch.mean(X_test, 1, keepdim=True) + + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + + return X_train, y_train, X_test, y_test + + +def get_cifar100(data_root="./data/", output_channels=3, image_size=32): + ds_train = datasets.CIFAR100( + data_root, + train=True, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + X_train = ds_train.data + y_train = ds_train.targets + ds_test = datasets.CIFAR100( + data_root, + train=False, + download=True, + transform=transforms.Compose([transforms.ToTensor(), transforms.Resize([image_size, image_size])]), + ) + + X_test = ds_test.data + y_test = ds_test.targets + + X_train = torch.Tensor(np.moveaxis(X_train, 3, 1)) + y_train = torch.Tensor(y_train).long() + X_test = torch.Tensor(np.moveaxis(X_test, 3, 1)) + y_test = torch.Tensor(y_test).long() + + if output_channels == 1: + X_train = torch.mean(X_train, 1, keepdim=True) + X_test = torch.mean(X_test, 1, keepdim=True) + + X_test = (X_test - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + X_train = (X_train - torch.mean(X_train, [0, 2, 3], keepdim=True)) / (torch.std(X_train, [0, 2, 3], keepdim=True)) + + return X_train, y_train, X_test, y_test + + +def get_zca_matrix(X, reg_coef=0.1): + X_flat = X.reshape(X.shape[0], -1) + cov = (X_flat.T @ X_flat) / X_flat.shape[0] + reg_amount = reg_coef * torch.trace(cov) / cov.shape[0] + u, s, _ = torch.svd(cov.cuda() + reg_amount * torch.eye(cov.shape[0]).cuda()) + inv_sqrt_zca_eigs = s ** (-0.5) + whitening_transform = torch.einsum("ij,j,kj->ik", u, inv_sqrt_zca_eigs, u) + + return whitening_transform.cpu() + + +def layernorm_data(X): + X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True) + X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True)) + + return X_processed + + +def transform_data(X, whitening_transform): + if len(whitening_transform.shape) == 2: + X_flat = X.reshape(X.shape[0], -1) + X_flat = X_flat @ whitening_transform + return X_flat.view(*X.shape) + else: + X_flat = X.reshape(X.shape[0], -1) + X_flat = torch.einsum("nd, ndi->ni", X_flat, whitening_transform) + return X_flat.view(*X.shape) + + +def scale_to_zero_one(X): + mins = torch.min(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) + maxes = torch.max(X.view(X.shape[0], -1), 1)[0].view(-1, 1, 1, 1) + return (X - mins) / (maxes - mins) + + +def augment(images, dc_aug_param, device): + # This can be sped up in the future. + + if dc_aug_param != None and dc_aug_param["strategy"] != "none": + scale = dc_aug_param["scale"] + crop = dc_aug_param["crop"] + rotate = dc_aug_param["rotate"] + noise = dc_aug_param["noise"] + strategy = dc_aug_param["strategy"] + + shape = images.shape + mean = [] + for c in range(shape[1]): + mean.append(float(torch.mean(images[:, c]))) + + def cropfun(i): + im_ = torch.zeros(shape[1], shape[2] + crop * 2, shape[3] + crop * 2, dtype=torch.float, device=device) + for c in range(shape[1]): + im_[c] = mean[c] + im_[:, crop : crop + shape[2], crop : crop + shape[3]] = images[i] + r, c = np.random.permutation(crop * 2)[0], np.random.permutation(crop * 2)[0] + images[i] = im_[:, r : r + shape[2], c : c + shape[3]] + + def scalefun(i): + h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) + w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) + tmp = F.interpolate( + images[i : i + 1], + [h, w], + )[0] + mhw = max(h, w, shape[2], shape[3]) + im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device) + r = int((mhw - h) / 2) + c = int((mhw - w) / 2) + im_[:, r : r + h, c : c + w] = tmp + r = int((mhw - shape[2]) / 2) + c = int((mhw - shape[3]) / 2) + images[i] = im_[:, r : r + shape[2], c : c + shape[3]] + + def rotatefun(i): + im_ = scipyrotate( + images[i].cpu().data.numpy(), + angle=np.random.randint(-rotate, rotate), + axes=(-2, -1), + cval=np.mean(mean), + ) + r = int((im_.shape[-2] - shape[-2]) / 2) + c = int((im_.shape[-1] - shape[-1]) / 2) + images[i] = torch.tensor(im_[:, r : r + shape[-2], c : c + shape[-1]], dtype=torch.float, device=device) + + def noisefun(i): + images[i] = images[i] + noise * torch.randn(shape[1:], dtype=torch.float, device=device) + + augs = strategy.split("_") + + for i in range(shape[0]): + choice = np.random.permutation(augs)[0] # randomly implement one augmentation + if choice == "crop": + cropfun(i) + elif choice == "scale": + scalefun(i) + elif choice == "rotate": + rotatefun(i) + elif choice == "noise": + noisefun(i) + + return images diff --git a/examples/example_image/main.py b/examples/example_image/main.py new file mode 100644 index 0000000..9f5168a --- /dev/null +++ b/examples/example_image/main.py @@ -0,0 +1,114 @@ +import numpy as np +import torch +import get_data +import os +import random +from utils import generate_uploader, generate_user, ImageDataLoader, train + +from learnware.market import EasyMarket, BaseUserInfo +from learnware.market import database_ops +from learnware.learnware import Learnware +import learnware.specification as specification + +origin_data_root = "./data/origin_data" +processed_data_root = "./data/processed_data" +dataset = "cifar10" +n_uploaders = 50 +n_users = 10 +n_classes = 10 +data_root = os.path.join(origin_data_root, dataset) +data_save_root = os.path.join(processed_data_root, dataset) +user_save_root = os.path.join(data_save_root, "user") +uploader_save_root = os.path.join(data_save_root, "uploader") +model_save_root = os.path.join(data_save_root, "uploader_model") +os.makedirs(data_root, exist_ok=True) +os.makedirs(user_save_root, exist_ok=True) +os.makedirs(uploader_save_root, exist_ok=True) +os.makedirs(model_save_root, exist_ok=True) + + +semantic_specs = [ + { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": { + "Values": ["Classification"], + "Type": "Class", + }, + "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Nature"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "Description"}, + "Name": {"Values": "learnware_1", "Type": "Name"}, + }, + { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": { + "Values": ["Classification"], + "Type": "Class", + }, + "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "Description"}, + "Name": {"Values": "learnware_2", "Type": "Name"}, + }, + { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": { + "Values": ["Classification"], + "Type": "Class", + }, + "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "Description"}, + "Name": {"Values": "learnware_3", "Type": "Name"}, + }, +] + +user_senmantic = { + "Data": {"Values": ["Tabular"], "Type": "Class"}, + "Task": { + "Values": ["Classification"], + "Type": "Class", + }, + "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": {"Values": "", "Type": "Description"}, + "Name": {"Values": "", "Type": "Name"}, +} + + +def prepare_data(): + if dataset == "cifar10": + X_train, y_train, X_test, y_test = get_data.get_cifar10(data_root) + elif dataset == "mnist": + X_train, y_train, X_test, y_test = get_data.get_mnist(data_root) + else: + return + generate_uploader(X_train, y_train, n_uploaders=n_uploaders, data_save_root=uploader_save_root) + generate_user(X_test, y_test, n_users=n_users, data_save_root=user_save_root) + + +def prepare_model(): + dataloader = ImageDataLoader(data_save_root, train=True) + for i in range(n_uploaders): + print("Train on uploader: %d" % (i)) + X, y = dataloader.get_idx_data(i) + model = train(X, y, out_classes=n_classes) + model_save_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) + torch.save(model.state_dict(), model_save_path) + print("Model saved to '%s'" % (model_save_path)) + + +def prepare_learnware(): + pass + + +def prepare_market(): + for i in range(n_uploaders): + data_path = os.path.join(uploader_save_root, "uploader_%d_X.npy" % (i)) + model_path = os.path.join(model_save_root, "uploader_%d.pth" % (i)) + + +if __name__ == "__main__": + prepare_data() + prepare_model() + prepare_market() diff --git a/examples/example_image/model.py b/examples/example_image/model.py new file mode 100644 index 0000000..3281416 --- /dev/null +++ b/examples/example_image/model.py @@ -0,0 +1,183 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class Linear(nn.Module): + def __init__(self, input_feature=256, num_classes=10): + super().__init__() + self.linear_1 = nn.Linear(input_feature, 128) + self.dropout_1 = nn.Dropout(p=0.5) + self.linear_2 = nn.Linear(128, 128) + self.dropout_2 = nn.Dropout(p=0.5) + self.linear_3 = nn.Linear(128, num_classes) + + def forward(self, x): + out1 = F.relu(self.dropout_1(self.linear_1(x))) + out2 = F.relu(self.dropout_2(self.linear_2(out1))) + out = self.linear_3(out2) + return out + + +class OriginModel(nn.Module): + def __init__(self, last_layer_feature=256): + super().__init__() + self.linear_1 = nn.Linear(last_layer_feature, 128) + self.linear_2 = nn.Linear(128, 128) + self.linear_3 = nn.Linear(128, 10) + + def forward(self, x): + out = F.relu(self.linear_1(x)) + out = F.relu(self.linear_2(out)) + out = self.linear_3(out) + return out + + +class ConvModel(nn.Module): + def __init__( + self, + channel, + n_random_features, + net_width=64, + net_depth=3, + net_act="relu", + net_norm="batchnorm", + net_pooling="avgpooling", + im_size=(32, 32), + ): + super().__init__() + # print('Building Conv Model') + self.features, shape_feat = self._make_layers( + channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size + ) + num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2] + self.classifier = GaussianLinear(num_feat, n_random_features) + + def forward(self, x): + out = self.features(x) + out = out.reshape(out.size(0), -1) + out = self.classifier(out) + return out + + def _get_activation(self, net_act): + if net_act == "sigmoid": + return nn.Sigmoid() + elif net_act == "relu": + return nn.ReLU(inplace=True) + elif net_act == "leakyrelu": + return nn.LeakyReLU(negative_slope=0.01) + elif net_act == "gelu": + return nn.SiLU() + else: + exit("unknown activation function: %s" % net_act) + + def _get_pooling(self, net_pooling): + if net_pooling == "maxpooling": + return nn.MaxPool2d(kernel_size=2, stride=2) + elif net_pooling == "avgpooling": + return nn.AvgPool2d(kernel_size=2, stride=2) + elif net_pooling == "none": + return None + else: + exit("unknown net_pooling: %s" % net_pooling) + + def _get_normlayer(self, net_norm, shape_feat): + # shape_feat = (c*h*w) + if net_norm == "batchnorm": + return nn.BatchNorm2d(shape_feat[0], affine=True) + elif net_norm == "layernorm": + return nn.LayerNorm(shape_feat, elementwise_affine=True) + elif net_norm == "instancenorm": + return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True) + elif net_norm == "groupnorm": + return nn.GroupNorm(4, shape_feat[0], affine=True) + elif net_norm == "none": + return None + else: + exit("unknown net_norm: %s" % net_norm) + + def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size): + layers = [] + in_channels = channel + # if im_size[0] == 28: + # im_size = (32, 32) + shape_feat = [in_channels, im_size[0], im_size[1]] + for d in range(net_depth): + # print(shape_feat) + layers += [Conv2d_gaussian(in_channels, net_width, kernel_size=3, padding=1)] + # layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding='same')] + shape_feat[0] = net_width + if net_norm != "none": + layers += [self._get_normlayer(net_norm, shape_feat)] + layers += [self._get_activation(net_act)] + in_channels = net_width + if net_pooling != "none": + layers += [self._get_pooling(net_pooling)] + shape_feat[1] //= 2 + shape_feat[2] //= 2 + + return nn.Sequential(*layers), shape_feat + + +class Conv2d_gaussian(torch.nn.Conv2d): + def reset_parameters(self) -> None: + # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with + # uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size) + # For more details see: https://github.com/pytorch/pytorch/issues/15314#issuecomment-477448573 + # torch.nn.init.kaiming_normal_(self.weight, a= math.sqrt(5)) + # W has shape out, in, h, w + torch.nn.init.normal_( + self.weight, 0, np.sqrt(2) / np.sqrt(self.weight.shape[1] * self.weight.shape[2] * self.weight.shape[3]) + ) + if self.bias is not None: + fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) + # print(fan_in) + if fan_in != 0: + # bound = 0 * 1 / math.sqrt(fan_in) + # torch.nn.init.uniform_(self.bias, -bound, bound) + # torch.nn.init.uniform_(self.bias, -bound, bound) + torch.nn.init.normal_(self.bias, 0, 0.1) + + +class GaussianLinear(torch.nn.Module): + __constants__ = ["in_features", "out_features"] + in_features: int + out_features: int + weight: torch.Tensor + + def __init__( + self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None, funny=False + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + super(GaussianLinear, self).__init__() + self.funny = funny + self.in_features = in_features + self.out_features = out_features + self.weight = torch.nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs)) + if bias: + self.bias = torch.nn.Parameter(torch.empty(out_features, **factory_kwargs)) + else: + self.register_parameter("bias", None) + self.reset_parameters() + + def reset_parameters(self) -> None: + # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with + # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see + # https://github.com/pytorch/pytorch/issues/57109 + # torch.nn.init.kaiming_normal_(self.weight, a=1 * np.sqrt(5)) + torch.nn.init.normal_(self.weight, 0, np.sqrt(2) / np.sqrt(self.in_features)) + # torch.nn.init.normal_(self.weight, 0, 3/np.sqrt(self.in_features)) + if self.bias is not None: + fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / np.sqrt(fan_in) if fan_in > 0 else 0 + # torch.nn.init.uniform_(self.bias, -bound, bound) + torch.nn.init.normal_(self.bias, 0, 0.1) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return torch.nn.functional.linear(input, self.weight, self.bias) + + def extra_repr(self) -> str: + return "in_features={}, out_features={}, bias={}".format( + self.in_features, self.out_features, self.bias is not None + ) diff --git a/examples/example_image/utils.py b/examples/example_image/utils.py new file mode 100644 index 0000000..0c39c5e --- /dev/null +++ b/examples/example_image/utils.py @@ -0,0 +1,160 @@ +import os +import numpy as np +import random +import math + +import torch +import torch.nn as nn +import torch.optim as optim + +from model import ConvModel + + +class ImageDataLoader: + def __init__(self, data_root, train: bool = True): + self.data_root = data_root + self.train = train + + def get_idx_data(self, idx=0): + if self.train: + X_path = os.path.join(self.data_root, "uploader", "uploader_%d_X.npy" % (idx)) + y_path = os.path.join(self.data_root, "uploader", "uploader_%d_y.npy" % (idx)) + if not (os.path.exists(X_path) and os.path.exists(y_path)): + raise Exception("Index Error") + X = np.load(X_path) + y = np.load(y_path) + else: + X_path = os.path.join(self.data_root, "user", "user_%d_X.npy" % (idx)) + y_path = os.path.join(self.data_root, "user", "user_%d_y.npy" % (idx)) + if not (os.path.exists(X_path) and os.path.exists(y_path)): + raise Exception("Index Error") + X = np.load(X_path) + y = np.load(y_path) + return X, y + + +def generate_uploader(data_x, data_y, n_uploaders=50, data_save_root=None): + if data_save_root is None: + return + os.makedirs(data_save_root, exist_ok=True) + for i in range(n_uploaders): + random_class_num = random.randint(6, 10) + cls_indx = list(range(10)) + random.shuffle(cls_indx) + selected_cls_indx = cls_indx[:random_class_num] + rest_cls_indx = cls_indx[random_class_num:] + selected_data_indx = [] + for cls in selected_cls_indx: + data_indx = list(torch.where(data_y == cls)[0]) + # print(type(data_indx)) + random.shuffle(data_indx) + data_num = random.randint(800, 2000) + selected_indx = data_indx[:data_num] + selected_data_indx = selected_data_indx + selected_indx + for cls in rest_cls_indx: + flag = random.randint(0, 1) + if flag == 0: + continue + data_indx = list(torch.where(data_y == cls)[0]) + random.shuffle(data_indx) + data_num = random.randint(20, 80) + selected_indx = data_indx[:data_num] + selected_data_indx = selected_data_indx + selected_indx + selected_X = data_x[selected_data_indx].numpy() + selected_y = data_y[selected_data_indx].numpy() + print(selected_X.dtype, selected_y.dtype) + print(selected_X.shape, selected_y.shape) + X_save_dir = os.path.join(data_save_root, "uploader_%d_X.npy" % (i)) + y_save_dir = os.path.join(data_save_root, "uploader_%d_y.npy" % (i)) + np.save(X_save_dir, selected_X) + np.save(y_save_dir, selected_y) + print("Saving to %s" % (X_save_dir)) + + +def generate_user(data_x, data_y, n_users=50, data_save_root=None): + if data_save_root is None: + return + os.makedirs(data_save_root, exist_ok=True) + for i in range(n_users): + random_class_num = random.randint(3, 6) + cls_indx = list(range(10)) + random.shuffle(cls_indx) + selected_cls_indx = cls_indx[:random_class_num] + selected_data_indx = [] + for cls in selected_cls_indx: + data_indx = list(torch.where(data_y == cls)[0]) + # print(type(data_indx)) + random.shuffle(data_indx) + data_num = random.randint(150, 350) + selected_indx = data_indx[:data_num] + selected_data_indx = selected_data_indx + selected_indx + # print('Total Index:', len(selected_data_indx)) + selected_X = data_x[selected_data_indx].numpy() + selected_y = data_y[selected_data_indx].numpy() + print(selected_X.shape, selected_y.shape) + X_save_dir = os.path.join(data_save_root, "user_%d_X.npy" % (i)) + y_save_dir = os.path.join(data_save_root, "user_%d_y.npy" % (i)) + np.save(X_save_dir, selected_X) + np.save(y_save_dir, selected_y) + print("Saving to %s" % (X_save_dir)) + + +# Train Uploaders' models +def train(X, y, out_classes, epochs=35, batch_size=128): + print(X.shape, y.shape) + input_feature = X.shape[1] + data_size = X.shape[0] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = ConvModel(channel=input_feature, n_random_features=out_classes).to(device) + model.train() + + # Adam optimizer with learning rate 1e-3 + # optimizer = optim.Adam(model.parameters(), lr=1e-3) + + # SGD optimizer with learning rate 1e-2 + optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) + + # mean-squared error loss + criterion = nn.CrossEntropyLoss() + + for epoch in range(epochs): + running_loss = [] + indx = list(range(data_size)) + random.shuffle(indx) + curr_X = X[indx] + curr_y = y[indx] + for i in range(math.floor(data_size / batch_size)): + inputs, annos = curr_X[i * batch_size : (i + 1) * batch_size], curr_y[i * batch_size : (i + 1) * batch_size] + inputs = torch.from_numpy(inputs).to(device) + annos = torch.from_numpy(annos).to(device) + # print(inputs.dtype, annos.dtype) + out = model(inputs) + optimizer.zero_grad() + loss = criterion(out, annos) + loss.backward() + optimizer.step() + running_loss.append(loss.item()) + # print('Epoch: %d, Average Loss: %.3f'%(epoch+1, np.mean(running_loss))) + + # Train Accuracy + acc = test(X, y, model) + model.train() + return model + + +def test(test_X, test_y, model, batch_size=128): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model.eval() + total, correct = 0, 0 + data_size = test_X.shape[0] + for i in range(math.ceil(data_size / batch_size)): + inputs, annos = test_X[i * batch_size : (i + 1) * batch_size], test_y[i * batch_size : (i + 1) * batch_size] + inputs = torch.Tensor(inputs).to(device) + annos = torch.Tensor(annos).to(device) + out = model(inputs) + _, predicted = torch.max(out.data, 1) + total += annos.size(0) + correct += (predicted == annos).sum().item() + acc = correct / total * 100 + print("Accuracy: %.2f" % (acc)) + return acc diff --git a/setup.py b/setup.py index 709c788..b63ce24 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ if os.path.exists("MANIFEST"): # What packages are required for this module to be executed? # `estimator` may depend on other packages. In order to reduce dependencies, it is not written here. REQUIRED = [ - "numpy>=1.12.0, <1.24", + "numpy>=1.20.0", "pandas>=0.25.1", "scipy>=1.0.0", "matplotlib>=3.1.3",