import os

import joblib
import numpy as np
from sklearn.metrics import mean_squared_error

from .pfs_cross_transfer import *
from .split_data import feature_engineering

# Shop ids in range(60) that have no per-shop split/model; the remaining ids,
# in ascending order, are what the positional ``idx`` arguments index into.
_EXCLUDED_SHOP_IDS = frozenset({0, 1, 40, 8, 11, 23, 36})
_SHOP_IDS = tuple(i for i in range(60) if i not in _EXCLUDED_SHOP_IDS)


class Dataloader:
    """Access per-shop PFS data splits, trained models and result files.

    Directory names and the training/plotting helpers used here
    (``pfs_split_dir``, ``model_dir``, ``pfs_res_dir``, ``load_pfs_data``,
    ``get_errors``, ``get_split_errs``, ``plot_heatmap``,
    ``plot_performance``) are not defined in this module; they are expected
    to come from the ``pfs_cross_transfer`` star import.
    """

    def __init__(self):
        # Algorithm name used to build model and result file names.
        self.algo = "ridge"

    @staticmethod
    def _shop_id(idx):
        """Translate a positional index into the corresponding shop id."""
        return _SHOP_IDS[idx]

    def regenerate_data(self):
        """Re-run ``feature_engineering`` from the ``split_data`` module."""
        feature_engineering()

    def set_algo(self, algo):
        """Select the algorithm name used by subsequent calls."""
        self.algo = algo

    def get_algo_list(self):
        """Return the algorithm names this loader advertises."""
        return ["lgb", "ridge"]

    def get_idx_list(self):
        """Return every valid positional index (one per retained shop)."""
        return list(range(len(_SHOP_IDS)))

    def get_idx_data(self, idx):
        """Load the train and validation splits of the shop at ``idx``.

        Returns:
            ``(train_xs, train_ys, test_xs, test_ys)`` -- the first two
            values returned by ``load_pfs_data`` for the ``-train.csv`` file,
            followed by the first two for the ``-val.csv`` file.

        Raises:
            IndexError: if ``idx`` is outside the retained shop list.
        """
        shop_id = self._shop_id(idx)

        train_path = os.path.join(
            pfs_split_dir, "Shop{:0>2d}-train.csv".format(shop_id)
        )
        train_xs, train_ys, _, _ = load_pfs_data(train_path)

        val_path = os.path.join(
            pfs_split_dir, "Shop{:0>2d}-val.csv".format(shop_id)
        )
        test_xs, test_ys, _, _ = load_pfs_data(val_path)

        return train_xs, train_ys, test_xs, test_ys

    def get_model_path(self, idx):
        """Return the model file path for the current algo and shop ``idx``."""
        return os.path.join(
            model_dir,
            "{}_Shop{:0>2d}.out".format(self.algo, self._shop_id(idx)),
        )

    def retrain_models(self):
        """Recompute the error matrix for the current algo, save and plot it.

        The transposed errors are written to ``PFS_<algo>_errs.txt``; the
        weights are read from the existing ``PFS_<algo>_weights.txt`` file,
        which must already be present in ``pfs_res_dir``.
        """
        algo = self.algo
        errs = get_errors(algo=algo)

        np.savetxt(
            os.path.join(pfs_res_dir, "PFS_{}_errs.txt".format(algo)), errs.T
        )
        plot_heatmap(errs.T, algo)

        weights = np.loadtxt(
            os.path.join(pfs_res_dir, "PFS_{}_weights.txt".format(algo))
        )
        plot_performance(errs.T, weights, algo)

    def retrain_split_models(self):
        """Return the split errors for the current algo, computing if needed.

        A previously saved ``PFS_<algo>_split_errs_user.txt`` is loaded and
        returned as-is; otherwise ``get_split_errs`` is run and its result is
        saved to that file before being returned.
        """
        algo = self.algo
        fpath = os.path.join(
            pfs_res_dir, "PFS_{}_split_errs_user.txt".format(algo)
        )
        if os.path.exists(fpath):
            return np.loadtxt(fpath)

        errs = get_split_errs(algo=algo)
        np.savetxt(fpath, errs)
        return errs

    def get_errs(self):
        """Load the saved error matrix for the current algo."""
        return np.loadtxt(
            os.path.join(pfs_res_dir, "PFS_{}_errs.txt".format(self.algo))
        )

    def get_weights(self):
        """Load the saved weights for the current algo."""
        return np.loadtxt(
            os.path.join(pfs_res_dir, "PFS_{}_weights.txt".format(self.algo))
        )

    def predict(self, idx, test_x):
        """Predict with the stored model of shop ``idx`` for the current algo.

        ``test_x`` is handed to the model unchanged.
        """
        # NOTE: joblib.load unpickles the file, so model files must come from
        # a trusted location.
        model = joblib.load(self.get_model_path(idx))
        # Min-max rescaling of the input was disabled in the original code:
        # test_x = (test_x - test_x.min(0)) / (test_x.max(0) - test_x.min(0) + 0.0001)
        return model.predict(test_x)

    def score(self, real_y, pred_y, sample_weight=None):
        """Return the root mean squared error between ``real_y`` and ``pred_y``.

        The ``squared=False`` keyword of ``mean_squared_error`` was removed
        from recent scikit-learn releases, so the root is taken here instead.
        For multi-output targets the per-output RMSE values are averaged
        uniformly.
        """
        per_output_mse = mean_squared_error(
            real_y,
            pred_y,
            sample_weight=sample_weight,
            multioutput="raw_values",
        )
        return np.sqrt(per_output_mse).mean()