# beimingwu / learnware

import joblib
import os
from sklearn.metrics import mean_squared_error


from .pfs_cross_transfer import *
from .split_data import feature_engineering


class Dataloader:
    """Convenience wrapper around the per-shop PFS data, models and result files.

    Shops are addressed by a dense index ``idx`` (``0 .. len(get_idx_list()) - 1``)
    which is mapped onto the real shop number through ``_SHOP_IDS``. The
    currently selected algorithm (``self.algo``) determines which model and
    result files are read or written.
    """

    # Shop numbers 0-59 minus the excluded shops, in ascending order. This is
    # the single source of truth for the idx -> shop number mapping used by
    # every method below.
    _SHOP_IDS = tuple(sorted(set(range(60)) - {0, 1, 8, 11, 23, 36, 40}))

    def __init__(self):
        """Create a loader with ``"ridge"`` as the selected algorithm."""
        self.algo = "ridge"

    def _shop_id(self, idx):
        """Return the shop number that corresponds to the dense index ``idx``.

        Raises:
            IndexError: if ``idx`` is outside the range of known shops.
        """
        return self._SHOP_IDS[idx]

    def regenerate_data(self):
        """Re-run ``feature_engineering`` from the ``split_data`` module."""
        feature_engineering()

    def set_algo(self, algo):
        """Select the algorithm name used by subsequent calls."""
        self.algo = algo

    def get_algo_list(self):
        """Return the list of algorithm names this loader advertises."""
        return ["lgb", "ridge"]

    def get_idx_list(self):
        """Return every valid dense shop index, in order."""
        return list(range(len(self._SHOP_IDS)))

    def get_idx_data(self, idx):
        """Load the train and validation splits of the shop at index ``idx``.

        Returns:
            A ``(train_xs, train_ys, test_xs, test_ys)`` tuple built from the
            first two values ``load_pfs_data`` returns for the shop's
            ``-train.csv`` and ``-val.csv`` files respectively.
        """
        shop_id = self._shop_id(idx)

        train_path = os.path.join(pfs_split_dir, "Shop{:0>2d}-train.csv".format(shop_id))
        train_xs, train_ys, _, _ = load_pfs_data(train_path)

        # The validation split plays the role of the test set here.
        val_path = os.path.join(pfs_split_dir, "Shop{:0>2d}-val.csv".format(shop_id))
        test_xs, test_ys, _, _ = load_pfs_data(val_path)

        return train_xs, train_ys, test_xs, test_ys

    def get_model_path(self, idx):
        """Return the path of the stored model for shop ``idx`` and ``self.algo``."""
        model_name = "{}_Shop{:0>2d}.out".format(self.algo, self._shop_id(idx))
        return os.path.join(model_dir, model_name)

    def retrain_models(self):
        """Recompute the error matrix for ``self.algo``, save it and plot it.

        The transposed result of ``get_errors`` is written to
        ``PFS_<algo>_errs.txt`` and passed to ``plot_heatmap``; the existing
        ``PFS_<algo>_weights.txt`` file is then loaded for ``plot_performance``.
        """
        algo = self.algo
        # NOTE(review): presumably get_errors (re)trains the models as a side
        # effect -- confirm in pfs_cross_transfer.
        errs = get_errors(algo=algo)

        np.savetxt(os.path.join(pfs_res_dir, "PFS_{}_errs.txt".format(algo)), errs.T)

        plot_heatmap(errs.T, algo)
        weights = np.loadtxt(os.path.join(pfs_res_dir, "PFS_{}_weights.txt".format(algo)))
        plot_performance(errs.T, weights, algo)

    def retrain_split_models(self):
        """Return the split errors for ``self.algo``, computing them at most once.

        If ``PFS_<algo>_split_errs_user.txt`` already exists its contents are
        returned; otherwise ``get_split_errs`` is called and its result is
        saved to that file before being returned.
        """
        algo = self.algo
        cache_path = os.path.join(pfs_res_dir, "PFS_{}_split_errs_user.txt".format(algo))
        if os.path.exists(cache_path):
            return np.loadtxt(cache_path)

        errs = get_split_errs(algo=algo)
        np.savetxt(cache_path, errs)
        return errs

    def get_errs(self):
        """Load the saved error matrix (``PFS_<algo>_errs.txt``) for ``self.algo``."""
        return np.loadtxt(os.path.join(pfs_res_dir, "PFS_{}_errs.txt".format(self.algo)))

    def get_weights(self):
        """Load the saved weights (``PFS_<algo>_weights.txt``) for ``self.algo``."""
        return np.loadtxt(os.path.join(pfs_res_dir, "PFS_{}_weights.txt".format(self.algo)))

    def predict(self, idx, test_x):
        """Predict on ``test_x`` with the stored model of shop ``idx``.

        The model file is the one ``get_model_path(idx)`` points to. ``test_x``
        is handed to the model unchanged; no scaling is applied here.
        """
        # joblib.load unpickles the file, so only trusted model files may be
        # placed in model_dir.
        model = joblib.load(self.get_model_path(idx))
        return model.predict(test_x)

    def score(self, real_y, pred_y, sample_weight=None):
        """Return the root mean squared error between ``real_y`` and ``pred_y``.

        Args:
            real_y: ground-truth targets.
            pred_y: predicted targets.
            sample_weight: optional per-sample weights forwarded to scikit-learn.
        """
        # The ``squared`` keyword of mean_squared_error was deprecated in
        # scikit-learn 1.4 and later removed; prefer the dedicated function
        # and only fall back to the legacy call on older installations.
        try:
            from sklearn.metrics import root_mean_squared_error
        except ImportError:
            return mean_squared_error(real_y, pred_y, sample_weight=sample_weight, squared=False)
        return root_mean_squared_error(real_y, pred_y, sample_weight=sample_weight)