| @@ -1,184 +0,0 @@ | |||
| package com.ruoyi.platform.domain; | |||
| import com.baomidou.mybatisplus.annotation.TableField; | |||
| import com.fasterxml.jackson.databind.PropertyNamingStrategy; | |||
| import com.fasterxml.jackson.databind.annotation.JsonNaming; | |||
| import com.ruoyi.platform.vo.VersionVo; | |||
| import io.swagger.annotations.ApiModel; | |||
| import io.swagger.annotations.ApiModelProperty; | |||
| import lombok.Data; | |||
| import java.util.Date; | |||
| import java.util.Map; | |||
| @Data | |||
| @JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class) | |||
| @ApiModel(description = "自动机器学习") | |||
| public class AutoMl { | |||
| private Long id; | |||
| @ApiModelProperty(value = "实验名称") | |||
| private String mlName; | |||
| @ApiModelProperty(value = "实验描述") | |||
| private String mlDescription; | |||
| @ApiModelProperty(value = "任务类型:classification或regression") | |||
| private String taskType; | |||
| @ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。") | |||
| private Integer timeLeftForThisTask; | |||
| @ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。") | |||
| private Integer perRunTimeLimit; | |||
| @ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。") | |||
| private Integer ensembleSize; | |||
| @ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。") | |||
| private String ensembleClass; | |||
| @ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。") | |||
| private Integer ensembleNbest; | |||
| @ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。") | |||
| private Integer maxModelsOnDisc; | |||
| @ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。") | |||
| private Integer seed; | |||
| @ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。") | |||
| private Integer memoryLimit; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" + | |||
| "bernoulli_nb\n" + | |||
| "decision_tree\n" + | |||
| "extra_trees\n" + | |||
| "gaussian_nb\n" + | |||
| "gradient_boosting\n" + | |||
| "k_nearest_neighbors\n" + | |||
| "lda\n" + | |||
| "liblinear_svc\n" + | |||
| "libsvm_svc\n" + | |||
| "mlp\n" + | |||
| "multinomial_nb\n" + | |||
| "passive_aggressive\n" + | |||
| "qda\n" + | |||
| "random_forest\n" + | |||
| "sgd") | |||
| private String includeClassifier; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" + | |||
| "extra_trees_preproc_for_classification\n" + | |||
| "extra_trees_preproc_for_regression\n" + | |||
| "fast_ica\n" + | |||
| "feature_agglomeration\n" + | |||
| "kernel_pca\n" + | |||
| "kitchen_sinks\n" + | |||
| "liblinear_svc_preprocessor\n" + | |||
| "no_preprocessing\n" + | |||
| "nystroem_sampler\n" + | |||
| "pca\n" + | |||
| "polynomial\n" + | |||
| "random_trees_embedding\n" + | |||
| "select_percentile_classification\n" + | |||
| "select_percentile_regression\n" + | |||
| "select_rates_classification\n" + | |||
| "select_rates_regression\n" + | |||
| "truncatedSVD") | |||
| private String includeFeaturePreprocessor; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" + | |||
| "ard_regression,\n" + | |||
| "decision_tree,\n" + | |||
| "extra_trees,\n" + | |||
| "gaussian_process,\n" + | |||
| "gradient_boosting,\n" + | |||
| "k_nearest_neighbors,\n" + | |||
| "liblinear_svr,\n" + | |||
| "libsvm_svr,\n" + | |||
| "mlp,\n" + | |||
| "random_forest,\n" + | |||
| "sgd") | |||
| private String includeRegressor; | |||
| private String excludeClassifier; | |||
| private String excludeRegressor; | |||
| private String excludeFeaturePreprocessor; | |||
| @ApiModelProperty(value = "测试集的比率,0到1之间") | |||
| private Float testSize; | |||
| @ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid") | |||
| private String resamplingStrategy; | |||
| @ApiModelProperty(value = "重采样划分训练集和验证集,训练集的比率,0到1之间") | |||
| private Float trainSize; | |||
| @ApiModelProperty(value = "拆分数据前是否进行shuffle") | |||
| private Boolean shuffle; | |||
| @ApiModelProperty(value = "交叉验证的折数,当resamplingStrategy为crossValid时,此项必填,为整数") | |||
| private Integer folds; | |||
| @ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl") | |||
| private String tmpFolder; | |||
| @ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔") | |||
| private String targetColumns; | |||
| @ApiModelProperty(value = "自定义指标名称") | |||
| private String metricName; | |||
| @ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" + | |||
| "balanced_accuracy\n" + | |||
| "roc_auc\n" + | |||
| "average_precision\n" + | |||
| "log_loss\n" + | |||
| "precision_macro\n" + | |||
| "precision_micro\n" + | |||
| "precision_samples\n" + | |||
| "precision_weighted\n" + | |||
| "recall_macro\n" + | |||
| "recall_micro\n" + | |||
| "recall_samples\n" + | |||
| "recall_weighted\n" + | |||
| "f1_macro\n" + | |||
| "f1_micro\n" + | |||
| "f1_samples\n" + | |||
| "f1_weighted\n" + | |||
| "回归的指标包含:mean_absolute_error\n" + | |||
| "mean_squared_error\n" + | |||
| "root_mean_squared_error\n" + | |||
| "mean_squared_log_error\n" + | |||
| "median_absolute_error\n" + | |||
| "r2") | |||
| private String metrics; | |||
| @ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好") | |||
| private Boolean greaterIsBetter; | |||
| @ApiModelProperty(value = "模型计算并打印指标") | |||
| private String scoringFunctions; | |||
| private Integer state; | |||
| private String runState; | |||
| private Double progress; | |||
| private String createBy; | |||
| private Date createTime; | |||
| private String updateBy; | |||
| private Date updateTime; | |||
| private String dataset; | |||
| @ApiModelProperty(value = "状态列表") | |||
| private String statusList; | |||
| } | |||
| @@ -1,50 +0,0 @@ | |||
| package com.ruoyi.platform.domain; | |||
| import com.fasterxml.jackson.databind.PropertyNamingStrategy; | |||
| import com.fasterxml.jackson.databind.annotation.JsonNaming; | |||
| import io.swagger.annotations.ApiModel; | |||
| import io.swagger.annotations.ApiModelProperty; | |||
| import lombok.Data; | |||
| import java.util.Date; | |||
| @Data | |||
| @JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class) | |||
| @ApiModel(description = "自动机器学习实验实例") | |||
| public class AutoMlIns { | |||
| private Long id; | |||
| private Long autoMlId; | |||
| private String resultPath; | |||
| private String modelPath; | |||
| private String imgPath; | |||
| private String runHistoryPath; | |||
| private Integer state; | |||
| private String status; | |||
| private String nodeStatus; | |||
| private String nodeResult; | |||
| private String param; | |||
| private String source; | |||
| @ApiModelProperty(value = "Argo实例名称") | |||
| private String argoInsName; | |||
| @ApiModelProperty(value = "Argo命名空间") | |||
| private String argoInsNs; | |||
| private Date createTime; | |||
| private Date updateTime; | |||
| private Date finishTime; | |||
| } | |||
| @@ -1,106 +0,0 @@ | |||
| package com.ruoyi.platform.scheduling; | |||
| import com.ruoyi.system.api.constant.Constant; | |||
| import com.ruoyi.platform.domain.AutoMl; | |||
| import com.ruoyi.platform.domain.AutoMlIns; | |||
| import com.ruoyi.platform.mapper.AutoMlDao; | |||
| import com.ruoyi.platform.mapper.AutoMlInsDao; | |||
| import com.ruoyi.platform.service.AutoMlInsService; | |||
| import org.apache.commons.lang3.StringUtils; | |||
| import org.springframework.scheduling.annotation.Scheduled; | |||
| import org.springframework.stereotype.Component; | |||
| import javax.annotation.Resource; | |||
| import java.util.ArrayList; | |||
| import java.util.HashSet; | |||
| import java.util.Iterator; | |||
| import java.util.List; | |||
| @Component() | |||
| public class AutoMlInsStatusTask { | |||
| @Resource | |||
| private AutoMlInsService autoMlInsService; | |||
| @Resource | |||
| private AutoMlInsDao autoMlInsDao; | |||
| @Resource | |||
| private AutoMlDao autoMlDao; | |||
| private HashSet<Long> autoMlIds = new HashSet<>(); | |||
| @Scheduled(cron = "0/10 * * * * ?") // 每10S执行一次 | |||
| public void executeAutoMlInsStatus() { | |||
| // 首先查到所有非终止态的实验实例 | |||
| List<AutoMlIns> autoMlInsList = autoMlInsService.queryByAutoMlInsIsNotTerminated(); | |||
| // 去argo查询状态 | |||
| List<AutoMlIns> updateList = new ArrayList<>(); | |||
| if (autoMlInsList != null && autoMlInsList.size() > 0) { | |||
| for (AutoMlIns autoMlIns : autoMlInsList) { | |||
| //当原本状态为null或非终止态时才调用argo接口 | |||
| try { | |||
| autoMlIns = autoMlInsService.queryStatusFromArgo(autoMlIns); | |||
| } catch (Exception e) { | |||
| autoMlIns.setStatus(Constant.Failed); | |||
| } | |||
| // 线程安全的添加操作 | |||
| synchronized (autoMlIds) { | |||
| autoMlIds.add(autoMlIns.getAutoMlId()); | |||
| } | |||
| updateList.add(autoMlIns); | |||
| } | |||
| if (updateList.size() > 0) { | |||
| for (AutoMlIns autoMlIns : updateList) { | |||
| autoMlInsDao.update(autoMlIns); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @Scheduled(cron = "0/10 * * * * ?") // / 每30S执行一次 | |||
| public void executeAutoMlStatus() { | |||
| if (autoMlIds.isEmpty()) { | |||
| return; | |||
| } | |||
| // 存储需要更新的实验对象列表 | |||
| List<AutoMl> updateAutoMls = new ArrayList<>(); | |||
| Iterator<Long> iterator1 = autoMlIds.iterator(); | |||
| while (iterator1.hasNext()) { | |||
| Long autoMlId = iterator1.next(); | |||
| // 获取当前实验的所有实例列表 | |||
| List<AutoMlIns> insList = autoMlInsDao.getByAutoMlId(autoMlId); | |||
| List<String> statusList = new ArrayList<>(); | |||
| // 更新实验状态列表 | |||
| for (int i = 0; i < insList.size(); i++) { | |||
| statusList.add(insList.get(i).getStatus()); | |||
| } | |||
| String subStatus = statusList.toString().substring(1, statusList.toString().length() - 1); | |||
| AutoMl autoMl = autoMlDao.getAutoMlById(autoMlId); | |||
| if (autoMl == null) { | |||
| iterator1.remove(); | |||
| } else { | |||
| if (!StringUtils.equals(autoMl.getStatusList(), subStatus)) { | |||
| autoMl.setStatusList(subStatus); | |||
| updateAutoMls.add(autoMl); | |||
| autoMlDao.edit(autoMl); | |||
| } | |||
| } | |||
| } | |||
| if (!updateAutoMls.isEmpty()) { | |||
| // 使用Iterator进行安全的删除操作 | |||
| Iterator<Long> iterator = autoMlIds.iterator(); | |||
| while (iterator.hasNext()) { | |||
| Long autoMlId = iterator.next(); | |||
| for (AutoMl autoMl : updateAutoMls) { | |||
| if (autoMl.getId().equals(autoMlId)) { | |||
| iterator.remove(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -1,183 +0,0 @@ | |||
| package com.ruoyi.platform.vo; | |||
| import com.baomidou.mybatisplus.annotation.TableField; | |||
| import com.fasterxml.jackson.databind.PropertyNamingStrategy; | |||
| import com.fasterxml.jackson.databind.annotation.JsonNaming; | |||
| import io.swagger.annotations.ApiModel; | |||
| import io.swagger.annotations.ApiModelProperty; | |||
| import lombok.Data; | |||
| import java.util.Date; | |||
| import java.util.Map; | |||
| @Data | |||
| @JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class) | |||
| @ApiModel(description = "自动机器学习") | |||
| public class AutoMlVo { | |||
| private Long id; | |||
| @ApiModelProperty(value = "实验名称") | |||
| private String mlName; | |||
| @ApiModelProperty(value = "实验描述") | |||
| private String mlDescription; | |||
| @ApiModelProperty(value = "任务类型:classification或regression") | |||
| private String taskType; | |||
| @ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。") | |||
| private Integer timeLeftForThisTask; | |||
| @ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。") | |||
| private Integer perRunTimeLimit; | |||
| @ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。") | |||
| private Integer ensembleSize; | |||
| @ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。") | |||
| private String ensembleClass; | |||
| @ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。") | |||
| private Integer ensembleNbest; | |||
| @ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。") | |||
| private Integer maxModelsOnDisc; | |||
| @ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。") | |||
| private Integer seed; | |||
| @ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。") | |||
| private Integer memoryLimit; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" + | |||
| "bernoulli_nb\n" + | |||
| "decision_tree\n" + | |||
| "extra_trees\n" + | |||
| "gaussian_nb\n" + | |||
| "gradient_boosting\n" + | |||
| "k_nearest_neighbors\n" + | |||
| "lda\n" + | |||
| "liblinear_svc\n" + | |||
| "libsvm_svc\n" + | |||
| "mlp\n" + | |||
| "multinomial_nb\n" + | |||
| "passive_aggressive\n" + | |||
| "qda\n" + | |||
| "random_forest\n" + | |||
| "sgd") | |||
| private String includeClassifier; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" + | |||
| "extra_trees_preproc_for_classification\n" + | |||
| "extra_trees_preproc_for_regression\n" + | |||
| "fast_ica\n" + | |||
| "feature_agglomeration\n" + | |||
| "kernel_pca\n" + | |||
| "kitchen_sinks\n" + | |||
| "liblinear_svc_preprocessor\n" + | |||
| "no_preprocessing\n" + | |||
| "nystroem_sampler\n" + | |||
| "pca\n" + | |||
| "polynomial\n" + | |||
| "random_trees_embedding\n" + | |||
| "select_percentile_classification\n" + | |||
| "select_percentile_regression\n" + | |||
| "select_rates_classification\n" + | |||
| "select_rates_regression\n" + | |||
| "truncatedSVD") | |||
| private String includeFeaturePreprocessor; | |||
| @ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" + | |||
| "ard_regression,\n" + | |||
| "decision_tree,\n" + | |||
| "extra_trees,\n" + | |||
| "gaussian_process,\n" + | |||
| "gradient_boosting,\n" + | |||
| "k_nearest_neighbors,\n" + | |||
| "liblinear_svr,\n" + | |||
| "libsvm_svr,\n" + | |||
| "mlp,\n" + | |||
| "random_forest,\n" + | |||
| "sgd") | |||
| private String includeRegressor; | |||
| private String excludeClassifier; | |||
| private String excludeRegressor; | |||
| private String excludeFeaturePreprocessor; | |||
| @ApiModelProperty(value = "测试集的比率,0到1之间") | |||
| private Float testSize; | |||
| @ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid") | |||
| private String resamplingStrategy; | |||
| @ApiModelProperty(value = "重采样划分训练集和验证集,训练集的比率,0到1之间") | |||
| private Float trainSize; | |||
| @ApiModelProperty(value = "拆分数据前是否进行shuffle") | |||
| private Boolean shuffle; | |||
| @ApiModelProperty(value = "交叉验证的折数,当resamplingStrategy为crossValid时,此项必填,为整数") | |||
| private Integer folds; | |||
| @ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl") | |||
| private String tmpFolder; | |||
| @ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔") | |||
| private String targetColumns; | |||
| @ApiModelProperty(value = "自定义指标名称") | |||
| private String metricName; | |||
| @ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" + | |||
| "balanced_accuracy\n" + | |||
| "roc_auc\n" + | |||
| "average_precision\n" + | |||
| "log_loss\n" + | |||
| "precision_macro\n" + | |||
| "precision_micro\n" + | |||
| "precision_samples\n" + | |||
| "precision_weighted\n" + | |||
| "recall_macro\n" + | |||
| "recall_micro\n" + | |||
| "recall_samples\n" + | |||
| "recall_weighted\n" + | |||
| "f1_macro\n" + | |||
| "f1_micro\n" + | |||
| "f1_samples\n" + | |||
| "f1_weighted\n" + | |||
| "回归的指标包含:mean_absolute_error\n" + | |||
| "mean_squared_error\n" + | |||
| "root_mean_squared_error\n" + | |||
| "mean_squared_log_error\n" + | |||
| "median_absolute_error\n" + | |||
| "r2") | |||
| private String metrics; | |||
| @ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好") | |||
| private Boolean greaterIsBetter; | |||
| @ApiModelProperty(value = "模型计算并打印指标") | |||
| private String scoringFunctions; | |||
| private Integer state; | |||
| private String runState; | |||
| private Double progress; | |||
| private String createBy; | |||
| private Date createTime; | |||
| private String updateBy; | |||
| private Date updateTime; | |||
| /** | |||
| * 对应数据集 | |||
| */ | |||
| private Map<String,Object> dataset; | |||
| } | |||