Browse Source

优化

pull/235/head
chenzhihang 10 months ago
parent
commit
bee9f43762
4 changed files with 0 additions and 523 deletions
  1. +0
    -184
      ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java
  2. +0
    -50
      ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMlIns.java
  3. +0
    -106
      ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/scheduling/AutoMlInsStatusTask.java
  4. +0
    -183
      ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/vo/AutoMlVo.java

+ 0
- 184
ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java View File

@@ -1,184 +0,0 @@
package com.ruoyi.platform.domain;

import com.baomidou.mybatisplus.annotation.TableField;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import com.ruoyi.platform.vo.VersionVo;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;

import java.util.Date;
import java.util.Map;

@Data
@JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class)
@ApiModel(description = "自动机器学习")
public class AutoMl {
private Long id;

@ApiModelProperty(value = "实验名称")
private String mlName;

@ApiModelProperty(value = "实验描述")
private String mlDescription;

@ApiModelProperty(value = "任务类型:classification或regression")
private String taskType;

@ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。")
private Integer timeLeftForThisTask;

@ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。")
private Integer perRunTimeLimit;

@ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。")
private Integer ensembleSize;

@ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。")
private String ensembleClass;

@ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。")
private Integer ensembleNbest;

@ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。")
private Integer maxModelsOnDisc;

@ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。")
private Integer seed;

@ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。")
private Integer memoryLimit;

@ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" +
"bernoulli_nb\n" +
"decision_tree\n" +
"extra_trees\n" +
"gaussian_nb\n" +
"gradient_boosting\n" +
"k_nearest_neighbors\n" +
"lda\n" +
"liblinear_svc\n" +
"libsvm_svc\n" +
"mlp\n" +
"multinomial_nb\n" +
"passive_aggressive\n" +
"qda\n" +
"random_forest\n" +
"sgd")
private String includeClassifier;

@ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" +
"extra_trees_preproc_for_classification\n" +
"extra_trees_preproc_for_regression\n" +
"fast_ica\n" +
"feature_agglomeration\n" +
"kernel_pca\n" +
"kitchen_sinks\n" +
"liblinear_svc_preprocessor\n" +
"no_preprocessing\n" +
"nystroem_sampler\n" +
"pca\n" +
"polynomial\n" +
"random_trees_embedding\n" +
"select_percentile_classification\n" +
"select_percentile_regression\n" +
"select_rates_classification\n" +
"select_rates_regression\n" +
"truncatedSVD")
private String includeFeaturePreprocessor;

@ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" +
"ard_regression,\n" +
"decision_tree,\n" +
"extra_trees,\n" +
"gaussian_process,\n" +
"gradient_boosting,\n" +
"k_nearest_neighbors,\n" +
"liblinear_svr,\n" +
"libsvm_svr,\n" +
"mlp,\n" +
"random_forest,\n" +
"sgd")
private String includeRegressor;

private String excludeClassifier;

private String excludeRegressor;

private String excludeFeaturePreprocessor;

@ApiModelProperty(value = "测试集的比率,0到1之间")
private Float testSize;

@ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid")
private String resamplingStrategy;

@ApiModelProperty(value = "重采样划分训练集和验证集,训练集的比率,0到1之间")
private Float trainSize;

@ApiModelProperty(value = "拆分数据前是否进行shuffle")
private Boolean shuffle;

@ApiModelProperty(value = "交叉验证的折数,当resamplingStrategy为crossValid时,此项必填,为整数")
private Integer folds;

@ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl")
private String tmpFolder;

@ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔")
private String targetColumns;

@ApiModelProperty(value = "自定义指标名称")
private String metricName;

@ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" +
"balanced_accuracy\n" +
"roc_auc\n" +
"average_precision\n" +
"log_loss\n" +
"precision_macro\n" +
"precision_micro\n" +
"precision_samples\n" +
"precision_weighted\n" +
"recall_macro\n" +
"recall_micro\n" +
"recall_samples\n" +
"recall_weighted\n" +
"f1_macro\n" +
"f1_micro\n" +
"f1_samples\n" +
"f1_weighted\n" +
"回归的指标包含:mean_absolute_error\n" +
"mean_squared_error\n" +
"root_mean_squared_error\n" +
"mean_squared_log_error\n" +
"median_absolute_error\n" +
"r2")
private String metrics;

@ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好")
private Boolean greaterIsBetter;

@ApiModelProperty(value = "模型计算并打印指标")
private String scoringFunctions;

private Integer state;

private String runState;

private Double progress;

private String createBy;

private Date createTime;

private String updateBy;

private Date updateTime;

private String dataset;

@ApiModelProperty(value = "状态列表")
private String statusList;
}

+ 0
- 50
ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMlIns.java View File

@@ -1,50 +0,0 @@
package com.ruoyi.platform.domain;

import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;

import java.util.Date;

@Data
@JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class)
@ApiModel(description = "自动机器学习实验实例")
public class AutoMlIns {
private Long id;

private Long autoMlId;

private String resultPath;

private String modelPath;

private String imgPath;

private String runHistoryPath;

private Integer state;

private String status;

private String nodeStatus;

private String nodeResult;

private String param;

private String source;

@ApiModelProperty(value = "Argo实例名称")
private String argoInsName;

@ApiModelProperty(value = "Argo命名空间")
private String argoInsNs;

private Date createTime;

private Date updateTime;

private Date finishTime;
}

+ 0
- 106
ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/scheduling/AutoMlInsStatusTask.java View File

@@ -1,106 +0,0 @@
package com.ruoyi.platform.scheduling;

import com.ruoyi.system.api.constant.Constant;
import com.ruoyi.platform.domain.AutoMl;
import com.ruoyi.platform.domain.AutoMlIns;
import com.ruoyi.platform.mapper.AutoMlDao;
import com.ruoyi.platform.mapper.AutoMlInsDao;
import com.ruoyi.platform.service.AutoMlInsService;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

@Component()
public class AutoMlInsStatusTask {

@Resource
private AutoMlInsService autoMlInsService;

@Resource
private AutoMlInsDao autoMlInsDao;

@Resource
private AutoMlDao autoMlDao;

private HashSet<Long> autoMlIds = new HashSet<>();

@Scheduled(cron = "0/10 * * * * ?") // 每10S执行一次
public void executeAutoMlInsStatus() {
// 首先查到所有非终止态的实验实例
List<AutoMlIns> autoMlInsList = autoMlInsService.queryByAutoMlInsIsNotTerminated();

// 去argo查询状态
List<AutoMlIns> updateList = new ArrayList<>();
if (autoMlInsList != null && autoMlInsList.size() > 0) {
for (AutoMlIns autoMlIns : autoMlInsList) {
//当原本状态为null或非终止态时才调用argo接口
try {
autoMlIns = autoMlInsService.queryStatusFromArgo(autoMlIns);
} catch (Exception e) {
autoMlIns.setStatus(Constant.Failed);
}
// 线程安全的添加操作
synchronized (autoMlIds) {
autoMlIds.add(autoMlIns.getAutoMlId());
}
updateList.add(autoMlIns);
}
if (updateList.size() > 0) {
for (AutoMlIns autoMlIns : updateList) {
autoMlInsDao.update(autoMlIns);
}
}
}
}

@Scheduled(cron = "0/10 * * * * ?") // / 每30S执行一次
public void executeAutoMlStatus() {
if (autoMlIds.isEmpty()) {
return;
}
// 存储需要更新的实验对象列表
List<AutoMl> updateAutoMls = new ArrayList<>();
Iterator<Long> iterator1 = autoMlIds.iterator();

while (iterator1.hasNext()) {
Long autoMlId = iterator1.next();
// 获取当前实验的所有实例列表
List<AutoMlIns> insList = autoMlInsDao.getByAutoMlId(autoMlId);
List<String> statusList = new ArrayList<>();
// 更新实验状态列表
for (int i = 0; i < insList.size(); i++) {
statusList.add(insList.get(i).getStatus());
}
String subStatus = statusList.toString().substring(1, statusList.toString().length() - 1);
AutoMl autoMl = autoMlDao.getAutoMlById(autoMlId);
if (autoMl == null) {
iterator1.remove();
} else {
if (!StringUtils.equals(autoMl.getStatusList(), subStatus)) {
autoMl.setStatusList(subStatus);
updateAutoMls.add(autoMl);
autoMlDao.edit(autoMl);
}
}
}

if (!updateAutoMls.isEmpty()) {
// 使用Iterator进行安全的删除操作
Iterator<Long> iterator = autoMlIds.iterator();
while (iterator.hasNext()) {
Long autoMlId = iterator.next();
for (AutoMl autoMl : updateAutoMls) {
if (autoMl.getId().equals(autoMlId)) {
iterator.remove();
}
}
}
}
}
}

+ 0
- 183
ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/vo/AutoMlVo.java View File

@@ -1,183 +0,0 @@
package com.ruoyi.platform.vo;

import com.baomidou.mybatisplus.annotation.TableField;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;

import java.util.Date;
import java.util.Map;

@Data
@JsonNaming(PropertyNamingStrategy.SnakeCaseStrategy.class)
@ApiModel(description = "自动机器学习")
public class AutoMlVo {
private Long id;

@ApiModelProperty(value = "实验名称")
private String mlName;

@ApiModelProperty(value = "实验描述")
private String mlDescription;

@ApiModelProperty(value = "任务类型:classification或regression")
private String taskType;

@ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。")
private Integer timeLeftForThisTask;

@ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。")
private Integer perRunTimeLimit;

@ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。")
private Integer ensembleSize;

@ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。")
private String ensembleClass;

@ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。")
private Integer ensembleNbest;

@ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。")
private Integer maxModelsOnDisc;

@ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。")
private Integer seed;

@ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。")
private Integer memoryLimit;

@ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" +
"bernoulli_nb\n" +
"decision_tree\n" +
"extra_trees\n" +
"gaussian_nb\n" +
"gradient_boosting\n" +
"k_nearest_neighbors\n" +
"lda\n" +
"liblinear_svc\n" +
"libsvm_svc\n" +
"mlp\n" +
"multinomial_nb\n" +
"passive_aggressive\n" +
"qda\n" +
"random_forest\n" +
"sgd")
private String includeClassifier;

@ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" +
"extra_trees_preproc_for_classification\n" +
"extra_trees_preproc_for_regression\n" +
"fast_ica\n" +
"feature_agglomeration\n" +
"kernel_pca\n" +
"kitchen_sinks\n" +
"liblinear_svc_preprocessor\n" +
"no_preprocessing\n" +
"nystroem_sampler\n" +
"pca\n" +
"polynomial\n" +
"random_trees_embedding\n" +
"select_percentile_classification\n" +
"select_percentile_regression\n" +
"select_rates_classification\n" +
"select_rates_regression\n" +
"truncatedSVD")
private String includeFeaturePreprocessor;

@ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" +
"ard_regression,\n" +
"decision_tree,\n" +
"extra_trees,\n" +
"gaussian_process,\n" +
"gradient_boosting,\n" +
"k_nearest_neighbors,\n" +
"liblinear_svr,\n" +
"libsvm_svr,\n" +
"mlp,\n" +
"random_forest,\n" +
"sgd")
private String includeRegressor;

private String excludeClassifier;

private String excludeRegressor;

private String excludeFeaturePreprocessor;

@ApiModelProperty(value = "测试集的比率,0到1之间")
private Float testSize;

@ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid")
private String resamplingStrategy;

@ApiModelProperty(value = "重采样划分训练集和验证集,训练集的比率,0到1之间")
private Float trainSize;

@ApiModelProperty(value = "拆分数据前是否进行shuffle")
private Boolean shuffle;

@ApiModelProperty(value = "交叉验证的折数,当resamplingStrategy为crossValid时,此项必填,为整数")
private Integer folds;

@ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl")
private String tmpFolder;

@ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔")
private String targetColumns;

@ApiModelProperty(value = "自定义指标名称")
private String metricName;

@ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" +
"balanced_accuracy\n" +
"roc_auc\n" +
"average_precision\n" +
"log_loss\n" +
"precision_macro\n" +
"precision_micro\n" +
"precision_samples\n" +
"precision_weighted\n" +
"recall_macro\n" +
"recall_micro\n" +
"recall_samples\n" +
"recall_weighted\n" +
"f1_macro\n" +
"f1_micro\n" +
"f1_samples\n" +
"f1_weighted\n" +
"回归的指标包含:mean_absolute_error\n" +
"mean_squared_error\n" +
"root_mean_squared_error\n" +
"mean_squared_log_error\n" +
"median_absolute_error\n" +
"r2")
private String metrics;

@ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好")
private Boolean greaterIsBetter;

@ApiModelProperty(value = "模型计算并打印指标")
private String scoringFunctions;

private Integer state;

private String runState;

private Double progress;

private String createBy;

private Date createTime;

private String updateBy;

private Date updateTime;

/**
* 对应数据集
*/
private Map<String,Object> dataset;
}

Loading…
Cancel
Save