Browse Source

自动机器学习开发

dev-automl
chenzhihang 1 year ago
parent
commit
0ede04ec79
2 changed files with 117 additions and 5 deletions
  1. +98
    -2
      ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java
  2. +19
    -3
      ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml

+ 98
- 2
ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java View File

@@ -22,31 +22,86 @@ public class AutoMl {
@ApiModelProperty(value = "实验描述")
private String mlDescription;

@ApiModelProperty(value = "任务类型")
@ApiModelProperty(value = "任务类型:classification或regression")
private String taskType;

@ApiModelProperty(value = "数据集名称")
private String datasetName;

@ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。")
private Integer timeLeftForThisTask;

@ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。")
private Integer perRunTimeLimit;

@ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。")
private Integer ensembleSize;

@ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。")
private String ensembleClass;

@ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。")
private Integer ensembleNbest;

@ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。")
private Integer maxModelsOnDisc;

@ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。")
private Integer seed;

@ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。")
private Integer memoryLimit;

@ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" +
"bernoulli_nb\n" +
"decision_tree\n" +
"extra_trees\n" +
"gaussian_nb\n" +
"gradient_boosting\n" +
"k_nearest_neighbors\n" +
"lda\n" +
"liblinear_svc\n" +
"libsvm_svc\n" +
"mlp\n" +
"multinomial_nb\n" +
"passive_aggressive\n" +
"qda\n" +
"random_forest\n" +
"sgd")
private String includeClassifier;

@ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" +
"extra_trees_preproc_for_classification\n" +
"extra_trees_preproc_for_regression\n" +
"fast_ica\n" +
"feature_agglomeration\n" +
"kernel_pca\n" +
"kitchen_sinks\n" +
"liblinear_svc_preprocessor\n" +
"no_preprocessing\n" +
"nystroem_sampler\n" +
"pca\n" +
"polynomial\n" +
"random_trees_embedding\n" +
"select_percentile_classification\n" +
"select_percentile_regression\n" +
"select_rates_classification\n" +
"select_rates_regression\n" +
"truncatedSVD")
private String includeFeaturePreprocessor;

@ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" +
"ard_regression,\n" +
"decision_tree,\n" +
"extra_trees,\n" +
"gaussian_process,\n" +
"gradient_boosting,\n" +
"k_nearest_neighbors,\n" +
"liblinear_svr,\n" +
"libsvm_svr,\n" +
"mlp,\n" +
"random_forest,\n" +
"sgd")
private String includeRegressor;

private String excludeClassifier;
@@ -55,20 +110,61 @@ public class AutoMl {

private String excludeFeaturePreprocessor;

@ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid")
private String resamplingStrategy;

@ApiModelProperty(value = "训练集的比率,0到1之间")
private Float trainSize;

@ApiModelProperty(value = "拆分数据前是否进行shuffle")
private Boolean shuffle;

@ApiModelProperty(value = "当resamplingStrategy为crossValid时,此项必填。为整数")
private Integer folds;

private Boolean deleteTmpFolderAfterTerminate;
@ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl")
private String tmpFolder;

@ApiModelProperty(value = "数据集csv文件路径")
private String dataCsv;

@ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔")
private String targetColumns;

@ApiModelProperty(value = "自定义指标名称")
private String metricName;

@ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" +
"balanced_accuracy\n" +
"roc_auc\n" +
"average_precision\n" +
"log_loss\n" +
"precision_macro\n" +
"precision_micro\n" +
"precision_samples\n" +
"precision_weighted\n" +
"recall_macro\n" +
"recall_micro\n" +
"recall_samples\n" +
"recall_weighted\n" +
"f1_macro\n" +
"f1_micro\n" +
"f1_samples\n" +
"f1_weighted\n" +
"回归的指标包含:mean_absolute_error\n" +
"mean_squared_error\n" +
"root_mean_squared_error\n" +
"mean_squared_log_error\n" +
"median_absolute_error\n" +
"r2")
private String metrics;

@ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好")
private Boolean greaterIsBetter;

@ApiModelProperty(value = "模型计算并打印指标")
private String scoringFunctions;

private Integer state;

private String runState;


+ 19
- 3
ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml View File

@@ -7,8 +7,8 @@
memory_limit,
include_classifier, include_feature_preprocessor, include_regressor, exclude_classifier,
exclude_regressor, exclude_feature_preprocessor, resampling_strategy, train_size,
shuffle, folds, data_csv, target_columns, create_by,
update_by)
shuffle, folds, data_csv, target_columns, metric_name, metrics,greater_is_better,scoring_functions,tmp_folder,
create_by,update_by)
values (#{autoMl.mlName}, #{autoMl.mlDescription}, #{autoMl.taskType}, #{autoMl.datasetName},
#{autoMl.timeLeftForThisTask}, #{autoMl.perRunTimeLimit},
#{autoMl.ensembleSize}, #{autoMl.ensembleClass}, #{autoMl.ensembleNbest},
@@ -18,7 +18,8 @@
#{autoMl.excludeRegressor}, #{autoMl.excludeFeaturePreprocessor}, #{autoMl.resamplingStrategy},
#{autoMl.trainSize}, #{autoMl.shuffle},
#{autoMl.folds}, #{autoMl.dataCsv},
#{autoMl.targetColumns}, #{autoMl.createBy}, #{autoMl.updateBy})
#{autoMl.targetColumns}, #{autoMl.metricName}, #{autoMl.metrics},#{autoMl.greaterIsBetter},#{autoMl.scoringFunctions},#{autoMl.tmpFolder},
#{autoMl.createBy}, #{autoMl.updateBy})
</insert>

<update id="edit">
@@ -99,6 +100,21 @@
<if test="autoMl.dataCsv != null and autoMl.dataCsv !=''">
data_csv = #{autoMl.dataCsv},
</if>
<if test="autoMl.tmpFolder != null and autoMl.tmpFolder !=''">
tmp_folder = #{autoMl.tmpFolder},
</if>
<if test="autoMl.metricName != null and autoMl.metricName !=''">
metric_name = #{autoMl.metricName},
</if>
<if test="autoMl.metrics != null and autoMl.metrics !=''">
metrics = #{autoMl.metrics},
</if>
<!-- The guard must test the Java bean property (greaterIsBetter), not the column name:
     the expression is evaluated by OGNL against the autoMl parameter object, which has no
     greater_is_better property. Only a null check is used because the field is a Boolean. -->
<if test="autoMl.greaterIsBetter != null">
greater_is_better = #{autoMl.greaterIsBetter},
</if>
<if test="autoMl.scoringFunctions != null and autoMl.scoringFunctions !=''">
scoring_functions = #{autoMl.scoringFunctions},
</if>
<if test="autoMl.targetColumns != null and autoMl.targetColumns !=''">
target_columns = #{autoMl.targetColumns},
</if>


Loading…
Cancel
Save