diff --git a/ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java b/ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java index 43b8255f..e1aa5506 100644 --- a/ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java +++ b/ruoyi-modules/management-platform/src/main/java/com/ruoyi/platform/domain/AutoMl.java @@ -22,31 +22,86 @@ public class AutoMl { @ApiModelProperty(value = "实验描述") private String mlDescription; - @ApiModelProperty(value = "任务类型") + @ApiModelProperty(value = "任务类型:classification或regression") private String taskType; + @ApiModelProperty(value = "数据集名称") private String datasetName; + @ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。") private Integer timeLeftForThisTask; + @ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。") private Integer perRunTimeLimit; + @ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。") private Integer ensembleSize; + @ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。") private String ensembleClass; + @ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。") private Integer ensembleNbest; + @ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。") private Integer maxModelsOnDisc; + @ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。") private Integer seed; + @ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。") private Integer memoryLimit; + @ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components//*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" + + "bernoulli_nb\n" + + "decision_tree\n" + + "extra_trees\n" + + "gaussian_nb\n" + + "gradient_boosting\n" + + "k_nearest_neighbors\n" + + "lda\n" + + "liblinear_svc\n" + + "libsvm_svc\n" + + "mlp\n" + + "multinomial_nb\n" + + "passive_aggressive\n" + + "qda\n" + + "random_forest\n" + + "sgd") private String includeClassifier; + @ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components//*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" + + "extra_trees_preproc_for_classification\n" + + "extra_trees_preproc_for_regression\n" + + "fast_ica\n" + + "feature_agglomeration\n" + + "kernel_pca\n" + + "kitchen_sinks\n" + + "liblinear_svc_preprocessor\n" + + "no_preprocessing\n" + + "nystroem_sampler\n" + + "pca\n" + + "polynomial\n" + + "random_trees_embedding\n" + + "select_percentile_classification\n" + + "select_percentile_regression\n" + + "select_rates_classification\n" + + "select_rates_regression\n" + + "truncatedSVD") private String includeFeaturePreprocessor; + @ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components//*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" + + "ard_regression,\n" + + "decision_tree,\n" + + "extra_trees,\n" + + "gaussian_process,\n" + + "gradient_boosting,\n" + + "k_nearest_neighbors,\n" + + "liblinear_svr,\n" + + "libsvm_svr,\n" + + "mlp,\n" + + "random_forest,\n" + + "sgd") private String includeRegressor; private String excludeClassifier; @@ -55,20 +110,61 @@ public class AutoMl { private String excludeFeaturePreprocessor; + @ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid") private String resamplingStrategy; + @ApiModelProperty(value = "训练集的比率,0到1之间") private Float trainSize; + @ApiModelProperty(value = "拆分数据前是否进行shuffle") private Boolean shuffle; + @ApiModelProperty(value = "当resamplingStrategy为crossValid时,此项必填。为整数") private Integer folds; - private Boolean deleteTmpFolderAfterTerminate; + @ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl") + private String tmpFolder; + @ApiModelProperty(value = "数据集csv文件路径") private String dataCsv; + @ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔") private String targetColumns; + @ApiModelProperty(value = "自定义指标名称") + private String metricName; + + @ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" + + "balanced_accuracy\n" + + "roc_auc\n" + + "average_precision\n" + + "log_loss\n" + + "precision_macro\n" + + "precision_micro\n" + + "precision_samples\n" + + "precision_weighted\n" + + "recall_macro\n" + + "recall_micro\n" + + "recall_samples\n" + + "recall_weighted\n" + + "f1_macro\n" + + "f1_micro\n" + + "f1_samples\n" + + "f1_weighted\n" + + "回归的指标包含:mean_absolute_error\n" + + "mean_squared_error\n" + + "root_mean_squared_error\n" + + "mean_squared_log_error\n" + + "median_absolute_error\n" + + "r2") + private String metrics; + + @ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好") + private Boolean greaterIsBetter; + + @ApiModelProperty(value = "模型计算并打印指标") + private String scoringFunctions; + private Integer state; private String runState; diff --git a/ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml b/ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml index 0af3beb7..55f8e8f5 100644 --- a/ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml +++ b/ruoyi-modules/management-platform/src/main/resources/mapper/managementPlatform/AutoMLDaoMapper.xml @@ -7,8 +7,8 @@ memory_limit, include_classifier, include_feature_preprocessor, include_regressor, exclude_classifier, exclude_regressor, exclude_feature_preprocessor, resampling_strategy, train_size, - shuffle, folds, data_csv, target_columns, create_by, - update_by) + shuffle, folds, data_csv, target_columns, metric_name, metrics,greater_is_better,scoring_functions,tmp_folder, + create_by,update_by) values (#{autoMl.mlName}, #{autoMl.mlDescription}, #{autoMl.taskType}, #{autoMl.datasetName}, #{autoMl.timeLeftForThisTask}, #{autoMl.perRunTimeLimit}, #{autoMl.ensembleSize}, #{autoMl.ensembleClass}, #{autoMl.ensembleNbest}, @@ -18,7 +18,8 @@ #{autoMl.excludeRegressor}, #{autoMl.excludeFeaturePreprocessor}, #{autoMl.resamplingStrategy}, #{autoMl.trainSize}, #{autoMl.shuffle}, #{autoMl.folds}, #{autoMl.dataCsv}, - #{autoMl.targetColumns}, #{autoMl.createBy}, #{autoMl.updateBy}) + #{autoMl.targetColumns}, #{autoMl.metricName}, #{autoMl.metrics},#{autoMl.greaterIsBetter},#{autoMl.scoringFunctions},#{autoMl.tmpFolder}, + #{autoMl.createBy}, #{autoMl.updateBy}) @@ -99,6 +100,21 @@ data_csv = #{autoMl.dataCsv}, + + tmp_folder = #{autoMl.tmpFolder}, + + + metric_name = #{autoMl.metricName}, + + + metrics = #{autoMl.metrics}, + + + greater_is_better = #{autoMl.greaterIsBetter}, + + + scoring_functions = #{autoMl.scoringFunctions}, + target_columns = #{autoMl.targetColumns},