|
|
|
@@ -22,31 +22,86 @@ public class AutoMl { |
|
|
|
/** Free-text description of the experiment, as stated by the API annotation below. */
@ApiModelProperty(value = "实验描述") |
|
|
|
private String mlDescription; |
|
|
|
|
|
|
|
/**
 * Task type. The second annotation text below names the accepted values:
 * classification or regression.
 */
// NOTE(review): two @ApiModelProperty annotations precede this field in this view.
// This looks like the old and the new line of a diff hunk shown together - confirm
// that only one of them exists in the actual source file.
@ApiModelProperty(value = "任务类型") |
|
|
|
@ApiModelProperty(value = "任务类型:classification或regression") |
|
|
|
private String taskType; |
|
|
|
|
|
|
|
/** Name of the dataset, as stated by the API annotation below. */
@ApiModelProperty(value = "数据集名称") |
|
|
|
private String datasetName; |
|
|
|
|
|
|
|
/**
 * Time limit, in seconds, for the search for suitable models. Per the API
 * description, increasing it gives auto-sklearn a higher chance of finding a
 * better model. Optional; the description states a default of 3600.
 */
@ApiModelProperty(value = "搜索合适模型的时间限制(以秒为单位)。通过增加这个值,auto-sklearn有更高的机会找到更好的模型。默认3600,非必传。") |
|
|
|
private Integer timeLeftForThisTask; |
|
|
|
|
|
|
|
/**
 * Time limit, in seconds, for a single call to a machine learning model. Per the
 * API description, model fitting is terminated when the algorithm runs past this
 * limit, so the value should be high enough for typical algorithms to fit the
 * training data. Optional; the description states a default of 600.
 */
@ApiModelProperty(value = "单次调用机器学习模型的时间限制(以秒为单位)。如果机器学习算法运行超过时间限制,将终止模型拟合。将这个值设置得足够高,这样典型的机器学习算法就可以适用于训练数据。默认600,非必传。") |
|
|
|
private Integer perRunTimeLimit; |
|
|
|
|
|
|
|
/**
 * Number of models in the ensemble; per the API description, 0 means no ensemble.
 * Optional; the description states a default of 50.
 */
@ApiModelProperty(value = "集成模型数量,如果设置为0,则没有集成。默认50,非必传。") |
|
|
|
private Integer ensembleSize; |
|
|
|
|
|
|
|
/**
 * Ensemble strategy selector. Per the API description: "None" disables ensemble
 * building, "SingleBest" uses only the single best model instead of an ensemble,
 * and "default" uses EnsembleSelection for single-objective problems and
 * MultiObjectiveDummyEnsemble for multi-objective problems. Optional; the
 * description states a default of "default".
 */
@ApiModelProperty(value = "设置为None将禁用集成构建,设置为SingleBest仅使用单个最佳模型而不是集成,设置为default,它将对单目标问题使用EnsembleSelection,对多目标问题使用MultiObjectiveDummyEnsemble。默认default,非必传。") |
|
|
|
private String ensembleClass; |
|
|
|
|
|
|
|
/**
 * Number of best models considered when building the ensemble (ensemble_nbest).
 * Per the API description, this is inspired by the library-pruning idea from
 * "Getting the most out of ensemble selection", is independent of the
 * ensemble_class parameter, and the pruning happens before the ensemble is
 * constructed. Optional; the description states a default of 50.
 */
@ApiModelProperty(value = "在构建集成时只考虑ensemble_nbest模型。这是受到了“最大限度地利用集成选择”中引入的库修剪概念的启发。这是独立于ensemble_class参数的,并且这个修剪步骤是在构造集成之前完成的。默认50,非必传。") |
|
|
|
private Integer ensembleNbest; |
|
|
|
|
|
|
|
/**
 * Maximum number of models kept on disk. Per the API description, models beyond
 * this number are permanently deleted, which also caps how many models an
 * ensemble can use; the value must be an integer greater than or equal to 1, and
 * "None" keeps every model on disk. Optional; the description states a default
 * of 50.
 */
@ApiModelProperty(value = "定义在磁盘中保存的模型的最大数量。额外的模型数量将被永久删除。由于这个变量的性质,它设置了一个集成可以使用多少个模型的上限。必须是大于等于1的整数。如果设置为None,则所有模型都保留在磁盘上。默认50,非必传。") |
|
|
|
private Integer maxModelsOnDisc; |
|
|
|
|
|
|
|
/**
 * Random seed; per the API description it also determines the output file names.
 * Optional; the description states a default of 1.
 */
@ApiModelProperty(value = "随机种子,将决定输出文件名。默认1,非必传。") |
|
|
|
private Integer seed; |
|
|
|
|
|
|
|
/**
 * Memory limit, in MB, for the machine learning algorithm. Per the API
 * description, auto-sklearn stops fitting the algorithm when it tries to allocate
 * more than this amount. Optional; the description states a default of 3072.
 */
@ApiModelProperty(value = "机器学习算法的内存限制(MB)。如果auto-sklearn试图分配超过memory_limit MB,它将停止拟合机器学习算法。默认3072,非必传。") |
|
|
|
private Integer memoryLimit; |
|
|
|
|
|
|
|
/**
 * Classification algorithms to include in the search, given as a comma-separated
 * multi-select. Per the API description, "None" means all available classifiers
 * are used, and this setting is incompatible with the exclude parameter. The
 * annotation text enumerates the accepted component names (adaboost through sgd),
 * one per line.
 */
@ApiModelProperty(value = "如果为None,则使用所有可能的分类算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost\n" + |
|
|
|
"bernoulli_nb\n" + |
|
|
|
"decision_tree\n" + |
|
|
|
"extra_trees\n" + |
|
|
|
"gaussian_nb\n" + |
|
|
|
"gradient_boosting\n" + |
|
|
|
"k_nearest_neighbors\n" + |
|
|
|
"lda\n" + |
|
|
|
"liblinear_svc\n" + |
|
|
|
"libsvm_svc\n" + |
|
|
|
"mlp\n" + |
|
|
|
"multinomial_nb\n" + |
|
|
|
"passive_aggressive\n" + |
|
|
|
"qda\n" + |
|
|
|
"random_forest\n" + |
|
|
|
"sgd") |
|
|
|
private String includeClassifier; |
|
|
|
|
|
|
|
/**
 * Feature preprocessing algorithms to include in the search, given as a
 * comma-separated multi-select. Per the API description, "None" means all
 * available feature preprocessors are used, and this setting is incompatible with
 * the exclude parameter. The annotation text enumerates the accepted component
 * names (densifier through truncatedSVD), one per line.
 */
@ApiModelProperty(value = "如果为None,则使用所有可能的特征预处理算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:densifier\n" + |
|
|
|
"extra_trees_preproc_for_classification\n" + |
|
|
|
"extra_trees_preproc_for_regression\n" + |
|
|
|
"fast_ica\n" + |
|
|
|
"feature_agglomeration\n" + |
|
|
|
"kernel_pca\n" + |
|
|
|
"kitchen_sinks\n" + |
|
|
|
"liblinear_svc_preprocessor\n" + |
|
|
|
"no_preprocessing\n" + |
|
|
|
"nystroem_sampler\n" + |
|
|
|
"pca\n" + |
|
|
|
"polynomial\n" + |
|
|
|
"random_trees_embedding\n" + |
|
|
|
"select_percentile_classification\n" + |
|
|
|
"select_percentile_regression\n" + |
|
|
|
"select_rates_classification\n" + |
|
|
|
"select_rates_regression\n" + |
|
|
|
"truncatedSVD") |
|
|
|
private String includeFeaturePreprocessor; |
|
|
|
|
|
|
|
/**
 * Regression algorithms to include in the search, given as a comma-separated
 * multi-select. Per the API description, "None" means all available regressors
 * are used, and this setting is incompatible with the exclude parameter. The
 * annotation text enumerates the accepted component names (adaboost through sgd).
 */
// NOTE(review): unlike the classifier and feature-preprocessor lists, each entry in
// this description ends with a comma before the line break - confirm whether the
// difference in the rendered API text is intended.
@ApiModelProperty(value = "如果为None,则使用所有可能的回归算法。否则,指定搜索中包含的步骤和组件。有关可用组件,请参见/pipeline/components/<step>/*。与参数exclude不兼容。多选,逗号分隔。包含:adaboost,\n" + |
|
|
|
"ard_regression,\n" + |
|
|
|
"decision_tree,\n" + |
|
|
|
"extra_trees,\n" + |
|
|
|
"gaussian_process,\n" + |
|
|
|
"gradient_boosting,\n" + |
|
|
|
"k_nearest_neighbors,\n" + |
|
|
|
"liblinear_svr,\n" + |
|
|
|
"libsvm_svr,\n" + |
|
|
|
"mlp,\n" + |
|
|
|
"random_forest,\n" + |
|
|
|
"sgd") |
|
|
|
private String includeRegressor; |
|
|
|
|
|
|
|
// NOTE(review): no API description is visible for this field in this view.
// Presumably the classifier components to exclude from the search (the counterpart
// of includeClassifier) - confirm against the code that reads it.
private String excludeClassifier; |
|
|
|
@@ -55,20 +110,61 @@ public class AutoMl { |
|
|
|
|
|
|
|
// NOTE(review): no API description is visible for this field in this view (it sits
// right after a diff hunk boundary). Presumably the feature preprocessors to exclude
// from the search (the counterpart of includeFeaturePreprocessor) - confirm.
private String excludeFeaturePreprocessor; |
|
|
|
|
|
|
|
/**
 * How over-fitting is handled. Per the API description, "cv"-based methods or
 * Splitter objects may additionally need resampling_strategy_arguments; the
 * values named by the description are holdout and crossValid.
 */
@ApiModelProperty(value = "如何处理过拟合,如果使用基于“cv”的方法或Splitter对象,可能需要使用resampling_strategy_arguments。holdout或crossValid") |
|
|
|
private String resamplingStrategy; |
|
|
|
|
|
|
|
/** Ratio of the data used as the training set; the API description says between 0 and 1. */
@ApiModelProperty(value = "训练集的比率,0到1之间") |
|
|
|
private Float trainSize; |
|
|
|
|
|
|
|
/** Whether the data is shuffled before it is split, as stated by the API annotation below. */
@ApiModelProperty(value = "拆分数据前是否进行shuffle") |
|
|
|
private Boolean shuffle; |
|
|
|
|
|
|
|
/**
 * Integer that the API description marks as required when resamplingStrategy is
 * crossValid.
 */
// NOTE(review): presumably the number of cross-validation folds - confirm.
@ApiModelProperty(value = "当resamplingStrategy为crossValid时,此项必填。为整数") |
|
|
|
private Integer folds; |
|
|
|
|
|
|
|
// NOTE(review): no API description is visible for this field in this view.
// Presumably controls whether the temporary folder is removed once the run ends -
// confirm against the code that reads it.
private Boolean deleteTmpFolderAfterTerminate; |
|
|
|
/**
 * Folder that holds the configuration output and log files; the API description
 * states a default of /tmp/automl.
 */
@ApiModelProperty(value = "文件夹存放配置输出和日志文件,默认/tmp/automl") |
|
|
|
private String tmpFolder; |
|
|
|
|
|
|
|
/** Path of the dataset CSV file, as stated by the API annotation below. */
@ApiModelProperty(value = "数据集csv文件路径") |
|
|
|
private String dataCsv; |
|
|
|
|
|
|
|
/**
 * Columns of the dataset CSV file that are the prediction targets, given as a
 * comma-separated list per the API description.
 */
@ApiModelProperty(value = "数据集csv文件中哪几列是预测目标列,逗号分隔") |
|
|
|
private String targetColumns; |
|
|
|
|
|
|
|
/** Name of the custom metric, as stated by the API annotation below. */
@ApiModelProperty(value = "自定义指标名称") |
|
|
|
private String metricName; |
|
|
|
|
|
|
|
/**
 * Target metrics for model optimization together with their weights, in JSON
 * format per the API description. The annotation text enumerates the accepted
 * classification metrics (accuracy through f1_weighted) followed by the accepted
 * regression metrics (mean_absolute_error through r2), one per line.
 */
@ApiModelProperty(value = "模型优化目标指标及权重,json格式。分类的指标包含:accuracy\n" + |
|
|
|
"balanced_accuracy\n" + |
|
|
|
"roc_auc\n" + |
|
|
|
"average_precision\n" + |
|
|
|
"log_loss\n" + |
|
|
|
"precision_macro\n" + |
|
|
|
"precision_micro\n" + |
|
|
|
"precision_samples\n" + |
|
|
|
"precision_weighted\n" + |
|
|
|
"recall_macro\n" + |
|
|
|
"recall_micro\n" + |
|
|
|
"recall_samples\n" + |
|
|
|
"recall_weighted\n" + |
|
|
|
"f1_macro\n" + |
|
|
|
"f1_micro\n" + |
|
|
|
"f1_samples\n" + |
|
|
|
"f1_weighted\n" + |
|
|
|
"回归的指标包含:mean_absolute_error\n" + |
|
|
|
"mean_squared_error\n" + |
|
|
|
"root_mean_squared_error\n" + |
|
|
|
"mean_squared_log_error\n" + |
|
|
|
"median_absolute_error\n" + |
|
|
|
"r2") |
|
|
|
private String metrics; |
|
|
|
|
|
|
|
/**
 * Optimization direction of the metric: whether a larger or a smaller value is
 * better, per the API description.
 */
// NOTE(review): presumably true means "larger is better" - confirm.
@ApiModelProperty(value = "指标优化方向,是越大越好还是越小越好") |
|
|
|
private Boolean greaterIsBetter; |
|
|
|
|
|
|
|
/** Metrics that the model computes and prints, as stated by the API annotation below. */
// NOTE(review): the expected format of this string is not shown here - confirm.
@ApiModelProperty(value = "模型计算并打印指标") |
|
|
|
private String scoringFunctions; |
|
|
|
|
|
|
|
// NOTE(review): no API description is visible for this field in this view; the
// meaning of the integer codes cannot be determined from here - confirm.
private Integer state; |
|
|
|
|
|
|
|
// NOTE(review): no API description is visible for this field in this view.
// Presumably the state of the experiment run - confirm the accepted values.
private String runState; |
|
|
|
|