From bcae95e566eb8ef80957eef1e950c2e0910edcfe Mon Sep 17 00:00:00 2001 From: lewis <747342561@qq.com> Date: Thu, 17 Mar 2022 19:17:52 +0800 Subject: [PATCH] create --- models/action.go | 1 + modules/cloudbrain/cloudbrain.go | 33 +- modules/setting/setting.go | 30 +- routers/repo/cloudbrain.go | 35 +- templates/repo/cloudbrain/trainjob/new.tmpl | 427 ++++++++++++++++++++ 5 files changed, 501 insertions(+), 25 deletions(-) create mode 100755 templates/repo/cloudbrain/trainjob/new.tmpl diff --git a/models/action.go b/models/action.go index 2a9d88399..9b92b4192 100755 --- a/models/action.go +++ b/models/action.go @@ -57,6 +57,7 @@ const ( ActionCreateInferenceTask // 28 ActionCreateBenchMarkTask //29 ActionCreateNewModelTask //30 + ActionCreateGPUTrainTask //31 ) // Action represents user operation type and other information to diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index dc3f483b7..8b0786b57 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -15,9 +15,7 @@ import ( ) const ( - Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple; - service ssh stop; - jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` + Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;set TEST1=1111;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` CodeMountPath = "/code" @@ -37,7 +35,8 @@ const ( ) var ( - ResourceSpecs *models.ResourceSpecs + ResourceSpecs *models.ResourceSpecs + TrainResourceSpecs *models.ResourceSpecs ) func 
isAdminOrOwnerOrJobCreater(ctx *context.Context, job *models.Cloudbrain, err error) bool { @@ -157,12 +156,23 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, uuid var resourceSpec *models.ResourceSpec - if ResourceSpecs == nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) - } - for _, spec := range ResourceSpecs.ResourceSpec { - if resourceSpecId == spec.Id { - resourceSpec = spec + if jobType == string(models.JobTypeDebug) { + if ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) + } + for _, spec := range ResourceSpecs.ResourceSpec { + if resourceSpecId == spec.Id { + resourceSpec = spec + } + } + } else if jobType == string(models.JobTypeTrain) { + if TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) + } + for _, spec := range TrainResourceSpecs.ResourceSpec { + if resourceSpecId == spec.Id { + resourceSpec = spec + } } } @@ -265,6 +275,7 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, BenchmarkTypeID: benchmarkTypeID, BenchmarkChildTypeID: benchmarkChildTypeID, Description: description, + IsLatestVersion: "1", }) if err != nil { @@ -280,6 +291,8 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, if string(models.JobTypeBenchmark) == jobType { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateBenchMarkTask) + } else if string(models.JobTypeTrain) == jobType { + notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateGPUTrainTask) } else { notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugGPUTask) } diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 2a29dd700..7ae2263f7 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -450,16 +450,18 @@ var ( 
DecompressOBSTaskName string //cloudbrain config - CBAuthUser string - CBAuthPassword string - RestServerHost string - JobPath string - CBCodePathPrefix string - JobType string - GpuTypes string - DebugServerHost string - ResourceSpecs string - MaxDuration int64 + CBAuthUser string + CBAuthPassword string + RestServerHost string + JobPath string + CBCodePathPrefix string + JobType string + GpuTypes string + DebugServerHost string + ResourceSpecs string + MaxDuration int64 + TrainGpuTypes string + TrainResourceSpecs string //benchmark config IsBenchmarkEnabled bool @@ -512,9 +514,9 @@ var ( ProfileID string PoolInfos string Flavor string - DebugHost string - ImageInfos string - Capacity int + DebugHost string + ImageInfos string + Capacity int //train-job ResourcePools string Engines string @@ -1283,6 +1285,8 @@ func NewContext() { GpuTypes = sec.Key("GPU_TYPES").MustString("") ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) + TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") + TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") sec = Cfg.Section("benchmark") IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 923ff0953..fb7909c8c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -45,6 +45,7 @@ var ( benchmarkTypes *models.BenchmarkTypes benchmarkGpuInfos *models.GpuInfos benchmarkResourceSpecs *models.ResourceSpecs + trainGpuInfos *models.GpuInfos ) var jobNamePattern = regexp.MustCompile(`^[a-z0-9][a-z0-9-_]{1,34}[a-z0-9-]$`) @@ -144,6 +145,11 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { } ctx.Data["gpu_types"] = gpuInfos.GpuInfo + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + if benchmarkGpuInfos == nil { json.Unmarshal([]byte(setting.BenchmarkGpuTypes), 
&benchmarkGpuInfos) } @@ -158,6 +164,14 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) } ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + ctx.Data["params"] = "" + ctx.Data["branchName"] = ctx.Repo.BranchName + ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled @@ -193,14 +207,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { command := cloudbrain.Command if jobType == string(models.JobTypeTrain) { tpl = tplCloudBrainTrainJobNew - command, err := getTrainJobCommand(form) + commandTrain, err := getTrainJobCommand(form) if err != nil { log.Error("getTrainJobCommand failed: %v", err) ctx.RenderWithErr(err.Error(), tpl, &form) return } - log.Info("%s", command) + command = commandTrain } tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) @@ -1431,11 +1445,28 @@ func CloudBrainTrainJobNew(ctx *context.Context) { func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { var command string bootFile := form.BootFile + params := form.Params if !strings.HasSuffix(bootFile, ".py") { log.Error("bootFile(%s) format error", bootFile) return command, errors.New("bootFile format error") } + var parameters models.Parameters + if len(params) != 0 { + err := json.Unmarshal([]byte(params), &parameters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + command += "set " + parameter.Label + "=" + parameter.Value + ";" + } + } + + command += "python /code/" + bootFile + log.Info("command:" + 
command) + return command, nil } diff --git a/templates/repo/cloudbrain/trainjob/new.tmpl b/templates/repo/cloudbrain/trainjob/new.tmpl new file mode 100755 index 000000000..24c733263 --- /dev/null +++ b/templates/repo/cloudbrain/trainjob/new.tmpl @@ -0,0 +1,427 @@ +{{template "base/head" .}} + + +
+
+
+
+
+
+
+
+
+
+ {{template "repo/header" .}} +
+ {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + + 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。 +
+ +
+ + +
+
+ +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+ + +
+ + +
+ + + +
+ + +
+ +
+ + + + + {{range .images}} + + {{end}} + {{range .public_images}} + + {{end}} + +
+ +
+ + {{if .bootFile}} + + {{else}} + + {{end}} + + + + 查看样例 +
+ +
+ + + 训练脚本存储在/code中,数据集存储在/dataset中,训练输出请存储在/model中以供后续下载。 +
+ +
+ + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ {{if ne 0 (len .params)}} + {{range $k ,$v := .params}} +
+
+ +
+
+ +
+ + + + +
+ {{end}} + {{end}} +
+
+ +
+ + +
+ +
+ + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ + + +
+
+
+
+{{template "base/footer" .}} + + \ No newline at end of file