diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index caa0660cd..c3a803f70 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -103,6 +103,7 @@ func GetCloudbrainTask(ctx *context.APIContext) { "SubState": result.JobStatus.SubState, "CreatedTime": time.Unix(result.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05"), "CompletedTime": time.Unix(result.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05"), + "JobDuration": job.TrainJobDuration, }) } diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 54220fbca..7d30614b5 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -95,9 +95,10 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "ID": ID, - "JobName": job.JobName, - "JobStatus": result.Status, + "ID": ID, + "JobName": job.JobName, + "JobStatus": result.Status, + "JobDuration": job.TrainJobDuration, }) } diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index da9801664..257101331 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -219,6 +219,255 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { return nil } +func cloudBrainTrainJobErrorPrepare(ctx *context.Context, form auth.CreateCloudBrainForm) error { + ctx.Data["PageIsCloudBrain"] = true + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + cloudbrain.InitSpecialPool() + + if gpuInfos == nil { + json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) + } + ctx.Data["gpu_types"] = gpuInfos.GpuInfo + + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + + if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" { + json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) + } + if inferenceGpuInfos != nil { + ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo + } + + if benchmarkGpuInfos == nil { + json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) + } + ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo + + if benchmarkResourceSpecs == nil { + json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs) + } + ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec + + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + + if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) + } + if cloudbrain.InferenceResourceSpecs != nil { + ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec + } + + if cloudbrain.SpecialPools != nil { + var debugGpuTypes []*models.GpuInfo + var trainGpuTypes []*models.GpuInfo + + for _, pool := range cloudbrain.SpecialPools.Pools { + org, _ := models.GetOrgByName(pool.Org) + if org != nil { + isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) + if isOrgMember { + for _, jobType := range pool.JobType { + if jobType == string(models.JobTypeDebug) { + debugGpuTypes = append(debugGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["resource_specs"] = pool.ResourceSpec + } + } else if jobType == string(models.JobTypeTrain) { + trainGpuTypes = append(trainGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["train_resource_specs"] = pool.ResourceSpec + } + } + } + break + } + } + + } + + if len(debugGpuTypes) > 0 { + ctx.Data["gpu_types"] = debugGpuTypes + } + + if len(trainGpuTypes) > 0 { + ctx.Data["train_gpu_types"] = trainGpuTypes + } + + } + + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["datasetType"] = models.TypeCloudBrainOne + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["job_type"] = form.JobType + ctx.Data["gpu_type"] = form.GpuType + ctx.Data["resource_spec_id"] = form.ResourceSpecId + return nil +} + +func cloudBrainInferenceJobErrorPrepare(ctx *context.Context, form auth.CreateCloudBrainInferencForm) error { + ctx.Data["PageIsCloudBrain"] = true + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + cloudbrain.InitSpecialPool() + + if gpuInfos == nil { + json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) + } + ctx.Data["gpu_types"] = gpuInfos.GpuInfo + + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + + if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" { + json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) + } + if inferenceGpuInfos != nil { + ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo + } + + if benchmarkGpuInfos == nil { + json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) + } + ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo + + if benchmarkResourceSpecs == nil { + json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs) + } + ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec + + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + + if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) + } + if cloudbrain.InferenceResourceSpecs != nil { + ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec + } + + if cloudbrain.SpecialPools != nil { + var debugGpuTypes []*models.GpuInfo + var trainGpuTypes []*models.GpuInfo + + for _, pool := range cloudbrain.SpecialPools.Pools { + org, _ := models.GetOrgByName(pool.Org) + if org != nil { + isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) + if isOrgMember { + for _, jobType := range pool.JobType { + if jobType == string(models.JobTypeDebug) { + debugGpuTypes = append(debugGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["resource_specs"] = pool.ResourceSpec + } + } else if jobType == string(models.JobTypeTrain) { + trainGpuTypes = append(trainGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["train_resource_specs"] = pool.ResourceSpec + } + } + } + break + } + } + + } + if len(debugGpuTypes) > 0 { + ctx.Data["gpu_types"] = debugGpuTypes + } + + if len(trainGpuTypes) > 0 { + ctx.Data["train_gpu_types"] = trainGpuTypes + } + + } + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["datasetType"] = models.TypeCloudBrainOne + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["job_type"] = form.JobType + ctx.Data["gpu_type"] = form.GpuType + ctx.Data["resource_spec_id"] = form.ResourceSpecId + ctx.Data["label_names"] = form.LabelName + ctx.Data["train_url"] = form.TrainUrl + ctx.Data["ckpt_name"] = form.CkptName + ctx.Data["model_name"] = form.ModelName + ctx.Data["model_version"] = form.ModelVersion + ctx.Data["description"] = form.Description + return nil +} func CloudBrainNew(ctx *context.Context) { err := cloudBrainNewDataPrepare(ctx) @@ -251,28 +500,28 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeTrain) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("jobtype error", tpl, &form) return } @@ -280,13 +529,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -295,7 +544,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { datasetInfos, datasetNames, err := models.GetDatasetInfo(uuids) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -316,7 +565,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) if errStr != "" { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(errStr, tpl, &form) return } @@ -362,7 +611,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -402,20 +651,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } @@ -423,13 +673,13 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -449,7 +699,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -486,7 +736,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tpl, &form) return } diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 53fd9be30..a41f884d9 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1,16 +1,8 @@ package repo import ( - "code.gitea.io/gitea/modules/auth" - "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/grampus" - "code.gitea.io/gitea/modules/modelarts" - "code.gitea.io/gitea/modules/notification" - "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/modules/util" "encoding/json" "errors" - "github.com/unknwon/com" "io/ioutil" "net/http" "os" @@ -19,6 +11,15 @@ import ( "strings" "time" + "code.gitea.io/gitea/modules/auth" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/grampus" + "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/notification" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" + "github.com/unknwon/com" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/cloudbrain" @@ -137,6 +138,93 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err return nil } +func grampusTrainJobErrorPrepare(ctx *context.Context, processType string, form auth.CreateGrampusTrainJobForm) error { + ctx.Data["PageIsCloudBrain"] = true + + //get valid images + images, err := grampus.GetImages(processType) + if err != nil { + log.Error("GetImages failed:", err.Error()) + } else { + ctx.Data["images"] = images.Infos + } + + grampus.InitSpecialPool() + + ctx.Data["GPUEnabled"] = true + ctx.Data["NPUEnabled"] = true + includeCenters := make(map[string]struct{}) + excludeCenters := make(map[string]struct{}) + if grampus.SpecialPools != nil { + for _, pool := range grampus.SpecialPools.Pools { + if pool.IsExclusive { + if !IsUserInOrgPool(ctx.User.ID, pool) { + ctx.Data[pool.Type+"Enabled"] = false + } + } else { + if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { + if IsUserInOrgPool(ctx.User.ID, pool) { + for _, center := range pool.Pool { + includeCenters[center.Queue] = struct{}{} + } + } else { + for _, center := range pool.Pool { + excludeCenters[center.Queue] = struct{}{} + } + + } + + } + + } + } + } + + //get valid resource specs + specs, err := grampus.GetResourceSpecs(processType) + + grampusSpecs := getFilterSpecBySpecialPool(specs, includeCenters, excludeCenters) + + if err != nil { + log.Error("GetResourceSpecs failed:", err.Error()) + } else { + ctx.Data["flavor_infos"] = grampusSpecs + } + + if processType == grampus.ProcessorTypeGPU { + ctx.Data["datasetType"] = models.TypeCloudBrainOne + } else if processType == grampus.ProcessorTypeNPU { + ctx.Data["datasetType"] = models.TypeCloudBrainTwo + } + + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["image_id"] = form.ImageID + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["flavor"] = form.FlavorID + ctx.Data["flavor_name"] = form.FlavorName + ctx.Data["description"] = form.Description + ctx.Data["engine_name"] = form.EngineName + ctx.Data["work_server_number"] = form.WorkServerNumber + + return nil +} + func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec { if len(includeCenters) == 0 && len(excludeCenters) == 0 { return specs.Infos @@ -207,14 +295,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain image := strings.TrimSpace(form.Image) if !jobNamePattern.MatchString(displayJobName) { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) return } errStr := checkSpecialPool(ctx, "GPU") if errStr != "" { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) return } @@ -223,13 +311,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form) return } @@ -238,7 +326,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //check param if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) return } @@ -248,14 +336,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) return } @@ -265,7 +353,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain attachment, err := models.GetAttachmentByUUID(uuid) if err != nil { log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) return } @@ -278,7 +366,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -287,7 +375,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //upload code if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -295,7 +383,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" if err := mkModelPath(modelPath); err != nil { log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -303,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //init model readme if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -312,7 +400,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -344,7 +432,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) return } @@ -391,14 +479,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain engineName := form.EngineName if !jobNamePattern.MatchString(displayJobName) { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form) return } errStr := checkSpecialPool(ctx, "NPU") if errStr != "" { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) return } @@ -407,13 +495,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form) return } @@ -422,7 +510,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //check param if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) return } @@ -432,14 +520,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) return } @@ -449,7 +537,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain attachment, err := models.GetAttachmentByUUID(uuid) if err != nil { log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) return } @@ -462,7 +550,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form) return } @@ -470,14 +558,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form) return } if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form) return } @@ -486,7 +574,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) return } @@ -522,7 +610,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) return } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 88bf0d22a..ef9fe41b2 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -778,6 +778,12 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts ctx.Data["config_list"] = configList.ParaConfigs ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames ctx.Data["branch_name"] = form.BranchName ctx.Data["datasetType"] = models.TypeCloudBrainTwo @@ -2286,6 +2292,12 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel ctx.Data["config_list"] = configList.ParaConfigs ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames ctx.Data["branch_name"] = form.BranchName ctx.Data["model_name"] = form.ModelName ctx.Data["model_version"] = form.ModelVersion diff --git a/templates/repo/cloudbrain/inference/new.tmpl b/templates/repo/cloudbrain/inference/new.tmpl index 4228a42be..7d06c55ea 100644 --- a/templates/repo/cloudbrain/inference/new.tmpl +++ b/templates/repo/cloudbrain/inference/new.tmpl @@ -58,18 +58,16 @@