diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 6135dac40..f0e27995e 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -2015,11 +2015,6 @@ func GetModelSafetyTestTask() ([]*Cloudbrain, error) { return cloudbrains, err } -func GetCloudbrainCountByUserID(userID int64, jobType string) (int, error) { - count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeCloudBrainOne).Count(new(Cloudbrain)) - return int(count), err -} - func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) { count, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting, ModelArtsTrainJobInit, @@ -2028,11 +2023,6 @@ func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) { return int(count), err } -func GetBenchmarkCountByUserID(userID int64) (int, error) { - count, err := x.In("status", JobWaiting, JobRunning).And("(job_type = ? or job_type = ? or job_type = ?) and user_id = ? and type = ?", string(JobTypeBenchmark), string(JobTypeModelSafety), string(JobTypeBrainScore), string(JobTypeSnn4imagenet), userID, TypeCloudBrainOne).Count(new(Cloudbrain)) - return int(count), err -} - func GetModelSafetyCountByUserID(userID int64) (int, error) { count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ?", string(JobTypeModelSafety), userID).Count(new(Cloudbrain)) return int(count), err @@ -2048,40 +2038,14 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy } return sess.Count(new(Cloudbrain)) } - -func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) { - count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting). - And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain)) - return int(count), err -} - -func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) { - count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted). - And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) - return int(count), err -} - -func GetCloudbrainInferenceJobCountByUserID(userID int64) (int, error) { - count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted). - And("job_type = ? and user_id = ? and type = ?", JobTypeInference, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) - return int(count), err -} - -func GetGrampusCountByUserID(userID int64, jobType, computeResource string) (int, error) { - count, err := x.In("status", GrampusStatusWaiting, GrampusStatusRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeC2Net).And("compute_resource = ?", computeResource).Count(new(Cloudbrain)) +func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType, cloudbrainTypes []int, computeResource string) (int, error) { + count, err := x.In("status", notFinalStatus). + In("job_type", jobTypes). + In("type", cloudbrainTypes). + And("user_id = ? and compute_resource = ?", userID, computeResource).Count(new(Cloudbrain)) return int(count), err } -func UpdateInferenceJob(job *Cloudbrain) error { - return updateInferenceJob(x, job) -} - -func updateInferenceJob(e Engine, job *Cloudbrain) error { - var sess *xorm.Session - sess = e.Where("job_id = ?", job.JobID) - _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time", "created_unix").Update(job) - return err -} func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { sess := x.NewSession() defer sess.Close() @@ -2411,97 +2375,6 @@ var ( CloudbrainSpecialGpuInfosMap map[string]*GpuInfo ) -func InitCloudbrainOneResourceSpecMap() { - if CloudbrainDebugResourceSpecsMap == nil || len(CloudbrainDebugResourceSpecsMap) == 0 { - t := ResourceSpecs{} - json.Unmarshal([]byte(setting.ResourceSpecs), &t) - CloudbrainDebugResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) - for _, spec := range t.ResourceSpec { - CloudbrainDebugResourceSpecsMap[spec.Id] = spec - } - } - if CloudbrainTrainResourceSpecsMap == nil || len(CloudbrainTrainResourceSpecsMap) == 0 { - t := ResourceSpecs{} - json.Unmarshal([]byte(setting.TrainResourceSpecs), &t) - CloudbrainTrainResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) - for _, spec := range t.ResourceSpec { - CloudbrainTrainResourceSpecsMap[spec.Id] = spec - } - } - if CloudbrainInferenceResourceSpecsMap == nil || len(CloudbrainInferenceResourceSpecsMap) == 0 { - t := ResourceSpecs{} - json.Unmarshal([]byte(setting.InferenceResourceSpecs), &t) - CloudbrainInferenceResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) - for _, spec := range t.ResourceSpec { - CloudbrainInferenceResourceSpecsMap[spec.Id] = spec - } - } - if CloudbrainBenchmarkResourceSpecsMap == nil || len(CloudbrainBenchmarkResourceSpecsMap) == 0 { - t := ResourceSpecs{} - json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &t) - CloudbrainBenchmarkResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) - for _, spec := range t.ResourceSpec { - CloudbrainBenchmarkResourceSpecsMap[spec.Id] = spec - } - } - if CloudbrainSpecialResourceSpecsMap == nil || len(CloudbrainSpecialResourceSpecsMap) == 0 { - t := SpecialPools{} - json.Unmarshal([]byte(setting.SpecialPools), &t) - for _, pool := range t.Pools { - CloudbrainSpecialResourceSpecsMap = make(map[int]*ResourceSpec, len(pool.ResourceSpec)) - for _, spec := range pool.ResourceSpec { - CloudbrainSpecialResourceSpecsMap[spec.Id] = spec - } - } - } - SpecsMapInitFlag = true -} - -func InitCloudbrainOneGpuInfoMap() { - if CloudbrainDebugGpuInfosMap == nil || len(CloudbrainDebugGpuInfosMap) == 0 { - t := GpuInfos{} - json.Unmarshal([]byte(setting.GpuTypes), &t) - CloudbrainDebugGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) - for _, GpuInfo := range t.GpuInfo { - CloudbrainDebugGpuInfosMap[GpuInfo.Queue] = GpuInfo - } - } - if CloudbrainTrainGpuInfosMap == nil || len(CloudbrainTrainGpuInfosMap) == 0 { - t := GpuInfos{} - json.Unmarshal([]byte(setting.TrainGpuTypes), &t) - CloudbrainTrainGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) - for _, GpuInfo := range t.GpuInfo { - CloudbrainTrainGpuInfosMap[GpuInfo.Queue] = GpuInfo - } - } - if CloudbrainInferenceGpuInfosMap == nil || len(CloudbrainInferenceGpuInfosMap) == 0 { - t := GpuInfos{} - json.Unmarshal([]byte(setting.InferenceGpuTypes), &t) - CloudbrainInferenceGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) - for _, GpuInfo := range t.GpuInfo { - CloudbrainInferenceGpuInfosMap[GpuInfo.Queue] = GpuInfo - } - } - if CloudbrainBenchmarkGpuInfosMap == nil || len(CloudbrainBenchmarkGpuInfosMap) == 0 { - t := GpuInfos{} - json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &t) - CloudbrainBenchmarkGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) - for _, GpuInfo := range t.GpuInfo { - CloudbrainBenchmarkGpuInfosMap[GpuInfo.Queue] = GpuInfo - } - } - if CloudbrainSpecialGpuInfosMap == nil || len(CloudbrainSpecialGpuInfosMap) == 0 { - t := SpecialPools{} - json.Unmarshal([]byte(setting.SpecialPools), &t) - for _, pool := range t.Pools { - CloudbrainSpecialGpuInfosMap = make(map[string]*GpuInfo, len(pool.Pool)) - for _, GpuInfo := range pool.Pool { - CloudbrainSpecialGpuInfosMap[GpuInfo.Queue] = GpuInfo - } - } - } - GpuInfosMapInitFlag = true -} func GetNewestJobsByAiCenter() ([]int64, error) { ids := make([]int64, 0) return ids, x. diff --git a/routers/repo/aisafety.go b/routers/repo/aisafety.go index 5102a6722..63f50592b 100644 --- a/routers/repo/aisafety.go +++ b/routers/repo/aisafety.go @@ -535,6 +535,8 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) { } else { log.Info("The GPU WaitCount not get") } + NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount ctx.HTML(200, tplModelSafetyTestCreateGpu) } @@ -578,6 +580,8 @@ func AiSafetyCreateForGetNPU(ctx *context.Context) { waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount log.Info("The NPU WaitCount is " + fmt.Sprint(waitCount)) + NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount ctx.HTML(200, tplModelSafetyTestCreateNpu) } @@ -980,6 +984,8 @@ func modelSafetyNewDataPrepare(ctx *context.Context) error { ctx.Data["ckpt_name"] = ctx.Query("ckpt_name") ctx.Data["model_name"] = ctx.Query("model_name") ctx.Data["model_version"] = ctx.Query("model_version") + NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount if ctx.QueryInt("type") == models.TypeCloudBrainOne { ctx.Data["type"] = models.TypeCloudBrainOne diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index a2ea7d51b..f5a43e697 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/dataset" + "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" "code.gitea.io/gitea/services/cloudbrain/resource" "code.gitea.io/gitea/services/reward/point/account" @@ -107,7 +108,7 @@ func jobNamePrefixValid(s string) string { } -func cloudBrainNewDataPrepare(ctx *context.Context) error { +func cloudBrainNewDataPrepare(ctx *context.Context, jobType string) error { ctx.Data["PageIsCloudBrain"] = true t := time.Now() var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] @@ -148,6 +149,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { defaultMode = "alogrithm" } ctx.Data["benchmarkMode"] = defaultMode + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount if ctx.Cloudbrain != nil { ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName @@ -210,7 +213,7 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) { } func CloudBrainNew(ctx *context.Context) { - err := cloudBrainNewDataPrepare(ctx) + err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeDebug)) if err != nil { ctx.ServerError("get new cloudbrain info failed", err) return @@ -244,7 +247,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { isOk, err := lock.Lock(models.CloudbrainKeyDuration) if !isOk { log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) return } @@ -254,42 +257,42 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeTrain) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("jobtype error", tpl, &form) return } - count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -301,7 +304,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -312,7 +315,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) if err != nil || !bootFileExist { log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) return } @@ -320,7 +323,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { commandTrain, err := getTrainJobCommand(form) if err != nil { log.Error("getTrainJobCommand failed: %v", err) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -333,7 +336,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath) if errStr != "" { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form) return } @@ -346,14 +349,14 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("Resource specification not available", tpl, &form) return } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) return } @@ -396,7 +399,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -454,7 +457,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra isOk, err := lock.Lock(models.CloudbrainKeyDuration) if !isOk { log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) return } @@ -465,7 +468,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra command, err := getInferenceJobCommand(form) if err != nil { log.Error("getTrainJobCommand failed: %v", err) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -474,21 +477,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } @@ -496,21 +499,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) if err != nil || !bootFileExist { log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) return } - count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -521,7 +524,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra } errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath) if errStr != "" { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form) return } @@ -531,7 +534,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -541,13 +544,13 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("Resource specification not available", tpl, &form) return } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) return } @@ -582,7 +585,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -682,7 +685,7 @@ func CloudBrainRestart(ctx *context.Context) { break } - count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, string(models.JobTypeDebug)) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, string(models.JobTypeDebug), models.GPUResource) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) resultCode = "-1" @@ -2222,7 +2225,7 @@ func CloudBrainBenchmarkNew(ctx *context.Context) { ctx.Data["description"] = "" ctx.Data["benchmarkTypeID"] = -1 ctx.Data["benchmark_child_types_id_hidden"] = -1 - err := cloudBrainNewDataPrepare(ctx) + err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeBenchmark)) if err != nil { ctx.ServerError("get new cloudbrain info failed", err) return @@ -2327,6 +2330,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo benchmarkTypeID := form.BenchmarkTypeID benchmarkChildTypeID := form.BenchmarkChildTypeID repo := ctx.Repo.Repository + jobType := form.JobType ctx.Data["description"] = form.Description ctx.Data["benchmarkTypeID"] = benchmarkTypeID @@ -2336,31 +2340,31 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo isOk, err := lock.Lock(models.CloudbrainKeyDuration) if !isOk { log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplCloudBrainBenchmarkNew, &form) return } defer lock.UnLock() - tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeBenchmark), displayJobName) + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("the job name did already exist", tplCloudBrainBenchmarkNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) return } } if !jobNamePattern.MatchString(jobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainBenchmarkNew, &form) return } @@ -2368,7 +2372,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo childInfo, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID, ctx) if err != nil { log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("benchmark type error", tplCloudBrainBenchmarkNew, &form) return } @@ -2379,27 +2383,27 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, &form) return } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplCloudBrainBenchmarkNew, &form) return } - count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainBenchmarkNew, &form) return } @@ -2408,7 +2412,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo os.RemoveAll(codePath) if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil { log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) return } @@ -2417,11 +2421,11 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo if os.IsNotExist(err) { // file does not exist log.Error("train.py does not exist, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("train.py does not exist", tplCloudBrainBenchmarkNew, &form) } else { log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) } return @@ -2429,11 +2433,11 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo if os.IsNotExist(err) { // file does not exist log.Error("test.py does not exist, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("test.py does not exist", tplCloudBrainBenchmarkNew, &form) } else { log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) } return @@ -2441,7 +2445,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo if err := uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("uploadCodeToMinio failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) return } @@ -2466,7 +2470,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form) return } @@ -2500,7 +2504,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) return } @@ -2526,7 +2530,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) isOk, err := lock.Lock(models.CloudbrainKeyDuration) if !isOk { log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) return } @@ -2536,42 +2540,42 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } if jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("jobtype error", tpl, &form) return } - count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -2603,7 +2607,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -2613,14 +2617,14 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr("Resource specification not available", tpl, &form) return } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) return } @@ -2654,7 +2658,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -2701,7 +2705,7 @@ func CloudBrainTrainJobVersionNew(ctx *context.Context) { } func cloudBrainTrainJobCreate(ctx *context.Context) { - err := cloudBrainNewDataPrepare(ctx) + err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain)) if err != nil { ctx.ServerError("get new train-job info failed", err) return @@ -2710,7 +2714,7 @@ func cloudBrainTrainJobCreate(ctx *context.Context) { } func InferenceCloudBrainJobNew(ctx *context.Context) { - err := cloudBrainNewDataPrepare(ctx) + err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeInference)) if err != nil { ctx.ServerError("get new train-job info failed", err) return diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index b78bdebd3..d901298b7 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -12,6 +12,8 @@ import ( "strings" "time" + "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" + "code.gitea.io/gitea/modules/dataset" "code.gitea.io/gitea/services/cloudbrain/resource" @@ -135,10 +137,15 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["datasetType"] = models.TypeCloudBrainOne waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.GPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount + } else if processType == grampus.ProcessorTypeNPU { ctx.Data["datasetType"] = models.TypeCloudBrainTwo waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.NPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount } if ctx.Cloudbrain != nil { @@ -300,7 +307,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check count limit - count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.GPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) @@ -570,7 +577,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check count limit - count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.NPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 6e44b3cd2..2a10da264 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -15,6 +15,8 @@ import ( "time" "unicode/utf8" + "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" + "code.gitea.io/gitea/modules/dataset" "code.gitea.io/gitea/modules/modelarts_cd" @@ -144,6 +146,8 @@ func notebookNewDataPrepare(ctx *context.Context) error { waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount return nil } @@ -162,50 +166,6 @@ func prepareCloudbrainTwoDebugSpecs(ctx *context.Context) { ctx.Data["Specs"] = noteBookSpecs } -func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { - ctx.Data["PageIsNotebook"] = true - jobName := form.JobName - uuid := form.Attachment - description := form.Description - flavor := form.Flavor - - count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) - if err != nil { - log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) - return - } else { - if count >= 1 { - log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) - return - } - } - _, err = models.GetCloudbrainByName(jobName) - if err == nil { - log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form) - return - } else { - if !models.IsErrJobNotExist(err) { - log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) - return - } - } - - err = modelarts.GenerateTask(ctx, jobName, uuid, description, flavor) - if err != nil { - ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) - return - } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") -} - func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { ctx.Data["PageIsNotebook"] = true displayJobName := form.DisplayJobName @@ -225,7 +185,8 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } defer lock.UnLock() - count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) + if err != nil { log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) notebookNewDataPrepare(ctx) @@ -272,7 +233,7 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID) - cloudBrainNewDataPrepare(ctx) + notebookNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplModelArtsNotebookNew, &form) return } @@ -450,7 +411,8 @@ func NotebookRestart(ctx *context.Context) { break } - count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) + if err != nil { log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) errorMsg = "system error" @@ -798,6 +760,8 @@ func trainJobNewDataPrepare(ctx *context.Context) error { ctx.Data["datasetType"] = models.TypeCloudBrainTwo waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount setMultiNodeIfConfigureMatch(ctx) @@ -966,6 +930,8 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["config_list"] = configList.ParaConfigs waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount return nil } @@ -1012,7 +978,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } defer lock.UnLock() - count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) + if err != nil { log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) trainJobNewDataPrepare(ctx) @@ -1356,7 +1323,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } - count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) if err != nil { log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) trainJobNewVersionDataPrepare(ctx) @@ -2007,7 +1974,8 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } defer lock.UnLock() - count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) + count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference), models.NPUResource) + if err != nil { log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) inferenceJobErrorNewDataPrepare(ctx, form) @@ -2409,6 +2377,8 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { ctx.Data["datasetType"] = models.TypeCloudBrainTwo waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") ctx.Data["WaitCount"] = waitCount + NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference), models.NPUResource) + ctx.Data["NotStopTaskCount"] = NotStopTaskCount return nil } diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go new file mode 100644 index 000000000..25d56b0b6 --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/count.go @@ -0,0 +1,86 @@ +package cloudbrainTask + +import ( + "fmt" + "strconv" + + "code.gitea.io/gitea/models" +) + +type StatusInfo struct { + CloudBrainTypes []int + JobType []models.JobType + NotFinalStatuses []string + ComputeResource string +} + +var cloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} +var cloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} +var grampusTwoNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} +var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { + CloudBrainTypes: []int{models.TypeCloudBrainOne}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: cloudbrainOneNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { + CloudBrainTypes: []int{models.TypeCloudBrainOne}, + JobType: []models.JobType{models.JobTypeTrain}, + NotFinalStatuses: cloudbrainOneNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { + CloudBrainTypes: []int{models.TypeCloudBrainOne}, + JobType: []models.JobType{models.JobTypeInference}, + NotFinalStatuses: cloudbrainOneNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { + CloudBrainTypes: []int{models.TypeCloudBrainOne}, + JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeModelSafety, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, + NotFinalStatuses: cloudbrainOneNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { + CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, + JobType: []models.JobType{models.JobTypeDebug}, + NotFinalStatuses: []string{string(models.ModelArtsCreateQueue), string(models.ModelArtsCreating), string(models.ModelArtsStarting), string(models.ModelArtsReadyToStart), string(models.ModelArtsResizing), string(models.ModelArtsStartQueuing), string(models.ModelArtsRunning), string(models.ModelArtsRestarting)}, + ComputeResource: models.NPUResource, +}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { + CloudBrainTypes: []int{models.TypeCloudBrainTwo}, + JobType: []models.JobType{models.JobTypeTrain}, + NotFinalStatuses: cloudbrainTwoNotFinalStatuses, + ComputeResource: models.NPUResource, +}, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { + CloudBrainTypes: []int{models.TypeCloudBrainTwo}, + JobType: []models.JobType{models.JobTypeTrain}, + NotFinalStatuses: cloudbrainTwoNotFinalStatuses, + ComputeResource: models.NPUResource, +}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeTrain}, + NotFinalStatuses: grampusTwoNotFinalStatuses, + ComputeResource: models.GPUResource, +}, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { + CloudBrainTypes: []int{models.TypeC2Net}, + JobType: []models.JobType{models.JobTypeTrain}, + NotFinalStatuses: grampusTwoNotFinalStatuses, + ComputeResource: models.NPUResource, +}} + +func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { + jobNewType := jobType + if jobType == string(models.JobTypeSnn4imagenet) || jobType == string(models.JobTypeBrainScore) { + jobNewType = string(models.JobTypeBenchmark) + } + + key := jobNewType + "-" + strconv.Itoa(cloudbrainType) + if len(computeResource) > 0 { + key = key + "-" + computeResource[0] + } + + if statusInfo, ok := StatusInfoDict[key]; ok { + + return models.GetNotFinalStatusTaskCount(uid, statusInfo.NotFinalStatuses, statusInfo.JobType, statusInfo.CloudBrainTypes, statusInfo.ComputeResource) + + } else { + return 0, fmt.Errorf("Can not find the status info.") + } + +}