| @@ -2015,11 +2015,6 @@ func GetModelSafetyTestTask() ([]*Cloudbrain, error) { | |||
| return cloudbrains, err | |||
| } | |||
| func GetCloudbrainCountByUserID(userID int64, jobType string) (int, error) { | |||
| count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeCloudBrainOne).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) { | |||
| count, err := x.In("status", JobWaiting, JobRunning, ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, | |||
| ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsDeleting, ModelArtsRestarting, ModelArtsTrainJobInit, | |||
| @@ -2028,11 +2023,6 @@ func GetCloudbrainRunCountByRepoID(repoID int64) (int, error) { | |||
| return int(count), err | |||
| } | |||
| func GetBenchmarkCountByUserID(userID int64) (int, error) { | |||
| count, err := x.In("status", JobWaiting, JobRunning).And("(job_type = ? or job_type = ? or job_type = ?) and user_id = ? and type = ?", string(JobTypeBenchmark), string(JobTypeModelSafety), string(JobTypeBrainScore), string(JobTypeSnn4imagenet), userID, TypeCloudBrainOne).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func GetModelSafetyCountByUserID(userID int64) (int, error) { | |||
| count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ?", string(JobTypeModelSafety), userID).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| @@ -2048,40 +2038,14 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy | |||
| } | |||
| return sess.Count(new(Cloudbrain)) | |||
| } | |||
| func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) { | |||
| count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting). | |||
| And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) { | |||
| count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted). | |||
| And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func GetCloudbrainInferenceJobCountByUserID(userID int64) (int, error) { | |||
| count, err := x.In("status", ModelArtsTrainJobInit, ModelArtsTrainJobImageCreating, ModelArtsTrainJobSubmitTrying, ModelArtsTrainJobWaiting, ModelArtsTrainJobRunning, ModelArtsTrainJobScaling, ModelArtsTrainJobCheckInit, ModelArtsTrainJobCheckRunning, ModelArtsTrainJobCheckRunningCompleted). | |||
| And("job_type = ? and user_id = ? and type = ?", JobTypeInference, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func GetGrampusCountByUserID(userID int64, jobType, computeResource string) (int, error) { | |||
| count, err := x.In("status", GrampusStatusWaiting, GrampusStatusRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeC2Net).And("compute_resource = ?", computeResource).Count(new(Cloudbrain)) | |||
| func GetNotFinalStatusTaskCount(userID int64, notFinalStatus []string, jobTypes []JobType, cloudbrainTypes []int, computeResource string) (int, error) { | |||
| count, err := x.In("status", notFinalStatus). | |||
| In("job_type", jobTypes). | |||
| In("type", cloudbrainTypes). | |||
| And("user_id = ? and compute_resource = ?", userID, computeResource).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
| func UpdateInferenceJob(job *Cloudbrain) error { | |||
| return updateInferenceJob(x, job) | |||
| } | |||
| func updateInferenceJob(e Engine, job *Cloudbrain) error { | |||
| var sess *xorm.Session | |||
| sess = e.Where("job_id = ?", job.JobID) | |||
| _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time", "created_unix").Update(job) | |||
| return err | |||
| } | |||
| func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { | |||
| sess := x.NewSession() | |||
| defer sess.Close() | |||
| @@ -2411,97 +2375,6 @@ var ( | |||
| CloudbrainSpecialGpuInfosMap map[string]*GpuInfo | |||
| ) | |||
| func InitCloudbrainOneResourceSpecMap() { | |||
| if CloudbrainDebugResourceSpecsMap == nil || len(CloudbrainDebugResourceSpecsMap) == 0 { | |||
| t := ResourceSpecs{} | |||
| json.Unmarshal([]byte(setting.ResourceSpecs), &t) | |||
| CloudbrainDebugResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) | |||
| for _, spec := range t.ResourceSpec { | |||
| CloudbrainDebugResourceSpecsMap[spec.Id] = spec | |||
| } | |||
| } | |||
| if CloudbrainTrainResourceSpecsMap == nil || len(CloudbrainTrainResourceSpecsMap) == 0 { | |||
| t := ResourceSpecs{} | |||
| json.Unmarshal([]byte(setting.TrainResourceSpecs), &t) | |||
| CloudbrainTrainResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) | |||
| for _, spec := range t.ResourceSpec { | |||
| CloudbrainTrainResourceSpecsMap[spec.Id] = spec | |||
| } | |||
| } | |||
| if CloudbrainInferenceResourceSpecsMap == nil || len(CloudbrainInferenceResourceSpecsMap) == 0 { | |||
| t := ResourceSpecs{} | |||
| json.Unmarshal([]byte(setting.InferenceResourceSpecs), &t) | |||
| CloudbrainInferenceResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) | |||
| for _, spec := range t.ResourceSpec { | |||
| CloudbrainInferenceResourceSpecsMap[spec.Id] = spec | |||
| } | |||
| } | |||
| if CloudbrainBenchmarkResourceSpecsMap == nil || len(CloudbrainBenchmarkResourceSpecsMap) == 0 { | |||
| t := ResourceSpecs{} | |||
| json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &t) | |||
| CloudbrainBenchmarkResourceSpecsMap = make(map[int]*ResourceSpec, len(t.ResourceSpec)) | |||
| for _, spec := range t.ResourceSpec { | |||
| CloudbrainBenchmarkResourceSpecsMap[spec.Id] = spec | |||
| } | |||
| } | |||
| if CloudbrainSpecialResourceSpecsMap == nil || len(CloudbrainSpecialResourceSpecsMap) == 0 { | |||
| t := SpecialPools{} | |||
| json.Unmarshal([]byte(setting.SpecialPools), &t) | |||
| for _, pool := range t.Pools { | |||
| CloudbrainSpecialResourceSpecsMap = make(map[int]*ResourceSpec, len(pool.ResourceSpec)) | |||
| for _, spec := range pool.ResourceSpec { | |||
| CloudbrainSpecialResourceSpecsMap[spec.Id] = spec | |||
| } | |||
| } | |||
| } | |||
| SpecsMapInitFlag = true | |||
| } | |||
| func InitCloudbrainOneGpuInfoMap() { | |||
| if CloudbrainDebugGpuInfosMap == nil || len(CloudbrainDebugGpuInfosMap) == 0 { | |||
| t := GpuInfos{} | |||
| json.Unmarshal([]byte(setting.GpuTypes), &t) | |||
| CloudbrainDebugGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) | |||
| for _, GpuInfo := range t.GpuInfo { | |||
| CloudbrainDebugGpuInfosMap[GpuInfo.Queue] = GpuInfo | |||
| } | |||
| } | |||
| if CloudbrainTrainGpuInfosMap == nil || len(CloudbrainTrainGpuInfosMap) == 0 { | |||
| t := GpuInfos{} | |||
| json.Unmarshal([]byte(setting.TrainGpuTypes), &t) | |||
| CloudbrainTrainGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) | |||
| for _, GpuInfo := range t.GpuInfo { | |||
| CloudbrainTrainGpuInfosMap[GpuInfo.Queue] = GpuInfo | |||
| } | |||
| } | |||
| if CloudbrainInferenceGpuInfosMap == nil || len(CloudbrainInferenceGpuInfosMap) == 0 { | |||
| t := GpuInfos{} | |||
| json.Unmarshal([]byte(setting.InferenceGpuTypes), &t) | |||
| CloudbrainInferenceGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) | |||
| for _, GpuInfo := range t.GpuInfo { | |||
| CloudbrainInferenceGpuInfosMap[GpuInfo.Queue] = GpuInfo | |||
| } | |||
| } | |||
| if CloudbrainBenchmarkGpuInfosMap == nil || len(CloudbrainBenchmarkGpuInfosMap) == 0 { | |||
| t := GpuInfos{} | |||
| json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &t) | |||
| CloudbrainBenchmarkGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) | |||
| for _, GpuInfo := range t.GpuInfo { | |||
| CloudbrainBenchmarkGpuInfosMap[GpuInfo.Queue] = GpuInfo | |||
| } | |||
| } | |||
| if CloudbrainSpecialGpuInfosMap == nil || len(CloudbrainSpecialGpuInfosMap) == 0 { | |||
| t := SpecialPools{} | |||
| json.Unmarshal([]byte(setting.SpecialPools), &t) | |||
| for _, pool := range t.Pools { | |||
| CloudbrainSpecialGpuInfosMap = make(map[string]*GpuInfo, len(pool.Pool)) | |||
| for _, GpuInfo := range pool.Pool { | |||
| CloudbrainSpecialGpuInfosMap[GpuInfo.Queue] = GpuInfo | |||
| } | |||
| } | |||
| } | |||
| GpuInfosMapInitFlag = true | |||
| } | |||
| func GetNewestJobsByAiCenter() ([]int64, error) { | |||
| ids := make([]int64, 0) | |||
| return ids, x. | |||
| @@ -535,6 +535,8 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) { | |||
| } else { | |||
| log.Info("The GPU WaitCount not get") | |||
| } | |||
| NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| ctx.HTML(200, tplModelSafetyTestCreateGpu) | |||
| } | |||
| @@ -578,6 +580,8 @@ func AiSafetyCreateForGetNPU(ctx *context.Context) { | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | |||
| ctx.Data["WaitCount"] = waitCount | |||
| log.Info("The NPU WaitCount is " + fmt.Sprint(waitCount)) | |||
| NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| ctx.HTML(200, tplModelSafetyTestCreateNpu) | |||
| } | |||
| @@ -980,6 +984,8 @@ func modelSafetyNewDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["ckpt_name"] = ctx.Query("ckpt_name") | |||
| ctx.Data["model_name"] = ctx.Query("model_name") | |||
| ctx.Data["model_version"] = ctx.Query("model_version") | |||
| NotStopTaskCount, _ := models.GetModelSafetyCountByUserID(ctx.User.ID) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| if ctx.QueryInt("type") == models.TypeCloudBrainOne { | |||
| ctx.Data["type"] = models.TypeCloudBrainOne | |||
| @@ -17,6 +17,7 @@ import ( | |||
| "code.gitea.io/gitea/modules/dataset" | |||
| "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
| "code.gitea.io/gitea/services/cloudbrain/resource" | |||
| "code.gitea.io/gitea/services/reward/point/account" | |||
| @@ -107,7 +108,7 @@ func jobNamePrefixValid(s string) string { | |||
| } | |||
| func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||
| func cloudBrainNewDataPrepare(ctx *context.Context, jobType string) error { | |||
| ctx.Data["PageIsCloudBrain"] = true | |||
| t := time.Now() | |||
| var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] | |||
| @@ -148,6 +149,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||
| defaultMode = "alogrithm" | |||
| } | |||
| ctx.Data["benchmarkMode"] = defaultMode | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| if ctx.Cloudbrain != nil { | |||
| ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName | |||
| @@ -210,7 +213,7 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) { | |||
| } | |||
| func CloudBrainNew(ctx *context.Context) { | |||
| err := cloudBrainNewDataPrepare(ctx) | |||
| err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeDebug)) | |||
| if err != nil { | |||
| ctx.ServerError("get new cloudbrain info failed", err) | |||
| return | |||
| @@ -244,7 +247,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| isOk, err := lock.Lock(models.CloudbrainKeyDuration) | |||
| if !isOk { | |||
| log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -254,42 +257,42 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| if err == nil { | |||
| if len(tasks) != 0 { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("the job name did already exist", tpl, &form) | |||
| return | |||
| } | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } | |||
| } | |||
| if !jobNamePattern.MatchString(displayJobName) { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) | |||
| return | |||
| } | |||
| if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeTrain) { | |||
| log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("jobtype error", tpl, &form) | |||
| return | |||
| } | |||
| count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -301,7 +304,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| datasetInfos, datasetNames, err = models.GetDatasetInfo(uuids) | |||
| if err != nil { | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -312,7 +315,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) | |||
| if err != nil || !bootFileExist { | |||
| log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -320,7 +323,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| commandTrain, err := getTrainJobCommand(form) | |||
| if err != nil { | |||
| log.Error("getTrainJobCommand failed: %v", err) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tpl, &form) | |||
| return | |||
| } | |||
| @@ -333,7 +336,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| } | |||
| errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath) | |||
| if errStr != "" { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form) | |||
| return | |||
| } | |||
| @@ -346,14 +349,14 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| Cluster: models.OpenICluster, | |||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
| if err != nil || spec == nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("Resource specification not available", tpl, &form) | |||
| return | |||
| } | |||
| if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { | |||
| log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -396,7 +399,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| err = cloudbrain.GenerateTask(req) | |||
| if err != nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tpl, &form) | |||
| return | |||
| } | |||
| @@ -454,7 +457,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| isOk, err := lock.Lock(models.CloudbrainKeyDuration) | |||
| if !isOk { | |||
| log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -465,7 +468,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| command, err := getInferenceJobCommand(form) | |||
| if err != nil { | |||
| log.Error("getTrainJobCommand failed: %v", err) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tpl, &form) | |||
| return | |||
| } | |||
| @@ -474,21 +477,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| if err == nil { | |||
| if len(tasks) != 0 { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("the job name did already exist", tpl, &form) | |||
| return | |||
| } | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } | |||
| } | |||
| if !jobNamePattern.MatchString(displayJobName) { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -496,21 +499,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) | |||
| if err != nil || !bootFileExist { | |||
| log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) | |||
| return | |||
| } | |||
| count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -521,7 +524,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| } | |||
| errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath) | |||
| if errStr != "" { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form) | |||
| return | |||
| } | |||
| @@ -531,7 +534,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) | |||
| if err != nil { | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -541,13 +544,13 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| Cluster: models.OpenICluster, | |||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
| if err != nil || spec == nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("Resource specification not available", tpl, &form) | |||
| return | |||
| } | |||
| if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { | |||
| log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -582,7 +585,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
| err = cloudbrain.GenerateTask(req) | |||
| if err != nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tpl, &form) | |||
| return | |||
| } | |||
| @@ -682,7 +685,7 @@ func CloudBrainRestart(ctx *context.Context) { | |||
| break | |||
| } | |||
| count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, string(models.JobTypeDebug)) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, string(models.JobTypeDebug), models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| resultCode = "-1" | |||
| @@ -2222,7 +2225,7 @@ func CloudBrainBenchmarkNew(ctx *context.Context) { | |||
| ctx.Data["description"] = "" | |||
| ctx.Data["benchmarkTypeID"] = -1 | |||
| ctx.Data["benchmark_child_types_id_hidden"] = -1 | |||
| err := cloudBrainNewDataPrepare(ctx) | |||
| err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeBenchmark)) | |||
| if err != nil { | |||
| ctx.ServerError("get new cloudbrain info failed", err) | |||
| return | |||
| @@ -2327,6 +2330,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| benchmarkTypeID := form.BenchmarkTypeID | |||
| benchmarkChildTypeID := form.BenchmarkChildTypeID | |||
| repo := ctx.Repo.Repository | |||
| jobType := form.JobType | |||
| ctx.Data["description"] = form.Description | |||
| ctx.Data["benchmarkTypeID"] = benchmarkTypeID | |||
| @@ -2336,31 +2340,31 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| isOk, err := lock.Lock(models.CloudbrainKeyDuration) | |||
| if !isOk { | |||
| log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| defer lock.UnLock() | |||
| tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeBenchmark), displayJobName) | |||
| tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) | |||
| if err == nil { | |||
| if len(tasks) != 0 { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("the job name did already exist", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| } | |||
| if !jobNamePattern.MatchString(jobName) { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2368,7 +2372,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| childInfo, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID, ctx) | |||
| if err != nil { | |||
| log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("benchmark type error", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2379,27 +2383,27 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| Cluster: models.OpenICluster, | |||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
| if err != nil || spec == nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { | |||
| log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2408,7 +2412,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| os.RemoveAll(codePath) | |||
| if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil { | |||
| log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2417,11 +2421,11 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| if os.IsNotExist(err) { | |||
| // file does not exist | |||
| log.Error("train.py does not exist, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("train.py does not exist", tplCloudBrainBenchmarkNew, &form) | |||
| } else { | |||
| log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| } | |||
| return | |||
| @@ -2429,11 +2433,11 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| if os.IsNotExist(err) { | |||
| // file does not exist | |||
| log.Error("test.py does not exist, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("test.py does not exist", tplCloudBrainBenchmarkNew, &form) | |||
| } else { | |||
| log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| } | |||
| return | |||
| @@ -2441,7 +2445,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| if err := uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { | |||
| log.Error("uploadCodeToMinio failed, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2466,7 +2470,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) | |||
| if err != nil { | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2500,7 +2504,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
| err = cloudbrain.GenerateTask(req) | |||
| if err != nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, &form) | |||
| return | |||
| } | |||
| @@ -2526,7 +2530,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
| isOk, err := lock.Lock(models.CloudbrainKeyDuration) | |||
| if !isOk { | |||
| log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -2536,42 +2540,42 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
| if err == nil { | |||
| if len(tasks) != 0 { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("the job name did already exist", tpl, &form) | |||
| return | |||
| } | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } | |||
| } | |||
| if !jobNamePattern.MatchString(displayJobName) { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) | |||
| return | |||
| } | |||
| if jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { | |||
| log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("jobtype error", tpl, &form) | |||
| return | |||
| } | |||
| count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType, models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("system error", tpl, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -2603,7 +2607,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
| datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) | |||
| if err != nil { | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -2613,14 +2617,14 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
| Cluster: models.OpenICluster, | |||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
| if err != nil || spec == nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr("Resource specification not available", tpl, &form) | |||
| return | |||
| } | |||
| if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { | |||
| log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) | |||
| return | |||
| } | |||
| @@ -2654,7 +2658,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
| err = cloudbrain.GenerateTask(req) | |||
| if err != nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| cloudBrainNewDataPrepare(ctx, jobType) | |||
| ctx.RenderWithErr(err.Error(), tpl, &form) | |||
| return | |||
| } | |||
| @@ -2701,7 +2705,7 @@ func CloudBrainTrainJobVersionNew(ctx *context.Context) { | |||
| } | |||
| func cloudBrainTrainJobCreate(ctx *context.Context) { | |||
| err := cloudBrainNewDataPrepare(ctx) | |||
| err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeTrain)) | |||
| if err != nil { | |||
| ctx.ServerError("get new train-job info failed", err) | |||
| return | |||
| @@ -2710,7 +2714,7 @@ func cloudBrainTrainJobCreate(ctx *context.Context) { | |||
| } | |||
| func InferenceCloudBrainJobNew(ctx *context.Context) { | |||
| err := cloudBrainNewDataPrepare(ctx) | |||
| err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeInference)) | |||
| if err != nil { | |||
| ctx.ServerError("get new train-job info failed", err) | |||
| return | |||
| @@ -12,6 +12,8 @@ import ( | |||
| "strings" | |||
| "time" | |||
| "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
| "code.gitea.io/gitea/modules/dataset" | |||
| "code.gitea.io/gitea/services/cloudbrain/resource" | |||
| @@ -135,10 +137,15 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err | |||
| ctx.Data["datasetType"] = models.TypeCloudBrainOne | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.GPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| } else if processType == grampus.ProcessorTypeNPU { | |||
| ctx.Data["datasetType"] = models.TypeCloudBrainTwo | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.NPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| } | |||
| if ctx.Cloudbrain != nil { | |||
| @@ -300,7 +307,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| } | |||
| //check count limit | |||
| count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.GPUResource) | |||
| if err != nil { | |||
| log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) | |||
| @@ -570,7 +577,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| } | |||
| //check count limit | |||
| count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeTrain), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) | |||
| @@ -15,6 +15,8 @@ import ( | |||
| "time" | |||
| "unicode/utf8" | |||
| "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
| "code.gitea.io/gitea/modules/dataset" | |||
| "code.gitea.io/gitea/modules/modelarts_cd" | |||
| @@ -144,6 +146,8 @@ func notebookNewDataPrepare(ctx *context.Context) error { | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| return nil | |||
| } | |||
| @@ -162,50 +166,6 @@ func prepareCloudbrainTwoDebugSpecs(ctx *context.Context) { | |||
| ctx.Data["Specs"] = noteBookSpecs | |||
| } | |||
| func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { | |||
| ctx.Data["PageIsNotebook"] = true | |||
| jobName := form.JobName | |||
| uuid := form.Attachment | |||
| description := form.Description | |||
| flavor := form.Flavor | |||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| } | |||
| _, err = models.GetCloudbrainByName(jobName) | |||
| if err == nil { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| } | |||
| err = modelarts.GenerateTask(ctx, jobName, uuid, description, flavor) | |||
| if err != nil { | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") | |||
| } | |||
| func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { | |||
| ctx.Data["PageIsNotebook"] = true | |||
| displayJobName := form.DisplayJobName | |||
| @@ -225,7 +185,8 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm | |||
| } | |||
| defer lock.UnLock() | |||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| notebookNewDataPrepare(ctx) | |||
| @@ -272,7 +233,7 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm | |||
| } | |||
| if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { | |||
| log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| notebookNewDataPrepare(ctx) | |||
| ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| @@ -450,7 +411,8 @@ func NotebookRestart(ctx *context.Context) { | |||
| break | |||
| } | |||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| errorMsg = "system error" | |||
| @@ -798,6 +760,8 @@ func trainJobNewDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["datasetType"] = models.TypeCloudBrainTwo | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| setMultiNodeIfConfigureMatch(ctx) | |||
| @@ -966,6 +930,8 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["config_list"] = configList.ParaConfigs | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| return nil | |||
| } | |||
| @@ -1012,7 +978,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| } | |||
| defer lock.UnLock() | |||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| trainJobNewDataPrepare(ctx) | |||
| @@ -1356,7 +1323,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| return | |||
| } | |||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeTrain), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| @@ -2007,7 +1974,8 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||
| } | |||
| defer lock.UnLock() | |||
| count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) | |||
| count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference), models.NPUResource) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| inferenceJobErrorNewDataPrepare(ctx, form) | |||
| @@ -2409,6 +2377,8 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["datasetType"] = models.TypeCloudBrainTwo | |||
| waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") | |||
| ctx.Data["WaitCount"] = waitCount | |||
| NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeInference), models.NPUResource) | |||
| ctx.Data["NotStopTaskCount"] = NotStopTaskCount | |||
| return nil | |||
| } | |||
| @@ -0,0 +1,86 @@ | |||
| package cloudbrainTask | |||
| import ( | |||
| "fmt" | |||
| "strconv" | |||
| "code.gitea.io/gitea/models" | |||
| ) | |||
| type StatusInfo struct { | |||
| CloudBrainTypes []int | |||
| JobType []models.JobType | |||
| NotFinalStatuses []string | |||
| ComputeResource string | |||
| } | |||
| var cloudbrainOneNotFinalStatuses = []string{string(models.JobWaiting), string(models.JobRunning)} | |||
| var cloudbrainTwoNotFinalStatuses = []string{string(models.ModelArtsTrainJobInit), string(models.ModelArtsTrainJobImageCreating), string(models.ModelArtsTrainJobSubmitTrying), string(models.ModelArtsTrainJobWaiting), string(models.ModelArtsTrainJobRunning), string(models.ModelArtsTrainJobScaling), string(models.ModelArtsTrainJobCheckInit), string(models.ModelArtsTrainJobCheckRunning), string(models.ModelArtsTrainJobCheckRunningCompleted)} | |||
| var grampusTwoNotFinalStatuses = []string{models.GrampusStatusWaiting, models.GrampusStatusRunning} | |||
| var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | |||
| JobType: []models.JobType{models.JobTypeDebug}, | |||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||
| ComputeResource: models.GPUResource, | |||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | |||
| JobType: []models.JobType{models.JobTypeTrain}, | |||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||
| ComputeResource: models.GPUResource, | |||
| }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | |||
| JobType: []models.JobType{models.JobTypeInference}, | |||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||
| ComputeResource: models.GPUResource, | |||
| }, string(models.JobTypeBenchmark) + "-" + strconv.Itoa(models.TypeCloudBrainOne): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainOne}, | |||
| JobType: []models.JobType{models.JobTypeBenchmark, models.JobTypeModelSafety, models.JobTypeBrainScore, models.JobTypeSnn4imagenet}, | |||
| NotFinalStatuses: cloudbrainOneNotFinalStatuses, | |||
| ComputeResource: models.GPUResource, | |||
| }, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo, models.TypeCDCenter}, | |||
| JobType: []models.JobType{models.JobTypeDebug}, | |||
| NotFinalStatuses: []string{string(models.ModelArtsCreateQueue), string(models.ModelArtsCreating), string(models.ModelArtsStarting), string(models.ModelArtsReadyToStart), string(models.ModelArtsResizing), string(models.ModelArtsStartQueuing), string(models.ModelArtsRunning), string(models.ModelArtsRestarting)}, | |||
| ComputeResource: models.NPUResource, | |||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | |||
| JobType: []models.JobType{models.JobTypeTrain}, | |||
| NotFinalStatuses: cloudbrainTwoNotFinalStatuses, | |||
| ComputeResource: models.NPUResource, | |||
| }, string(models.JobTypeInference) + "-" + strconv.Itoa(models.TypeCloudBrainTwo): { | |||
| CloudBrainTypes: []int{models.TypeCloudBrainTwo}, | |||
| JobType: []models.JobType{models.JobTypeTrain}, | |||
| NotFinalStatuses: cloudbrainTwoNotFinalStatuses, | |||
| ComputeResource: models.NPUResource, | |||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: { | |||
| CloudBrainTypes: []int{models.TypeC2Net}, | |||
| JobType: []models.JobType{models.JobTypeTrain}, | |||
| NotFinalStatuses: grampusTwoNotFinalStatuses, | |||
| ComputeResource: models.GPUResource, | |||
| }, string(models.JobTypeTrain) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: { | |||
| CloudBrainTypes: []int{models.TypeC2Net}, | |||
| JobType: []models.JobType{models.JobTypeTrain}, | |||
| NotFinalStatuses: grampusTwoNotFinalStatuses, | |||
| ComputeResource: models.NPUResource, | |||
| }} | |||
| func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) { | |||
| jobNewType := jobType | |||
| if jobType == string(models.JobTypeSnn4imagenet) || jobType == string(models.JobTypeBrainScore) { | |||
| jobNewType = string(models.JobTypeBenchmark) | |||
| } | |||
| key := jobNewType + "-" + strconv.Itoa(cloudbrainType) | |||
| if len(computeResource) > 0 { | |||
| key = key + "-" + computeResource[0] | |||
| } | |||
| if statusInfo, ok := StatusInfoDict[key]; ok { | |||
| return models.GetNotFinalStatusTaskCount(uid, statusInfo.NotFinalStatuses, statusInfo.JobType, statusInfo.CloudBrainTypes, statusInfo.ComputeResource) | |||
| } else { | |||
| return 0, fmt.Errorf("Can not find the status info.") | |||
| } | |||
| } | |||