diff --git a/models/action.go b/models/action.go
index 4b6f1dbad..869acb762 100755
--- a/models/action.go
+++ b/models/action.go
@@ -65,6 +65,8 @@ const (
 	ActionCreateImage               //36
 	ActionImageRecommend            //37
 	ActionChangeUserAvatar          //38
+	ActionCreateGrampusNPUDebugTask //39
+	ActionCreateGrampusGPUDebugTask //40
 )
 
 // Action represents user operation type and other information to
@@ -375,6 +377,8 @@ func (a *Action) IsCloudbrainAction() bool {
 		ActionCreateInferenceTask,
 		ActionCreateBenchMarkTask,
 		ActionCreateGPUTrainTask,
+		ActionCreateGrampusGPUDebugTask,
+		ActionCreateGrampusNPUDebugTask,
 		ActionCreateGrampusNPUTrainTask,
 		ActionCreateGrampusGPUTrainTask:
 		return true
diff --git a/models/cloudbrain.go b/models/cloudbrain.go
index cdd9698fe..a2a69316f 100755
--- a/models/cloudbrain.go
+++ b/models/cloudbrain.go
@@ -1442,6 +1442,20 @@ type GrampusJobInfo struct {
 	UserID      string         `json:"userId"`
 	Tasks       []GrampusTasks `json:"tasks"`
 }
+
+type GrampusNotebookInfo struct {
+	StartedAt   int64                 `json:"startedAt"`
+	RunSec      int64                 `json:"runSec"`
+	CompletedAt int64                 `json:"completedAt"`
+	CreatedAt   int64                 `json:"createdAt"`
+	UpdatedAt   int64                 `json:"updatedAt"`
+	Desc        string                `json:"desc"`
+	JobID       string                `json:"id"`
+	Name        string                `json:"name"`
+	Status      string                `json:"status"`
+	UserID      string                `json:"userId"`
+	Tasks       []GrampusNotebookTask `json:"tasks"`
+}
 type Center struct {
 	ID   string `json:"id"`
 	Name string `json:"name"`
@@ -1518,6 +1532,11 @@ type GetGrampusJobResponse struct {
 	JobInfo GrampusJobInfo `json:"otJob"`
 }
 
+type GrampusNotebookResponse struct {
+	GrampusResult
+	JobInfo GrampusNotebookInfo `json:"otJob"`
+}
+
 type GrampusStopJobResponse struct {
 	GrampusResult
 	StoppedAt int64 `json:"stoppedAt"`
@@ -1537,6 +1556,21 @@ type GrampusTasks struct {
 	Code           GrampusDataset   `json:"code"`
 	BootFile       string           `json:"bootFile"`
 }
+type GrampusNotebookTask struct {
+	AutoStopDuration int              `json:"autoStopDuration"`
+	Name             string           `json:"name"`
+	Capacity         int              `json:"capacity"`
+	CenterID         []string         `json:"centerID"`
+	CenterName       []string         `json:"centerName"`
+	Code             GrampusDataset   `json:"code"`
+	Datasets         []GrampusDataset `json:"datasets"`
+	ImageId          string           `json:"imageId"`
+	ImageUrl         string           `json:"imageUrl"`
+	ResourceSpecId   string           `json:"resourceSpecId"`
+	Token            string           `json:"token"`
+	Url              string           `json:"url"`
+	Status           string           `json:"status"`
+}
 
 type GrampusDataset struct {
 	Name      string `json:"name"`
@@ -1550,6 +1584,11 @@ type CreateGrampusJobRequest struct {
 	Tasks []GrampusTasks `json:"tasks"`
 }
 
+type CreateGrampusNotebookRequest struct {
+	Name  string                `json:"name"`
+	Tasks []GrampusNotebookTask `json:"tasks"`
+}
+
 type GetTrainJobMetricStatisticResult struct {
 	TrainJobResult
 	Interval int `json:"interval"` //查询的时间间隔,单位为分钟
diff --git a/models/task_config.go b/models/task_config.go
index 0d9d21187..f86032fc9 100644
--- a/models/task_config.go
+++ b/models/task_config.go
@@ -36,6 +36,8 @@ func GetTaskTypeFromAction(a ActionType) TaskType {
 		ActionCreateInferenceTask,
 		ActionCreateBenchMarkTask,
 		ActionCreateGPUTrainTask,
+		ActionCreateGrampusGPUDebugTask,
+		ActionCreateGrampusNPUDebugTask,
 		ActionCreateGrampusNPUTrainTask,
 		ActionCreateGrampusGPUTrainTask:
 		return TaskCreateCloudbrainTask
diff --git a/modules/auth/grampus.go b/modules/auth/grampus.go
index 414a7c25d..a50613613 100755
--- a/modules/auth/grampus.go
+++ b/modules/auth/grampus.go
@@ -29,3 +29,19 @@ type CreateGrampusTrainJobForm struct {
 func (f *CreateGrampusTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
 	return validate(errs, ctx.Data, f, ctx.Locale)
 }
+
+type CreateGrampusNotebookForm struct {
+	Type           int    `form:"type"`
+	DisplayJobName string `form:"display_job_name" binding:"Required"`
+	Attachment     string `form:"attachment"`
+	ImageID        string `form:"image_id" binding:"Required"`
+	Description    string `form:"description"`
+	BranchName     string `form:"branch_name" binding:"Required"`
+	Image          string `form:"image" binding:"Required"`
+	DatasetName    string `form:"dataset_name"`
+	SpecId         int64  `form:"spec_id" binding:"Required"`
+}
+
+func (f *CreateGrampusNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
+	return validate(errs, ctx.Data, f, ctx.Locale)
+}
diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go
index b6f62560a..c8fc381d8 100755
--- a/modules/grampus/grampus.go
+++ b/modules/grampus/grampus.go
@@ -28,6 +28,7 @@ const (
 
 	BucketRemote    = "grampus"
 	RemoteModelPath = "/output/" + models.ModelSuffix
+	autoStopDurationMs = 4 * 60 * 60 * 1000
 )
 
 var (
@@ -81,6 +82,25 @@ type GenerateTrainJobReq struct {
 	CodeName         string
 }
 
+type GenerateNotebookJobReq struct {
+	JobName         string
+	Command         string
+	ImageUrl        string
+	ImageId         string
+	DisplayJobName  string
+	Uuid            string
+	Description     string
+	CodeObsPath     string
+	CommitID        string
+	BranchName      string
+	ComputeResource string
+	ProcessType     string
+	DatasetNames    string
+	DatasetInfos    map[string]models.DatasetInfo
+	Spec            *models.Specification
+	CodeName        string
+}
+
 func getEndPoint() string {
 	index := strings.Index(setting.Endpoint, "//")
 	endpoint := setting.Endpoint[index+2:]
@@ -102,6 +122,82 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram
 	return datasetGrampus
 }
 
+func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) {
+	createTime := timeutil.TimeStampNow()
+
+	var datasetGrampus []models.GrampusDataset
+	var codeGrampus models.GrampusDataset
+	if ProcessorTypeNPU == req.ProcessType {
+		datasetGrampus = getDatasetGrampus(req.DatasetInfos)
+		codeGrampus = models.GrampusDataset{
+			Name:      req.CodeName,
+			Bucket:    setting.Bucket,
+			EndPoint:  getEndPoint(),
+			ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip",
+		}
+	}
+
+	jobResult, err := createNotebookJob(models.CreateGrampusNotebookRequest{
+		Name: req.JobName,
+		Tasks: []models.GrampusNotebookTask{
+			{
+				Name:             req.JobName,
+				ResourceSpecId:   req.Spec.SourceSpecId,
+				ImageId:          req.ImageId,
+				ImageUrl:         req.ImageUrl,
+				Datasets:         datasetGrampus,
+				Code:             codeGrampus,
+				AutoStopDuration: autoStopDurationMs,
+				Capacity:         setting.Capacity,
+			},
+		},
+	})
+	if err != nil {
+		log.Error("createNotebookJob failed: %v", err.Error())
+		return "", err
+	}
+
+	jobID := jobResult.JobInfo.JobID
+	err = models.CreateCloudbrain(&models.Cloudbrain{
+		Status:           TransTrainJobStatus(jobResult.JobInfo.Status),
+		UserID:           ctx.User.ID,
+		RepoID:           ctx.Repo.Repository.ID,
+		JobID:            jobID,
+		JobName:          req.JobName,
+		DisplayJobName:   req.DisplayJobName,
+		JobType:          string(models.JobTypeDebug),
+		Type:             models.TypeC2Net,
+		Uuid:             req.Uuid,
+		DatasetName:      req.DatasetNames,
+		CommitID:         req.CommitID,
+		IsLatestVersion:  "1",
+		ComputeResource:  req.ComputeResource,
+		ImageID:          req.ImageId,
+		BranchName:       req.BranchName,
+		Description:      req.Description,
+		WorkServerNumber: 1,
+		EngineName:       req.ImageUrl,
+		CreatedUnix:      createTime,
+		UpdatedUnix:      createTime,
+		Spec:             req.Spec,
+	})
+
+	if err != nil {
+		log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
+		return "", err
+	}
+
+	var actionType models.ActionType
+	if req.ComputeResource == models.NPUResource {
+		actionType = models.ActionCreateGrampusNPUDebugTask
+	} else if req.ComputeResource == models.GPUResource {
+		actionType = models.ActionCreateGrampusGPUDebugTask
+	}
+	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType)
+
+	return jobID, nil
+}
+
 func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) {
 	createTime := timeutil.TimeStampNow()
 
diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go
index a9e1aed5c..a5d55a71f 100755
--- a/modules/grampus/resty.go
+++ b/modules/grampus/resty.go
@@ -26,6 +26,7 @@ const (
 	urlGetResourceSpecs = urlOpenApiV1 + "resourcespec"
 	urlGetAiCenter      = urlOpenApiV1 + "sharescreen/aicenter"
 	urlGetImages        = urlOpenApiV1 + "image"
+	urlNotebookJob      = urlOpenApiV1 + "notebook"
 
 	errorIllegalToken = 1005
 )
@@ -87,6 +88,39 @@ func getToken() error {
 	return nil
 }
 
+func createNotebookJob(req models.CreateGrampusNotebookRequest) (*models.GrampusNotebookResponse, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GrampusNotebookResponse
+
+	retry := 0
+
+sendjob:
+	_, err := client.R().
+		SetHeader("Content-Type", "application/json").
+		SetAuthToken(TOKEN).
+		SetBody(req).
+		SetResult(&result).
+		Post(HOST + urlNotebookJob)
+
+	if err != nil {
+		return nil, fmt.Errorf("resty CreateNotebookJob: %s", err)
+	}
+
+	if result.ErrorCode == errorIllegalToken && retry < 1 {
+		retry++
+		_ = getToken()
+		goto sendjob
+	}
+
+	if result.ErrorCode != 0 {
+		log.Error("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+		return &result, fmt.Errorf("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+	}
+
+	return &result, nil
+}
+
 func createJob(req models.CreateGrampusJobRequest) (*models.CreateGrampusJobResponse, error) {
 	checkSetting()
 	client := getRestyClient()
@@ -120,6 +154,39 @@ sendjob:
 	return &result, nil
 }
 
+
+func GetNotebookJob(jobID string) (*models.GrampusNotebookResponse, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GrampusNotebookResponse
+
+	retry := 0
+
+sendjob:
+	_, err := client.R().
+		SetAuthToken(TOKEN).
+		SetResult(&result).
+		Get(HOST + urlNotebookJob + "/" + jobID)
+
+	if err != nil {
+		return nil, fmt.Errorf("resty GetNotebookJob: %v", err)
+	}
+
+	if result.ErrorCode == errorIllegalToken && retry < 1 {
+		retry++
+		log.Info("retry get token")
+		_ = getToken()
+		goto sendjob
+	}
+
+	if result.ErrorCode != 0 {
+		log.Error("GetNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+		return nil, fmt.Errorf("GetNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg)
+	}
+
+	return &result, nil
+}
+
 func GetJob(jobID string) (*models.GetGrampusJobResponse, error) {
 	checkSetting()
 	client := getRestyClient()
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index dd502dfd0..c9d9bab69 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"path"
 	"strconv"
 	"strings"
 
@@ -15,20 +14,13 @@ import (
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/notification"
 	"code.gitea.io/gitea/modules/setting"
-	"code.gitea.io/gitea/modules/storage"
 	"code.gitea.io/gitea/modules/timeutil"
 )
 
 const (
 	//notebook
-	storageTypeOBS                 = "obs"
-	autoStopDuration               = 4 * 60 * 60
 	autoStopDurationMs             = 4 * 60 * 60 * 1000
-	MORDELART_USER_IMAGE_ENGINE_ID = -1
-	DataSetMountPath               = "/home/ma-user/work"
-	NotebookEnv                    = "Python3"
-	NotebookType                   = "Ascend"
-	FlavorInfo                     = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
+
 
 	//train-job
 	// ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
@@ -185,14 +177,6 @@ type OrgMultiNode struct {
 	Node []int `json:"node"`
 }
 
-// type Parameter struct {
-// 	Label string `json:"label"`
-// 	Value string `json:"value"`
-// }
-
-// type Parameters struct {
-// 	Parameter []Parameter `json:"parameter"`
-// }
 
 type Parameters struct {
 	Parameter []struct {
@@ -201,80 +185,6 @@ type Parameters struct {
 	} `json:"parameter"`
 }
 
-func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error {
-	var dataActualPath string
-	if uuid != "" {
-		dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
-	} else {
-		userPath := setting.UserBasePath + ctx.User.Name + "/"
-		isExist, err := storage.ObsHasObject(userPath)
-		if err != nil {
-			log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"])
-			return err
-		}
-
-		if !isExist {
-			if err = storage.ObsCreateObject(userPath); err != nil {
-				log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"])
-				return err
-			}
-		}
-
-		dataActualPath = setting.Bucket + "/" + userPath
-	}
-
-	if poolInfos == nil {
-		json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
-	}
-	createTime := timeutil.TimeStampNow()
-	jobResult, err := CreateJob(models.CreateNotebookParams{
-		JobName:     jobName,
-		Description: description,
-		ProfileID:   setting.ProfileID,
-		Flavor:      flavor,
-		Pool: models.Pool{
-			ID:   poolInfos.PoolInfo[0].PoolId,
-			Name: poolInfos.PoolInfo[0].PoolName,
-			Type: poolInfos.PoolInfo[0].PoolType,
-		},
-		Spec: models.Spec{
-			Storage: models.Storage{
-				Type: storageTypeOBS,
-				Location: models.Location{
-					Path: dataActualPath,
-				},
-			},
-			AutoStop: models.AutoStop{
-				Enable:   true,
-				Duration: autoStopDuration,
-			},
-		},
-	})
-	if err != nil {
-		log.Error("CreateJob failed: %v", err.Error())
-		return err
-	}
-	err = models.CreateCloudbrain(&models.Cloudbrain{
-
-		Status:          string(models.JobWaiting),
-		UserID:          ctx.User.ID,
-		RepoID:          ctx.Repo.Repository.ID,
-		JobID:           jobResult.ID,
-		JobName:         jobName,
-		JobType:         string(models.JobTypeDebug),
-		Type:            models.TypeCloudBrainTwo,
-		Uuid:            uuid,
-		ComputeResource: models.NPUResource,
-		CreatedUnix:     createTime,
-		UpdatedUnix:     createTime,
-	})
-
-	if err != nil {
-		return err
-	}
-	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
-	return nil
-}
 
 func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
 	if poolInfos == nil {
diff --git a/routers/api/v1/repo/images.go b/routers/api/v1/repo/images.go
index f0cb62980..e09ca260a 100644
--- a/routers/api/v1/repo/images.go
+++ b/routers/api/v1/repo/images.go
@@ -88,7 +88,7 @@ func getModelArtsImages(ctx *context.APIContext) {
 }
 
 func getC2netNpuImages(ctx *context.APIContext) {
-	images, err := grampus.GetImages(grampus.ProcessorTypeNPU)
+	images, err := grampus.GetImages(grampus.ProcessorTypeNPU, string(models.JobTypeTrain))
 	var npuImageInfos []NPUImageINFO
 	if err != nil {
 		log.Error("GetImages failed:", err.Error())
diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go
index de7bb454d..4942e1df2 100755
--- a/routers/repo/grampus.go
+++ b/routers/repo/grampus.go
@@ -47,12 +47,34 @@ const (
 	tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show"
 
 	//GPU
+	tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new"
 	tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new"
 
 	//NPU
+	tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new"
 	tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new"
 )
 
+func GrampusNotebookNew(ctx *context.Context) {
+	ctx.Data["IsCreate"] = true
+	notebookType := ctx.QueryInt("type")
+	processType := grampus.ProcessorTypeGPU
+	if notebookType == 1 {
+		processType = grampus.ProcessorTypeNPU
+	}
+	err := grampusNotebookNewDataPrepare(ctx, processType)
+	if err != nil {
+		ctx.ServerError("get new notebook-job info failed", err)
+		return
+	}
+	if processType == grampus.ProcessorTypeGPU {
+		ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew)
+	} else {
+		ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew)
+	}
+
+}
+
 func GrampusTrainJobGPUNew(ctx *context.Context) {
 	ctx.Data["IsCreate"] = true
 	err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -73,8 +95,125 @@ func GrampusTrainJobNPUNew(ctx *context.Context) {
 	}
 	ctx.HTML(200, tplGrampusTrainJobNPUNew)
 }
+func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebookForm) {
+	ctx.Data["IsCreate"] = true
+	displayJobName := form.DisplayJobName
+	jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
+	uuid := form.Attachment
+	description := form.Description
+	repo := ctx.Repo.Repository
+	branchName := form.BranchName
+	image := strings.TrimSpace(form.Image)
+	tpl := tplGrampusNotebookGPUNew
+	processType := grampus.ProcessorTypeGPU
+	computeSource := models.GPUResource
+	computeSourceSimple := models.GPU
+	if form.Type == 1 {
+		tpl = tplGrampusNotebookNPUNew
+		processType = grampus.ProcessorTypeNPU
+		computeSource = models.NPUResource
+		computeSourceSimple = models.NPU
+	}
+
+	lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
+	defer lock.UnLock()
+	isOk, err := lock.Lock(models.CloudbrainKeyDuration)
+	if !isOk {
+		log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form)
+		return
+	}
 
-func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
+	if !jobNamePattern.MatchString(displayJobName) {
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
+		return
+	}
+
+	//check count limit
+	count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource)
+	if err != nil {
+		log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr("system error", tpl, &form)
+		return
+	} else {
+		if count >= 1 {
+			log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
+			grampusNotebookNewDataPrepare(ctx, processType)
+			ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
+			return
+		}
+	}
+
+	//check whether the task name in the project is duplicated
+	tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
+	if err == nil {
+		if len(tasks) != 0 {
+			log.Error("the job name did already exist", ctx.Data["MsgID"])
+			grampusNotebookNewDataPrepare(ctx, processType)
+			ctx.RenderWithErr("the job name did already exist", tpl, &form)
+			return
+		}
+	} else {
+		if !models.IsErrJobNotExist(err) {
+			log.Error("system error, %v", err, ctx.Data["MsgID"])
+			grampusNotebookNewDataPrepare(ctx, processType)
+			ctx.RenderWithErr("system error", tpl, &form)
+			return
+		}
+	}
+
+	//check specification
+	spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
+		JobType:         models.JobTypeDebug,
+		ComputeResource: computeSourceSimple,
+		Cluster:         models.C2NetCluster,
+	})
+	if err != nil || spec == nil {
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr("Resource specification not available", tpl, &form)
+		return
+	}
+
+	if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
+		log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form)
+		return
+	}
+
+	commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
+	command := ""
+
+	// WorkServerNumber is not a request field; GenerateNotebookJob always records a single server.
+	req := &grampus.GenerateNotebookJobReq{
+		JobName:         jobName,
+		DisplayJobName:  displayJobName,
+		ComputeResource: computeSource,
+		ProcessType:     processType,
+		Command:         command,
+		ImageUrl:        image,
+		ImageId:         form.ImageID,
+		Description:     description,
+		Uuid:            uuid,
+		CommitID:        commitID,
+		BranchName:      branchName,
+		DatasetNames:    form.DatasetName,
+		Spec:            spec,
+	}
+
+	_, err = grampus.GenerateNotebookJob(ctx, req)
+	if err != nil {
+		log.Error("GenerateNotebookJob failed:%v", err.Error(), ctx.Data["MsgID"])
+		grampusNotebookNewDataPrepare(ctx, processType)
+		ctx.RenderWithErr(err.Error(), tpl, &form)
+		return
+	}
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
+}
+func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) error {
 	ctx.Data["PageIsCloudBrain"] = true
 
 	t := time.Now()
@@ -82,49 +221,67 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
 	ctx.Data["display_job_name"] = displayJobName
 
 	//get valid images
-	images, err := grampus.GetImages(processType)
+	if processType == grampus.ProcessorTypeNPU {
+		images, err := grampus.GetImages(processType, string(models.JobTypeDebug))
+		if err != nil {
+			log.Error("GetImages failed:", err.Error())
+		} else {
+			ctx.Data["images"] = images.Infos
+		}
+	}
+	//prepare available specs
+	computeResourceSimple := models.GPU
+	datasetType := models.TypeCloudBrainOne
+	computeResource := models.GPUResource
+	if processType == grampus.ProcessorTypeNPU {
+		computeResourceSimple = models.NPU
+		datasetType = models.TypeCloudBrainTwo
+		computeResource = models.NPUResource
+	}
+
+	prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug)
+
+	//get branches
+	branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0)
 	if err != nil {
-		log.Error("GetImages failed:", err.Error())
+		log.Error("GetBranches error:", err.Error())
 	} else {
-		ctx.Data["images"] = images.Infos
+		ctx.Data["branches"] = branches
 	}
 
-	grampus.InitSpecialPool()
+	ctx.Data["branchName"] = ctx.Repo.BranchName
 
-	ctx.Data["GPUEnabled"] = true
-	ctx.Data["NPUEnabled"] = true
-	includeCenters := make(map[string]struct{})
-	excludeCenters := make(map[string]struct{})
-	if grampus.SpecialPools != nil {
-		for _, pool := range grampus.SpecialPools.Pools {
-			if pool.IsExclusive {
-				if !IsUserInOrgPool(ctx.User.ID, pool) {
-					ctx.Data[pool.Type+"Enabled"] = false
-				}
-			} else {
-				if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) {
-					if IsUserInOrgPool(ctx.User.ID, pool) {
-						for _, center := range pool.Pool {
-							includeCenters[center.Queue] = struct{}{}
-						}
-					} else {
-						for _, center := range pool.Pool {
-							excludeCenters[center.Queue] = struct{}{}
-						}
+	ctx.Data["datasetType"] = datasetType
+	waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, computeResource, models.JobTypeDebug)
+	ctx.Data["WaitCount"] = waitCount
+	NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeResource)
+	ctx.Data["NotStopTaskCount"] = NotStopTaskCount
 
-					}
+	return nil
+}
 
-				}
+func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
+	ctx.Data["PageIsCloudBrain"] = true
 
-			}
+	t := time.Now()
+	var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
+	ctx.Data["display_job_name"] = displayJobName
+
+	//get valid images
+	if processType == grampus.ProcessorTypeNPU {
+		images, err := grampus.GetImages(processType, string(models.JobTypeTrain))
+		if err != nil {
+			log.Error("GetImages failed:", err.Error())
+		} else {
+			ctx.Data["images"] = images.Infos
 		}
 	}
 
 	//prepare available specs
 	if processType == grampus.ProcessorTypeNPU {
-		prepareGrampusTrainSpecs(ctx, models.NPU)
+		prepareGrampusSpecs(ctx, models.NPU)
 	} else if processType == grampus.ProcessorTypeGPU {
-		prepareGrampusTrainSpecs(ctx, models.GPU)
+		prepareGrampusSpecs(ctx, models.GPU)
 	}
 
 	//get branches
@@ -203,55 +360,19 @@ func GrampusTrainJobVersionNew(ctx *context.Context) {
 	}
 }
 
-func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) {
+func prepareGrampusSpecs(ctx *context.Context, computeResource string, jobType ...models.JobType) {
+	tempJobType := models.JobTypeTrain
+	if len(jobType) > 0 {
+		tempJobType = jobType[0]
+	}
 	noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
-		JobType:         models.JobTypeTrain,
+		JobType:         tempJobType,
 		ComputeResource: computeResource,
 		Cluster:         models.C2NetCluster,
 	})
 	ctx.Data["Specs"] = noteBookSpecs
 }
 
-func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec {
-	if len(includeCenters) == 0 && len(excludeCenters) == 0 {
-		return specs.Infos
-	}
-	var grampusSpecs []models.GrampusSpec
-	for _, info := range specs.Infos {
-		if isInIncludeCenters(info, includeCenters) || (len(excludeCenters) != 0 && isNotAllInExcludeCenters(info, excludeCenters)) {
-			grampusSpecs = append(grampusSpecs, info)
-		}
-
-	}
-	return grampusSpecs
-}
-
-func isInIncludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool {
-	for _, center := range grampusSpec.Centers {
-		if _, ok := centers[center.ID]; ok {
-			return true
-		}
-	}
-	return false
-}
-func isNotAllInExcludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool {
-	for _, center := range grampusSpec.Centers {
-		if _, ok := centers[center.ID]; !ok {
-			return true
-		}
-	}
-	return false
-}
-
-func IsUserInOrgPool(userId int64, pool *models.SpecialPool) bool {
-	org, _ := models.GetOrgByName(pool.Org)
-	if org != nil {
-		isOrgMember, _ := models.IsOrganizationMember(org.ID, userId)
-		return isOrgMember
-	}
-	return false
-}
-
 func grampusParamCheckCreateTrainJob(form auth.CreateGrampusTrainJobForm) error {
 	if !strings.HasSuffix(strings.TrimSpace(form.BootFile), ".py") {
 		log.Error("the boot file(%s) must be a python file", form.BootFile)
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 2b361b507..450c2ac1b 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -1216,6 +1216,17 @@ func RegisterRoutes(m *macaron.Macaron) {
 			})
 		}, context.RepoRef())
 		m.Group("/grampus", func() {
+			m.Group("/notebook", func() {
+				m.Group("/:jobid", func() {
+					m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow)
+					m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob)
+					m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel)
+				})
+
+				m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusNotebookNew)
+				m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), repo.GrampusNotebookCreate)
+			})
+
 			m.Group("/train-job", func() {
 				m.Group("/:jobid", func() {
 					m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow)
@@ -1288,16 +1299,6 @@ func RegisterRoutes(m *macaron.Macaron) {
 
 		m.Group("/modelarts", func() {
 			m.Group("/notebook", func() {
-				/* v1.0
-				m.Group("/:jobid", func() {
-					m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
-					m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug)
-					m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
-					m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
-				})
-				m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)
-				m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate)
-				*/
 				m.Group("/:id", func() {
 					m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
 					m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2)
diff --git a/services/cloudbrain/cloudbrainTask/count.go b/services/cloudbrain/cloudbrainTask/count.go
index 985706911..4ae742c3a 100644
--- a/services/cloudbrain/cloudbrainTask/count.go
+++ b/services/cloudbrain/cloudbrainTask/count.go
@@ -62,6 +62,16 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s
 	JobType:          []models.JobType{models.JobTypeTrain},
 	NotFinalStatuses: GrampusNotFinalStatuses,
 	ComputeResource:  models.NPUResource,
+}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: {
+	CloudBrainTypes:  []int{models.TypeC2Net},
+	JobType:          []models.JobType{models.JobTypeDebug},
+	NotFinalStatuses: GrampusNotFinalStatuses,
+	ComputeResource:  models.GPUResource,
+}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: {
+	CloudBrainTypes:  []int{models.TypeC2Net},
+	JobType:          []models.JobType{models.JobTypeDebug},
+	NotFinalStatuses: GrampusNotFinalStatuses,
+	ComputeResource:  models.NPUResource,
 }}
 
 func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) {