| @@ -569,11 +569,12 @@ type SpecialPools struct { | |||
| Pools []*SpecialPool `json:"pools"` | |||
| } | |||
| type SpecialPool struct { | |||
| Org string `json:"org"` | |||
| Type string `json:"type"` | |||
| IsExclusive bool `json:"isExclusive"` | |||
| Pool []*GpuInfo `json:"pool"` | |||
| JobType []string `json:"jobType"` | |||
| Org string `json:"org"` | |||
| Type string `json:"type"` | |||
| IsExclusive bool `json:"isExclusive"` | |||
| Pool []*GpuInfo `json:"pool"` | |||
| JobType []string `json:"jobType"` | |||
| ResourceSpec []*ResourceSpec `json:"resourceSpecs"` | |||
| } | |||
| type ImageInfosModelArts struct { | |||
| @@ -42,6 +42,7 @@ const ( | |||
// Package-level configuration caches.
| var ( | |||
// ResourceSpecs and TrainResourceSpecs are the resource-spec lists that
// GenerateTask scans for an entry whose Id equals req.ResourceSpecId.
| ResourceSpecs *models.ResourceSpecs | |||
| TrainResourceSpecs *models.ResourceSpecs | |||
// SpecialPools is the dedicated resource pool configuration. It stays nil
// until InitSpecialPool decodes a non-empty setting.SpecialPools into it.
| SpecialPools *models.SpecialPools | |||
| ) | |||
| type GenerateCloudBrainTaskReq struct { | |||
| @@ -222,6 +223,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
| for _, spec := range TrainResourceSpecs.ResourceSpec { | |||
| if req.ResourceSpecId == spec.Id { | |||
| resourceSpec = spec | |||
| break | |||
| } | |||
| } | |||
| } else { | |||
| @@ -231,10 +233,29 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
| for _, spec := range ResourceSpecs.ResourceSpec { | |||
| if req.ResourceSpecId == spec.Id { | |||
| resourceSpec = spec | |||
| break | |||
| } | |||
| } | |||
| } | |||
| //如果没有匹配到spec信息,尝试从专属资源池获取 | |||
| if resourceSpec == nil && SpecialPools != nil { | |||
| for _, specialPool := range SpecialPools.Pools { | |||
| if resourceSpec != nil { | |||
| break | |||
| } | |||
| if specialPool.ResourceSpec != nil { | |||
| if IsElementExist(specialPool.JobType, req.JobType) && IsQueueInSpecialtPool(specialPool.Pool, req.GpuQueue) { | |||
| for _, spec := range specialPool.ResourceSpec { | |||
| if req.ResourceSpecId == spec.Id { | |||
| resourceSpec = spec | |||
| break | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if resourceSpec == nil { | |||
| log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | |||
| @@ -538,3 +559,39 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e | |||
| return nil | |||
| } | |||
| func InitSpecialPool() { | |||
| if SpecialPools == nil && setting.SpecialPools != "" { | |||
| json.Unmarshal([]byte(setting.SpecialPools), &SpecialPools) | |||
| } | |||
| } | |||
| func IsResourceSpecInSpecialPool(resourceSpecs []*models.ResourceSpec, resourceSpecId int) bool { | |||
| if resourceSpecs == nil || len(resourceSpecs) == 0 { | |||
| return true | |||
| } | |||
| for _, v := range resourceSpecs { | |||
| if v.Id == resourceSpecId { | |||
| return true | |||
| } | |||
| } | |||
| return false | |||
| } | |||
| func IsQueueInSpecialtPool(pool []*models.GpuInfo, queue string) bool { | |||
| for _, v := range pool { | |||
| if v.Queue == queue { | |||
| return true | |||
| } | |||
| } | |||
| return false | |||
| } | |||
// IsElementExist reports whether str occurs in s. The comparison is an exact,
// case-sensitive string match; an empty or nil slice yields false.
func IsElementExist(s []string, str string) bool {
	for i := range s {
		if s[i] == str {
			return true
		}
	}
	return false
}
| @@ -460,6 +460,7 @@ var ( | |||
| CBCodePathPrefix string | |||
| JobType string | |||
| GpuTypes string | |||
| SpecialPools string | |||
| DebugServerHost string | |||
| ResourceSpecs string | |||
| MaxDuration int64 | |||
| @@ -1331,6 +1332,8 @@ func NewContext() { | |||
| TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("") | |||
| TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("") | |||
| MaxModelSize = sec.Key("MAX_MODEL_SIZE").MustFloat64(500) | |||
| SpecialPools = sec.Key("SPECIAL_POOL").MustString("") | |||
| MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5) | |||
| sec = Cfg.Section("benchmark") | |||
| @@ -752,10 +752,26 @@ func GetCloudbrainsDetailData(ctx *context.Context) { | |||
| taskDetail.RepoAlias = ciTasks[i].Repo.OwnerName + "/" + ciTasks[i].Repo.Alias | |||
| } | |||
| if ciTasks[i].Cloudbrain.Status == string(models.JobWaiting) { | |||
| WaitTimeInt := time.Now().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() | |||
| taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt) | |||
| if WaitTimeInt < 0 { | |||
| taskDetail.WaitTime = "00:00:00" | |||
| if ciTasks[i].Cloudbrain.DeletedAt != nilTime { | |||
| WaitTimeInt := ciTasks[i].Cloudbrain.UpdatedUnix.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() | |||
| taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt) | |||
| if WaitTimeInt < 0 { | |||
| taskDetail.WaitTime = "00:00:00" | |||
| } | |||
| } else { | |||
| if ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() == 0 { | |||
| WaitTimeInt := time.Now().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() | |||
| taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt) | |||
| if WaitTimeInt < 0 { | |||
| taskDetail.WaitTime = "00:00:00" | |||
| } | |||
| } else { | |||
| WaitTimeInt := ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() | |||
| taskDetail.WaitTime = models.ConvertDurationToStr(WaitTimeInt) | |||
| if WaitTimeInt < 0 { | |||
| taskDetail.WaitTime = "00:00:00" | |||
| } | |||
| } | |||
| } | |||
| } else if ciTasks[i].Cloudbrain.Status == string(models.JobStopped) && ciTasks[i].Cloudbrain.StartTime.AsTime().Unix() == 0 { | |||
| WaitTimeInt := ciTasks[i].Cloudbrain.EndTime.AsTime().Unix() - ciTasks[i].Cloudbrain.CreatedUnix.AsTime().Unix() | |||
| @@ -2,7 +2,6 @@ package repo | |||
| import ( | |||
| "bufio" | |||
| "code.gitea.io/gitea/modules/grampus" | |||
| "encoding/json" | |||
| "errors" | |||
| "fmt" | |||
| @@ -16,6 +15,8 @@ import ( | |||
| "time" | |||
| "unicode/utf8" | |||
| "code.gitea.io/gitea/modules/grampus" | |||
| "code.gitea.io/gitea/modules/timeutil" | |||
| "github.com/unknwon/i18n" | |||
| @@ -149,6 +150,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType | |||
| cloudbrain.InitSpecialPool() | |||
| if gpuInfos == nil { | |||
| json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) | |||
| } | |||
| @@ -178,6 +181,45 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { | |||
| json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) | |||
| } | |||
| ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec | |||
| if cloudbrain.SpecialPools != nil { | |||
| var debugGpuTypes []*models.GpuInfo | |||
| var trainGpuTypes []*models.GpuInfo | |||
| for _, pool := range cloudbrain.SpecialPools.Pools { | |||
| org, _ := models.GetOrgByName(pool.Org) | |||
| if org != nil { | |||
| isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) | |||
| if isOrgMember { | |||
| for _, jobType := range pool.JobType { | |||
| if jobType == string(models.JobTypeDebug) { | |||
| debugGpuTypes = append(debugGpuTypes, pool.Pool...) | |||
| if pool.ResourceSpec != nil { | |||
| ctx.Data["resource_specs"] = pool.ResourceSpec | |||
| } | |||
| } else if jobType == string(models.JobTypeTrain) { | |||
| trainGpuTypes = append(trainGpuTypes, pool.Pool...) | |||
| if pool.ResourceSpec != nil { | |||
| ctx.Data["train_resource_specs"] = pool.ResourceSpec | |||
| } | |||
| } | |||
| } | |||
| break | |||
| } | |||
| } | |||
| } | |||
| if len(debugGpuTypes) > 0 { | |||
| ctx.Data["gpu_types"] = debugGpuTypes | |||
| } | |||
| if len(trainGpuTypes) > 0 { | |||
| ctx.Data["train_gpu_types"] = trainGpuTypes | |||
| } | |||
| } | |||
| ctx.Data["params"] = "" | |||
| ctx.Data["branchName"] = ctx.Repo.BranchName | |||
| @@ -217,6 +259,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| repo := ctx.Repo.Repository | |||
| tpl := tplCloudBrainNew | |||
| if jobType == string(models.JobTypeTrain) { | |||
| tpl = tplCloudBrainTrainJobNew | |||
| } | |||
| tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) | |||
| if err == nil { | |||
| if len(tasks) != 0 { | |||
| @@ -282,6 +328,14 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| command = commandTrain | |||
| } | |||
| errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) | |||
| if errStr != "" { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr(errStr, tpl, &form) | |||
| return | |||
| } | |||
| if branchName == "" { | |||
| branchName = cloudbrain.DefaultBranchName | |||
| } | |||
| @@ -334,6 +388,42 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| } | |||
| } | |||
| /** | |||
| 检查用户传输的参数是否符合专属资源池 | |||
| */ | |||
| func checkCloudBrainSpecialPool(ctx *context.Context, jobType string, queue string, resourceSpecId int) string { | |||
| if cloudbrain.SpecialPools != nil { | |||
| var isInPoolOrg = false | |||
| var matchSpecialPool = false | |||
| for _, specialPool := range cloudbrain.SpecialPools.Pools { | |||
| if cloudbrain.IsElementExist(specialPool.JobType, jobType) && cloudbrain.IsQueueInSpecialtPool(specialPool.Pool, queue) { | |||
| if cloudbrain.IsResourceSpecInSpecialPool(specialPool.ResourceSpec, resourceSpecId) { | |||
| matchSpecialPool = true | |||
| org, _ := models.GetOrgByName(specialPool.Org) | |||
| if org != nil { | |||
| isInPoolOrg, _ = models.IsOrganizationMember(org.ID, ctx.User.ID) | |||
| if isInPoolOrg { | |||
| break //传入参数,和专属资源池匹配上了,检查通过 | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| //资源池有匹配上,但是用户不在相应的组织中,返回错误信息。界面已经过滤了选择,界面操作不会到这个逻辑 | |||
| if matchSpecialPool && !isInPoolOrg { | |||
| return ctx.Tr("repo.grampus.no_operate_right") | |||
| } | |||
| } | |||
| //没有匹配到资源池或者没有设置专属资源池,检查通过; 获取和资源池完全匹配检查通过 | |||
| return "" | |||
| } | |||
| func CloudBrainRestart(ctx *context.Context) { | |||
| var ID = ctx.Params(":id") | |||
| var resultCode = "0" | |||
| @@ -573,7 +663,9 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo | |||
| if task.TrainJobDuration == "" { | |||
| if task.Duration == 0 { | |||
| var duration int64 | |||
| if task.Status == string(models.JobRunning) { | |||
| if task.Status == string(models.JobWaiting) { | |||
| duration = 0 | |||
| } else if task.Status == string(models.JobRunning) { | |||
| duration = time.Now().Unix() - int64(task.CreatedUnix) | |||
| } else { | |||
| duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) | |||
| @@ -446,24 +446,6 @@ | |||
| ] | |||
| }, | |||
| work_server_number: { | |||
| identifier : 'work_server_number', | |||
| rules: [ | |||
| { | |||
| type : 'integer[1..25]', | |||
| prompt : '计算节点需要在1-25之间,请您键入正确的值' | |||
| } | |||
| ] | |||
| }, | |||
| run_para_list:{ | |||
| identifier : 'run_para_list', | |||
| rules: [ | |||
| { | |||
| type: 'maxLength[255]', | |||
| prompt : '所有字符最长不超过255个字符。' | |||
| } | |||
| ] | |||
| }, | |||
| }, | |||
| }) | |||
| @@ -512,24 +494,6 @@ | |||
| ] | |||
| }, | |||
| work_server_number: { | |||
| identifier : 'work_server_number', | |||
| rules: [ | |||
| { | |||
| type : 'integer[1..25]', | |||
| prompt : '计算节点需要在1-25之间,请您键入正确的值' | |||
| } | |||
| ] | |||
| }, | |||
| run_para_list:{ | |||
| identifier : 'run_para_list', | |||
| rules: [ | |||
| { | |||
| type: 'maxLength[255]', | |||
| prompt : '所有字符最长不超过255个字符。' | |||
| } | |||
| ] | |||
| }, | |||
| }, | |||
| onSuccess: function(){ | |||
| // $('.ui.page.dimmer').dimmer('show') | |||