| @@ -146,6 +146,7 @@ type FindSpecsOptions struct { | |||||
| ComputeResource string | ComputeResource string | ||||
| Cluster string | Cluster string | ||||
| AiCenterCode string | AiCenterCode string | ||||
| SpecId int64 | |||||
| } | } | ||||
| type Specification struct { | type Specification struct { | ||||
| @@ -315,7 +316,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS | |||||
| return sess.Commit() | return sess.Commit() | ||||
| } | } | ||||
| func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { | |||||
| func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { | |||||
| var cond = builder.NewCond() | var cond = builder.NewCond() | ||||
| if opts.JobType != "" { | if opts.JobType != "" { | ||||
| cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) | cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) | ||||
| @@ -329,9 +330,12 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { | |||||
| if opts.AiCenterCode != "" { | if opts.AiCenterCode != "" { | ||||
| cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) | cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) | ||||
| } | } | ||||
| if opts.SpecId > 0 { | |||||
| cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId}) | |||||
| } | |||||
| cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) | cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) | ||||
| r := make([]Specification, 0) | |||||
| r := make([]*Specification, 0) | |||||
| err := x.Where(cond). | err := x.Where(cond). | ||||
| Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). | Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). | ||||
| Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). | Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). | ||||
| @@ -24,6 +24,7 @@ type CreateCloudBrainForm struct { | |||||
| Params string `form:"run_para_list"` | Params string `form:"run_para_list"` | ||||
| BranchName string `form:"branch_name"` | BranchName string `form:"branch_name"` | ||||
| DatasetName string `form:"dataset_name"` | DatasetName string `form:"dataset_name"` | ||||
| SpecId int64 `form:"spec_id"` | |||||
| } | } | ||||
| type CommitImageCloudBrainForm struct { | type CommitImageCloudBrainForm struct { | ||||
| @@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct { | |||||
| CkptName string `form:"ckpt_name" binding:"Required"` | CkptName string `form:"ckpt_name" binding:"Required"` | ||||
| LabelName string `form:"label_names" binding:"Required"` | LabelName string `form:"label_names" binding:"Required"` | ||||
| DatasetName string `form:"dataset_name"` | DatasetName string `form:"dataset_name"` | ||||
| SpecId int64 `form:"spec_id"` | |||||
| } | } | ||||
| func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { | func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { | ||||
| @@ -79,6 +79,7 @@ type GenerateCloudBrainTaskReq struct { | |||||
| ModelVersion string | ModelVersion string | ||||
| CkptName string | CkptName string | ||||
| LabelName string | LabelName string | ||||
| Spec *models.Specification | |||||
| } | } | ||||
| func GetCloudbrainDebugCommand() string { | func GetCloudbrainDebugCommand() string { | ||||
| @@ -227,50 +228,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) { | |||||
| } | } | ||||
| func GenerateTask(req GenerateCloudBrainTaskReq) error { | func GenerateTask(req GenerateCloudBrainTaskReq) error { | ||||
| var resourceSpec *models.ResourceSpec | |||||
| var versionCount int | var versionCount int | ||||
| if req.JobType == string(models.JobTypeTrain) { | if req.JobType == string(models.JobTypeTrain) { | ||||
| versionCount = 1 | versionCount = 1 | ||||
| if TrainResourceSpecs == nil { | |||||
| json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) | |||||
| } | |||||
| for _, spec := range TrainResourceSpecs.ResourceSpec { | |||||
| if req.ResourceSpecId == spec.Id { | |||||
| resourceSpec = spec | |||||
| break | |||||
| } | |||||
| } | |||||
| } else if req.JobType == string(models.JobTypeInference) { | |||||
| if InferenceResourceSpecs == nil { | |||||
| json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs) | |||||
| } | |||||
| for _, spec := range InferenceResourceSpecs.ResourceSpec { | |||||
| if req.ResourceSpecId == spec.Id { | |||||
| resourceSpec = spec | |||||
| break | |||||
| } | |||||
| } | |||||
| } else { | |||||
| if ResourceSpecs == nil { | |||||
| json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) | |||||
| } | |||||
| for _, spec := range ResourceSpecs.ResourceSpec { | |||||
| if req.ResourceSpecId == spec.Id { | |||||
| resourceSpec = spec | |||||
| break | |||||
| } | |||||
| } | |||||
| } | |||||
| //如果没有匹配到spec信息,尝试从专属资源池获取 | |||||
| if resourceSpec == nil && SpecialPools != nil { | |||||
| resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId) | |||||
| } | |||||
| if resourceSpec == nil { | |||||
| log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | |||||
| return errors.New("no such resourceSpec") | |||||
| } | } | ||||
| volumes := []models.Volume{ | volumes := []models.Volume{ | ||||
| @@ -342,7 +302,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||||
| jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ | jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ | ||||
| JobName: req.JobName, | JobName: req.JobName, | ||||
| RetryCount: 1, | RetryCount: 1, | ||||
| GpuType: req.GpuQueue, | |||||
| GpuType: req.Spec.QueueCode, | |||||
| Image: req.Image, | Image: req.Image, | ||||
| TaskRoles: []models.TaskRole{ | TaskRoles: []models.TaskRole{ | ||||
| { | { | ||||
| @@ -350,10 +310,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||||
| TaskNumber: 1, | TaskNumber: 1, | ||||
| MinSucceededTaskCount: 1, | MinSucceededTaskCount: 1, | ||||
| MinFailedTaskCount: 1, | MinFailedTaskCount: 1, | ||||
| CPUNumber: resourceSpec.CpuNum, | |||||
| GPUNumber: resourceSpec.GpuNum, | |||||
| MemoryMB: resourceSpec.MemMiB, | |||||
| ShmMB: resourceSpec.ShareMemMiB, | |||||
| CPUNumber: req.Spec.CpuCores, | |||||
| GPUNumber: req.Spec.AccCardsNum, | |||||
| MemoryMB: int(req.Spec.MemGiB * 1024), | |||||
| ShmMB: int(req.Spec.ShareMemGiB * 1024), | |||||
| Command: req.Command, | Command: req.Command, | ||||
| NeedIBDevice: false, | NeedIBDevice: false, | ||||
| IsMainRole: false, | IsMainRole: false, | ||||
| @@ -368,6 +368,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) | commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) | ||||
| spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||||
| JobType: models.JobType(jobType), | |||||
| ComputeResource: models.GPU, | |||||
| Cluster: models.OpenICluster, | |||||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||||
| if err != nil || spec == nil { | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||||
| return | |||||
| } | |||||
| req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
| Ctx: ctx, | Ctx: ctx, | ||||
| DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
| @@ -393,6 +404,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| BenchmarkChildTypeID: 0, | BenchmarkChildTypeID: 0, | ||||
| ResourceSpecId: resourceSpecId, | ResourceSpecId: resourceSpecId, | ||||
| ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ||||
| Spec: spec, | |||||
| } | } | ||||
| err = cloudbrain.GenerateTask(req) | err = cloudbrain.GenerateTask(req) | ||||
| @@ -515,7 +527,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | ||||
| return | return | ||||
| } | } | ||||
| spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||||
| JobType: models.JobTypeInference, | |||||
| ComputeResource: models.GPU, | |||||
| Cluster: models.OpenICluster, | |||||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||||
| if err != nil || spec == nil { | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||||
| return | |||||
| } | |||||
| req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
| Ctx: ctx, | Ctx: ctx, | ||||
| DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
| @@ -544,6 +565,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||||
| CkptName: form.CkptName, | CkptName: form.CkptName, | ||||
| TrainUrl: form.TrainUrl, | TrainUrl: form.TrainUrl, | ||||
| LabelName: labelName, | LabelName: labelName, | ||||
| Spec: spec, | |||||
| } | } | ||||
| err = cloudbrain.GenerateTask(req) | err = cloudbrain.GenerateTask(req) | ||||
| @@ -2453,6 +2475,17 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||||
| return | return | ||||
| } | } | ||||
| spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||||
| JobType: models.JobTypeBenchmark, | |||||
| ComputeResource: models.GPU, | |||||
| Cluster: models.OpenICluster, | |||||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||||
| if err != nil || spec == nil { | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form) | |||||
| return | |||||
| } | |||||
| req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
| Ctx: ctx, | Ctx: ctx, | ||||
| DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
| @@ -2478,6 +2511,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||||
| BenchmarkChildTypeID: benchmarkChildTypeID, | BenchmarkChildTypeID: benchmarkChildTypeID, | ||||
| ResourceSpecId: resourceSpecId, | ResourceSpecId: resourceSpecId, | ||||
| ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ||||
| Spec: spec, | |||||
| } | } | ||||
| err = cloudbrain.GenerateTask(req) | err = cloudbrain.GenerateTask(req) | ||||
| @@ -2581,7 +2615,16 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | ||||
| return | return | ||||
| } | } | ||||
| spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||||
| JobType: models.JobTypeBenchmark, | |||||
| ComputeResource: models.GPU, | |||||
| Cluster: models.OpenICluster, | |||||
| AiCenterCode: models.AICenterOfCloudBrainOne}) | |||||
| if err != nil || spec == nil { | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||||
| return | |||||
| } | |||||
| req := cloudbrain.GenerateCloudBrainTaskReq{ | req := cloudbrain.GenerateCloudBrainTaskReq{ | ||||
| Ctx: ctx, | Ctx: ctx, | ||||
| DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
| @@ -2607,6 +2650,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||||
| BenchmarkChildTypeID: benchmarkChildTypeID, | BenchmarkChildTypeID: benchmarkChildTypeID, | ||||
| ResourceSpecId: resourceSpecId, | ResourceSpecId: resourceSpecId, | ||||
| ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | ||||
| Spec: spec, | |||||
| } | } | ||||
| err = cloudbrain.GenerateTask(req) | err = cloudbrain.GenerateTask(req) | ||||
| @@ -185,18 +185,23 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod | |||||
| }) | }) | ||||
| } | } | ||||
| func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) { | |||||
| func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) { | |||||
| r, err := models.FindAvailableSpecs(opts) | r, err := models.FindAvailableSpecs(opts) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("FindAvailableSpecs error.%v", err) | log.Error("FindAvailableSpecs error.%v", err) | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| specs := make([]models.Specification, 0, len(r)) | |||||
| specs := make([]*models.Specification, 0, len(r)) | |||||
| specMap := make(map[int64]string, 0) | |||||
| //filter exclusive spec | //filter exclusive spec | ||||
| for i := 0; i < len(r); i++ { | for i := 0; i < len(r); i++ { | ||||
| spec := r[i] | spec := r[i] | ||||
| if _, has := specMap[spec.ID]; has { | |||||
| continue | |||||
| } | |||||
| if !spec.IsExclusive { | if !spec.IsExclusive { | ||||
| specs = append(specs, spec) | specs = append(specs, spec) | ||||
| specMap[spec.ID] = "" | |||||
| continue | continue | ||||
| } | } | ||||
| orgs := strings.Split(spec.ExclusiveOrg, ";") | orgs := strings.Split(spec.ExclusiveOrg, ";") | ||||
| @@ -204,8 +209,24 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Sp | |||||
| isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) | isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) | ||||
| if isMember { | if isMember { | ||||
| specs = append(specs, spec) | specs = append(specs, spec) | ||||
| specMap[spec.ID] = "" | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return specs, err | return specs, err | ||||
| } | } | ||||
| func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) { | |||||
| if specId == 0 { | |||||
| return nil, nil | |||||
| } | |||||
| opts.SpecId = specId | |||||
| r, err := FindAvailableSpecs(userId, opts) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| if r == nil || len(r) == 0 { | |||||
| return nil, nil | |||||
| } | |||||
| return r[0], nil | |||||
| } | |||||