| @@ -2,6 +2,7 @@ package models | |||
| import ( | |||
| "code.gitea.io/gitea/modules/timeutil" | |||
| "fmt" | |||
| "xorm.io/builder" | |||
| ) | |||
| @@ -423,6 +424,10 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||
| return nil, err | |||
| } | |||
| sess.Commit() | |||
| return BuildSpecification(queue, spec), nil | |||
| } | |||
| func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification { | |||
| return &Specification{ | |||
| ID: spec.ID, | |||
| SourceSpecId: spec.SourceSpecId, | |||
| @@ -439,7 +444,7 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||
| Cluster: queue.Cluster, | |||
| AiCenterCode: queue.AiCenterCode, | |||
| AiCenterName: queue.AiCenterName, | |||
| }, nil | |||
| } | |||
| } | |||
| func GetCloudbrainOneAccCardType(queueCode string) string { | |||
| @@ -454,3 +459,69 @@ func GetCloudbrainOneAccCardType(queueCode string) string { | |||
| } | |||
| return "" | |||
| } | |||
| var cloudbrainTwoSpecsInitFlag = false | |||
| var cloudbrainTwoSpecs map[string]*Specification | |||
| func GetCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||
| if !cloudbrainTwoSpecsInitFlag { | |||
| r, err := InitCloudbrainTwoSpecs() | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| cloudbrainTwoSpecsInitFlag = true | |||
| cloudbrainTwoSpecs = r | |||
| } | |||
| return cloudbrainTwoSpecs, nil | |||
| } | |||
| func InitCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||
| r := make(map[string]*Specification, 0) | |||
| queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"}) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| if queue == nil { | |||
| queue = &ResourceQueue{ | |||
| QueueCode: "openisupport", | |||
| Cluster: OpenICluster, | |||
| AiCenterCode: AICenterOfCloudBrainTwo, | |||
| AiCenterName: "云脑二", | |||
| ComputeResource: NPU, | |||
| AccCardType: "ASCEND910", | |||
| Remark: "处理历史云脑任务时自动生成", | |||
| } | |||
| _, err = x.InsertOne(queue) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| for i := 1; i <= 8; i = i * 2 { | |||
| sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i) | |||
| spec, err := GetResourceSpecification(&ResourceSpecification{ | |||
| SourceSpecId: sourceSpecId, | |||
| QueueId: queue.ID, | |||
| }) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| if spec == nil { | |||
| spec = &ResourceSpecification{ | |||
| QueueId: queue.ID, | |||
| SourceSpecId: sourceSpecId, | |||
| AccCardsNum: i, | |||
| CpuCores: i * 24, | |||
| MemGiB: float32(i * 256), | |||
| GPUMemGiB: float32(32), | |||
| Status: SpecOffShelf, | |||
| } | |||
| _, err = x.Insert(spec) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| } | |||
| r[sourceSpecId] = BuildSpecification(*queue, *spec) | |||
| } | |||
| return r, nil | |||
| } | |||
| @@ -917,14 +917,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
| } | |||
| ctx.Data["engine_versions"] = versionInfos.Version | |||
| var flavorInfos modelarts.Flavor | |||
| if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { | |||
| ctx.ServerError("json.Unmarshal failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["flavor_infos"] = flavorInfos.Info | |||
| setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) | |||
| prepareCloudbrainTwoTrainSpecs(ctx) | |||
| var Parameters modelarts.Parameters | |||
| if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { | |||
| @@ -5,12 +5,14 @@ import ( | |||
| "code.gitea.io/gitea/modules/cloudbrain" | |||
| "code.gitea.io/gitea/modules/grampus" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/modelarts" | |||
| "code.gitea.io/gitea/modules/setting" | |||
| "code.gitea.io/gitea/routers/response" | |||
| "code.gitea.io/gitea/services/admin/operate_log" | |||
| "encoding/json" | |||
| "errors" | |||
| "fmt" | |||
| "strconv" | |||
| "strings" | |||
| "time" | |||
| ) | |||
| @@ -296,6 +298,7 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||
| continue | |||
| } | |||
| success++ | |||
| time.Sleep(500 * time.Millisecond) | |||
| } | |||
| } else { | |||
| @@ -318,13 +321,13 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||
| break | |||
| } | |||
| for _, task := range list { | |||
| time.Sleep(1 * time.Second) | |||
| err = RefreshOneHistorySpec(task) | |||
| if err != nil { | |||
| log.Error("RefreshOneHistorySpec error.%v", err) | |||
| continue | |||
| } | |||
| success++ | |||
| time.Sleep(500 * time.Millisecond) | |||
| } | |||
| if len(list) < pageSize { | |||
| log.Info("RefreshHistorySpec. list < pageSize") | |||
| @@ -342,6 +345,8 @@ func RefreshOneHistorySpec(task *models.Cloudbrain) error { | |||
| switch task.Type { | |||
| case models.TypeCloudBrainOne: | |||
| spec, err = getCloudbrainOneSpec(task) | |||
| case models.TypeCloudBrainTwo: | |||
| spec, err = getCloudbrainTwoSpec(task) | |||
| } | |||
| if err != nil { | |||
| log.Error("find spec error,task.ID=%d err=%v", task.ID, err) | |||
| @@ -506,6 +511,37 @@ func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error | |||
| } | |||
| func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) { | |||
| specMap, err := models.GetCloudbrainTwoSpecs() | |||
| if err != nil { | |||
| log.Error("InitCloudbrainTwoSpecs err.%v", err) | |||
| return nil, err | |||
| } | |||
| if task.FlavorCode != "" { | |||
| return specMap[task.FlavorCode], nil | |||
| } | |||
| if task.JobType == string(models.JobTypeDebug) { | |||
| result, err := modelarts.GetNotebook2(task.JobID) | |||
| if err != nil { | |||
| log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err) | |||
| return nil, err | |||
| } | |||
| if result != nil { | |||
| return specMap[result.Flavor], nil | |||
| } | |||
| } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | |||
| result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||
| if err != nil { | |||
| log.Error("getCloudbrainTwoSpec GetTrainJob error:%v", task.JobName, err) | |||
| return nil, err | |||
| } | |||
| if result != nil { | |||
| return specMap[result.Flavor.Code], nil | |||
| } | |||
| } | |||
| return nil, nil | |||
| } | |||
| func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error { | |||
| return nil | |||
| } | |||