|
|
|
@@ -1844,7 +1844,8 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { |
|
|
|
} |
|
|
|
|
|
|
|
if (opts.IsLatestVersion) != "" { |
|
|
|
cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) |
|
|
|
cond = cond.And(builder.Or(builder.And(builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, |
|
|
|
builder.Eq{"cloudbrain.job_type": "TRAIN"}), builder.Neq{"cloudbrain.job_type": "TRAIN"})) |
|
|
|
} |
|
|
|
|
|
|
|
if len(opts.CloudbrainIDs) > 0 { |
|
|
|
@@ -1882,7 +1883,8 @@ func CloudbrainAll(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { |
|
|
|
} else { |
|
|
|
lowerKeyWord := strings.ToLower(opts.Keyword) |
|
|
|
|
|
|
|
cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) |
|
|
|
cond = cond.And(builder.Or(builder.Like{"LOWER(cloudbrain.job_name)", lowerKeyWord}, |
|
|
|
builder.Like{"LOWER(cloudbrain.display_job_name)", lowerKeyWord}, builder.Like{"`user`.lower_name", lowerKeyWord})) |
|
|
|
count, err = sess.Table(&Cloudbrain{}).Unscoped().Where(cond). |
|
|
|
Join("left", "`user`", condition).Count(new(CloudbrainInfo)) |
|
|
|
|
|
|
|
@@ -1960,7 +1962,8 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er |
|
|
|
} |
|
|
|
sess.OrderBy("cloudbrain.created_unix DESC") |
|
|
|
cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) |
|
|
|
if err := sess.Cols("status", "type", "job_type", "train_job_duration", "duration", "compute_resource", "created_unix", "start_time", "end_time").Table(&Cloudbrain{}).Unscoped().Where(cond). |
|
|
|
if err := sess.Cols("status", "type", "job_type", "train_job_duration", "duration", "compute_resource", |
|
|
|
"created_unix", "start_time", "end_time").Table(&Cloudbrain{}).Unscoped().Where(cond). |
|
|
|
Find(&cloudbrains); err != nil { |
|
|
|
return nil, 0, fmt.Errorf("Find: %v", err) |
|
|
|
} |
|
|
|
@@ -2020,22 +2023,12 @@ var ( |
|
|
|
CloudbrainDebugResourceSpecsMap map[int]*ResourceSpec |
|
|
|
CloudbrainTrainResourceSpecsMap map[int]*ResourceSpec |
|
|
|
CloudbrainBenchmarkResourceSpecsMap map[int]*ResourceSpec |
|
|
|
ModelArtsDebugResourceSpecsMap map[string]*FlavorInfo |
|
|
|
ModelArtsTrainResourceSpecsMap map[string]*FlavorInfo |
|
|
|
GpuInfosMapInitFlag = false |
|
|
|
CloudbrainDebugGpuInfosMap map[string]*GpuInfo |
|
|
|
CloudbrainTrainGpuInfosMap map[string]*GpuInfo |
|
|
|
) |
|
|
|
|
|
|
|
// ModelArtsFlavor mirrors a JSON document whose "flavor" key holds a list of |
// flavor entries, each carrying a "code", a "value" and a "unitPrice". |
type ModelArtsFlavor struct { |
|
|
|
// Info is the decoded "flavor" array, one element per entry. |
Info []struct { |
|
|
|
// Code is read from "code"; InitResourceSpecMap copies it into FlavorInfo.Value. |
Code string `json:"code"` |
|
|
|
// Value is read from "value"; InitResourceSpecMap copies it into FlavorInfo.Desc |
// and uses it as the key of ModelArtsTrainResourceSpecsMap. |
Value string `json:"value"` |
|
|
|
// UnitPrice is read from "unitPrice". |
// NOTE(review): currency and billing period are not visible here - confirm before use. |
UnitPrice int64 `json:"unitPrice"` |
|
|
|
} `json:"flavor"` |
|
|
|
} |
|
|
|
|
|
|
|
func InitResourceSpecMap() { |
|
|
|
func InitCloudbrainOneResourceSpecMap() { |
|
|
|
if CloudbrainDebugResourceSpecsMap == nil || len(CloudbrainDebugResourceSpecsMap) == 0 { |
|
|
|
t := ResourceSpecs{} |
|
|
|
json.Unmarshal([]byte(setting.ResourceSpecs), &t) |
|
|
|
@@ -2060,34 +2053,14 @@ func InitResourceSpecMap() { |
|
|
|
CloudbrainBenchmarkResourceSpecsMap[spec.Id] = spec |
|
|
|
} |
|
|
|
} |
|
|
|
if ModelArtsDebugResourceSpecsMap == nil || len(ModelArtsDebugResourceSpecsMap) == 0 { |
|
|
|
t := FlavorInfos{} |
|
|
|
json.Unmarshal([]byte(setting.FlavorInfos), &t) |
|
|
|
ModelArtsDebugResourceSpecsMap = make(map[string]*FlavorInfo, len(t.FlavorInfo)) |
|
|
|
for _, spec := range t.FlavorInfo { |
|
|
|
ModelArtsDebugResourceSpecsMap[spec.Value] = spec |
|
|
|
} |
|
|
|
} |
|
|
|
if ModelArtsTrainResourceSpecsMap == nil || len(ModelArtsTrainResourceSpecsMap) == 0 { |
|
|
|
t := ModelArtsFlavor{} |
|
|
|
json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &t) |
|
|
|
ModelArtsTrainResourceSpecsMap = make(map[string]*FlavorInfo, len(t.Info)) |
|
|
|
for _, spec := range t.Info { |
|
|
|
f := &FlavorInfo{ |
|
|
|
Value: spec.Code, |
|
|
|
Desc: spec.Value, |
|
|
|
} |
|
|
|
ModelArtsTrainResourceSpecsMap[spec.Value] = f |
|
|
|
} |
|
|
|
} |
|
|
|
SpecsMapInitFlag = true |
|
|
|
} |
|
|
|
|
|
|
|
func InitGpuInfoMap() { |
|
|
|
func InitCloudbrainOneGpuInfoMap() { |
|
|
|
if CloudbrainDebugGpuInfosMap == nil || len(CloudbrainDebugGpuInfosMap) == 0 { |
|
|
|
t := GpuInfos{} |
|
|
|
json.Unmarshal([]byte(setting.GpuTypes), &t) |
|
|
|
CloudbrainDebugGpuInfosMap = make(map[int]*GpuInfo, len(t.GpuInfo)) |
|
|
|
CloudbrainDebugGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) |
|
|
|
for _, GpuInfo := range t.GpuInfo { |
|
|
|
CloudbrainDebugGpuInfosMap[GpuInfo.Queue] = GpuInfo |
|
|
|
} |
|
|
|
@@ -2095,7 +2068,7 @@ func InitGpuInfoMap() { |
|
|
|
if CloudbrainTrainGpuInfosMap == nil || len(CloudbrainTrainGpuInfosMap) == 0 { |
|
|
|
t := GpuInfos{} |
|
|
|
json.Unmarshal([]byte(setting.TrainGpuTypes), &t) |
|
|
|
CloudbrainTrainGpuInfosMap = make(map[int]*GpuInfo, len(t.GpuInfo)) |
|
|
|
CloudbrainTrainGpuInfosMap = make(map[string]*GpuInfo, len(t.GpuInfo)) |
|
|
|
for _, GpuInfo := range t.GpuInfo { |
|
|
|
CloudbrainTrainGpuInfosMap[GpuInfo.Queue] = GpuInfo |
|
|
|
} |
|
|
|
@@ -2103,93 +2076,82 @@ func InitGpuInfoMap() { |
|
|
|
GpuInfosMapInitFlag = true |
|
|
|
} |
|
|
|
|
|
|
|
// ResourceAndFlavor pairs a *ResourceSpec with a *FlavorInfo so that a single |
// value can describe the resources of a task. |
// |
// GetCloudbrainResourceSpec fills only ResourceSpec for TypeCloudBrainOne and |
// only FlavorInfo for TypeCloudBrainTwo, so either pointer may be nil. |
type ResourceAndFlavor struct { |
|
|
|
// ResourceSpec is looked up by resource spec id; nil when not applicable. |
ResourceSpec *ResourceSpec |
|
|
|
// FlavorInfo is looked up by flavor code; nil when not applicable. |
FlavorInfo *FlavorInfo |
|
|
|
} |
|
|
|
|
|
|
|
func NewResourceAndFlavor(resourceSpec *ResourceSpec, flavorInfo *FlavorInfo) *ResourceAndFlavor { |
|
|
|
return &ResourceAndFlavor{ |
|
|
|
ResourceSpec: resourceSpec, |
|
|
|
FlavorInfo: flavorInfo, |
|
|
|
// getCardNumAndTypeByFlavorname parses the accelerator count and card type out
// of a flavor name such as "GPU:1*Nvidia-V100 | CPU:8核32768MB".
//
// The count is the integer between the first ':' and the first '*'. The card
// type is the trimmed text between that '*' and the last ':' in the name, so
// the example above yields 1 and "Nvidia-V100 | CPU".
//
// An empty name yields (0, "", nil). A name whose separators are missing or
// out of order, or whose count is not an integer, yields a non-nil error; the
// error is returned to the caller rather than logged here.
func getCardNumAndTypeByFlavorname(FlavorName string) (int, string, error) {
	if FlavorName == "" {
		return 0, "", nil
	}

	beginIndex := strings.Index(FlavorName, ":")
	lastIndex := strings.LastIndex(FlavorName, ":")
	endIndex := strings.Index(FlavorName, "*")

	// strings.Index reports -1 for a missing separator, and a name with a
	// single ':' has lastIndex == beginIndex < endIndex. Either case would make
	// the slice expressions below panic, so reject such names up front.
	if beginIndex < 0 || endIndex <= beginIndex || lastIndex <= endIndex {
		return 0, "", fmt.Errorf("parsing flavor name %q: separators ':' '*' ':' missing or out of order", FlavorName)
	}

	cardNum, err := strconv.Atoi(strings.TrimSpace(FlavorName[beginIndex+1 : endIndex]))
	if err != nil {
		return 0, "", fmt.Errorf("parsing card count in flavor name %q: %w", FlavorName, err)
	}

	cardType := strings.TrimSpace(FlavorName[endIndex+1 : lastIndex])
	return cardNum, cardType, nil
}
|
|
|
|
|
|
|
func GetCloudbrainResourceSpec(jobType string, clusterType int, resourceSpecId int, flavorCode string) *ResourceAndFlavor { |
|
|
|
func GetCloudbrainCardNumAndType(task Cloudbrain) (int, string, error) { |
|
|
|
if !SpecsMapInitFlag { |
|
|
|
InitResourceSpecMap() |
|
|
|
InitCloudbrainOneResourceSpecMap() |
|
|
|
} |
|
|
|
if clusterType == TypeCloudBrainOne { |
|
|
|
switch jobType { |
|
|
|
case string(JobTypeDebug): |
|
|
|
return NewResourceAndFlavor(CloudbrainDebugResourceSpecsMap[resourceSpecId], nil) |
|
|
|
case string(JobTypeTrain): |
|
|
|
return NewResourceAndFlavor(CloudbrainTrainResourceSpecsMap[resourceSpecId], nil) |
|
|
|
case string(JobTypeBenchmark): |
|
|
|
return NewResourceAndFlavor(CloudbrainBenchmarkResourceSpecsMap[resourceSpecId], nil) |
|
|
|
if !GpuInfosMapInitFlag { |
|
|
|
InitCloudbrainOneGpuInfoMap() |
|
|
|
} |
|
|
|
FlavorName, err := GetCloudbrainFlavorName(task) |
|
|
|
if err != nil { |
|
|
|
return 0, "", nil |
|
|
|
} |
|
|
|
return getCardNumAndTypeByFlavorname(FlavorName) |
|
|
|
} |
|
|
|
|
|
|
|
func GetCloudbrainFlavorName(task Cloudbrain) (string, error) { |
|
|
|
if task.Type == TypeCloudBrainOne { |
|
|
|
ResourceSpec, GpuInfo, err := GetCloudBrainOneResourceSpec(task) |
|
|
|
if err != nil { |
|
|
|
log.Info("GetCloudBrainOneResourceSpec err:", err) |
|
|
|
return "", err |
|
|
|
} else { |
|
|
|
if ResourceSpec == nil || GpuInfo == nil { |
|
|
|
err := errors.New("ResourceSpec or GpuInfo is nil") |
|
|
|
return "", err |
|
|
|
} else { |
|
|
|
CloudbrainOneFlavorName := "GPU:" + strconv.Itoa(ResourceSpec.GpuNum) + "*Nvidia-" + GpuInfo.Value + |
|
|
|
" | CPU:" + strconv.Itoa(ResourceSpec.CpuNum) + "核" + strconv.Itoa(ResourceSpec.MemMiB) + "MB" |
|
|
|
return CloudbrainOneFlavorName, nil |
|
|
|
} |
|
|
|
} |
|
|
|
} else if clusterType == TypeCloudBrainTwo { |
|
|
|
switch jobType { |
|
|
|
case string(JobTypeDebug): |
|
|
|
return NewResourceAndFlavor(nil, ModelArtsDebugResourceSpecsMap[flavorCode]) |
|
|
|
case string(JobTypeTrain): |
|
|
|
return NewResourceAndFlavor(nil, ModelArtsTrainResourceSpecsMap[flavorCode]) |
|
|
|
case string(JobTypeInference): |
|
|
|
return NewResourceAndFlavor(nil, ModelArtsTrainResourceSpecsMap[flavorCode]) |
|
|
|
|
|
|
|
} else if (task.Type == TypeCloudBrainTwo || task.Type == TypeC2Net) && task.FlavorName != "" { |
|
|
|
return task.FlavorName, nil |
|
|
|
} else if task.Type == TypeCloudBrainTwo && task.FlavorName == "" && task.FlavorCode != "" { |
|
|
|
index := strings.LastIndex(task.FlavorCode, ".") |
|
|
|
cardNum, err := strconv.Atoi(strings.TrimSpace(task.FlavorCode[index+1 : len(task.FlavorCode)])) |
|
|
|
if err != nil { |
|
|
|
log.Error("strconv.Atoi failed: %v", err) |
|
|
|
return "", err |
|
|
|
} |
|
|
|
CloudbrainTwoFlavorName := "Ascend:" + strings.TrimSpace(task.FlavorCode[index+1:len(task.FlavorCode)]) + |
|
|
|
"*Ascend-910(" + strconv.Itoa(cardNum*32) + "GB)|ARM:" + strconv.Itoa(cardNum*24) + |
|
|
|
"核" + strconv.Itoa(cardNum*256) + "GB" |
|
|
|
return CloudbrainTwoFlavorName, nil |
|
|
|
} |
|
|
|
|
|
|
|
return nil |
|
|
|
|
|
|
|
return "", nil |
|
|
|
} |
|
|
|
|
|
|
|
// func GetCloudbrainTaskCardNum(task Cloudbrain) int { |
|
|
|
// spec := GetCloudbrainResourceSpec(task.JobType, task.Type, task.ResourceSpecId, task.FlavorCode) |
|
|
|
// if spec == nil { |
|
|
|
// return 0 |
|
|
|
// } |
|
|
|
// if task.Type == TypeCloudBrainOne { |
|
|
|
// if spec.ResourceSpec == nil { |
|
|
|
// return 0 |
|
|
|
// } |
|
|
|
// return spec.ResourceSpec.GpuNum |
|
|
|
// } else if task.Type == TypeCloudBrainTwo || task.Type == TypeC2Net { |
|
|
|
// return getCardnumByFlavorname(task.FlavorName) |
|
|
|
// } |
|
|
|
// return 0 |
|
|
|
// } |
|
|
|
|
|
|
|
func getCardNumAndTypeByFlavorname(FlavorName string) (int, string) { |
|
|
|
var beginIndex = strings.Index(FlavorName, ":") |
|
|
|
var lastIndex = strings.LastIndex(FlavorName, ":") |
|
|
|
var endIndex = strings.Index(FlavorName, "*") |
|
|
|
cardNum, err := strconv.Atoi(strings.TrimSpace(FlavorName[beginIndex:endIndex])) |
|
|
|
if err != nil { |
|
|
|
log.Error("strconv.Atoi failed: %v", err) |
|
|
|
return 0, "" |
|
|
|
} |
|
|
|
cardType := strings.TrimSpace(FlavorName[endIndex:lastIndex]) |
|
|
|
return cardNum, cardType |
|
|
|
} |
|
|
|
|
|
|
|
func GetCloudbrainCardNumAndType(task Cloudbrain) (int, string) { |
|
|
|
if !SpecsMapInitFlag { |
|
|
|
InitResourceSpecMap() |
|
|
|
} |
|
|
|
if !GpuInfosMapInitFlag { |
|
|
|
InitGpuInfoMap() |
|
|
|
} |
|
|
|
if task.Type == TypeCloudBrainOne { |
|
|
|
switch task.JobType { |
|
|
|
case string(JobTypeDebug): |
|
|
|
return CloudbrainDebugResourceSpecsMap[task.ResourceSpecId].GpuNum, CloudbrainDebugGpuInfosMap[task.GpuQueue].Value |
|
|
|
case string(JobTypeTrain): |
|
|
|
return CloudbrainTrainResourceSpecsMap[task.ResourceSpecId].GpuNum, CloudbrainTrainGpuInfosMap[task.GpuQueue].Value |
|
|
|
func GetCloudBrainOneResourceSpec(task Cloudbrain) (*ResourceSpec, *GpuInfo, error) { |
|
|
|
if task.ResourceSpecId >= 0 { |
|
|
|
if task.JobType == string(JobTypeTrain) { |
|
|
|
return CloudbrainTrainResourceSpecsMap[task.ResourceSpecId], CloudbrainTrainGpuInfosMap[task.GpuQueue], nil |
|
|
|
} else if task.JobType == string(JobTypeDebug) { |
|
|
|
return CloudbrainDebugResourceSpecsMap[task.ResourceSpecId], CloudbrainDebugGpuInfosMap[task.GpuQueue], nil |
|
|
|
} else { |
|
|
|
return CloudbrainDebugResourceSpecsMap[task.ResourceSpecId], CloudbrainDebugGpuInfosMap[task.GpuQueue], nil |
|
|
|
} |
|
|
|
} else if task.Type == TypeCloudBrainTwo || task.Type == TypeC2Net { |
|
|
|
return getCardNumAndTypeByFlavorname(task.FlavorName) |
|
|
|
} else { |
|
|
|
err := errors.New("ResourceSpecId is null") |
|
|
|
return nil, nil, err |
|
|
|
} |
|
|
|
return 0, "" |
|
|
|
} |