|
|
|
@@ -135,18 +135,13 @@ func grampusGpuNewDataPrepare(ctx *context.Context) error { |
|
|
|
json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) |
|
|
|
} |
|
|
|
ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec |
|
|
|
ctx.Data["params"] = "" |
|
|
|
ctx.Data["branchName"] = ctx.Repo.BranchName |
|
|
|
|
|
|
|
ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath |
|
|
|
ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled |
|
|
|
|
|
|
|
ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath |
|
|
|
ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled |
|
|
|
|
|
|
|
ctx.Data["cloudbraintype"] = models.TypeCloudBrainOne |
|
|
|
|
|
|
|
ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") |
|
|
|
branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetBranches error:", err) |
|
|
|
} |
|
|
|
ctx.Data["branches"] = branches |
|
|
|
ctx.Data["branchName"] = ctx.Repo.BranchName |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
@@ -170,51 +165,37 @@ func grampusTrainJobNpuNewDataPrepare(ctx *context.Context) error { |
|
|
|
//get valid dataset |
|
|
|
attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
//get valid resource specs |
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
log.Error("GetModelArtsTrainAttachments failed:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
//get valid engines |
|
|
|
images, err := grampus.GetImages(grampus.ProcessorTypeNPU) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetResourceSpecs failed:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["engine_versions"] = images.Infos |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
//get valid resource specs |
|
|
|
specs, err := grampus.GetResourceSpecs(grampus.ProcessorTypeNPU) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetResourceSpecs failed:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["flavor_infos"] = specs.Infos |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
var flavorInfos modelarts.Flavor |
|
|
|
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
//get branches |
|
|
|
branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetBranches error:", err.Error()) |
|
|
|
} else { |
|
|
|
ctx.Data["branches"] = branches |
|
|
|
} |
|
|
|
ctx.Data["flavor_infos"] = flavorInfos.Info |
|
|
|
|
|
|
|
ctx.Data["params"] = "" |
|
|
|
ctx.Data["branchName"] = ctx.Repo.BranchName |
|
|
|
|
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
@@ -246,10 +227,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" |
|
|
|
branchName := form.BranchName |
|
|
|
isLatestVersion := modelarts.IsLatestVersion |
|
|
|
FlavorName := form.FlavorName |
|
|
|
VersionCount := modelarts.VersionCount |
|
|
|
EngineName := form.EngineName |
|
|
|
flavorName := form.FlavorName |
|
|
|
versionCount := modelarts.VersionCount |
|
|
|
engineName := form.EngineName |
|
|
|
|
|
|
|
//check count limit |
|
|
|
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
@@ -265,12 +247,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//check param |
|
|
|
if err := grampusParamCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
grampusTrainJobNpuNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//check whether the task name in the project is duplicated |
|
|
|
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeTrain), displayJobName) |
|
|
|
if err == nil { |
|
|
|
@@ -295,9 +279,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} |
|
|
|
|
|
|
|
gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branchName) |
|
|
|
|
|
|
|
if err := downloadCode(repo, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNpuNewDataPrepare(ctx) |
|
|
|
@@ -321,7 +302,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
grampusTrainJobNpuNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form) |
|
|
|
@@ -330,9 +310,9 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
|
|
|
|
//prepare command |
|
|
|
//todo: download code, download dataset, unzip dataset, exec code, upload model |
|
|
|
command, err := generateCommand(grampus.ProcessorTypeNPU, codeObsPath, dataPath, params, "") |
|
|
|
var parameters models.Parameters |
|
|
|
param := make([]models.Parameter, 0) |
|
|
|
existDeviceTarget := false |
|
|
|
if len(params) != 0 { |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
@@ -343,63 +323,45 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
for _, parameter := range parameters.Parameter { |
|
|
|
if parameter.Label == modelarts.DeviceTarget { |
|
|
|
existDeviceTarget = true |
|
|
|
} |
|
|
|
if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: parameter.Label, |
|
|
|
Value: parameter.Value, |
|
|
|
}) |
|
|
|
} |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: parameter.Label, |
|
|
|
Value: parameter.Value, |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
|
if !existDeviceTarget { |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: modelarts.DeviceTarget, |
|
|
|
Value: modelarts.Ascend, |
|
|
|
}) |
|
|
|
} |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: modelarts.DeviceTarget, |
|
|
|
Value: modelarts.Ascend, |
|
|
|
}) |
|
|
|
|
|
|
|
gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branchName) |
|
|
|
|
|
|
|
req := &grampus.GenerateTrainJobReq{ |
|
|
|
JobName: jobName, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
ComputeResource: models.NPUResource, |
|
|
|
Command: "echo test", |
|
|
|
ResourceSpecId: "f2497d54732b45fb8d887e63be1db4a7", |
|
|
|
ImageUrl: "", |
|
|
|
ImageId: "e6e85cd78ca24e158f71b6fac9c2fb95", |
|
|
|
|
|
|
|
DataUrl: dataPath, |
|
|
|
Description: description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + bootFile, |
|
|
|
BootFile: bootFile, |
|
|
|
//TrainUrl: outputObsPath, |
|
|
|
//FlavorCode: flavorCode, |
|
|
|
WorkServerNumber: 1, |
|
|
|
//EngineID: int64(engineID), |
|
|
|
//LogUrl: logObsPath, |
|
|
|
//PoolID: poolID, |
|
|
|
Uuid: uuid, |
|
|
|
//Parameters: param, |
|
|
|
JobName: jobName, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
ComputeResource: models.NPUResource, |
|
|
|
Command: command, |
|
|
|
ResourceSpecId: form.FlavorID, |
|
|
|
ImageUrl: "", |
|
|
|
ImageId: form.ImageID, |
|
|
|
DataUrl: dataPath, |
|
|
|
Description: description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + bootFile, |
|
|
|
BootFile: bootFile, |
|
|
|
WorkServerNumber: form.WorkServerNumber, |
|
|
|
Uuid: uuid, |
|
|
|
CommitID: commitID, |
|
|
|
IsLatestVersion: isLatestVersion, |
|
|
|
BranchName: branchName, |
|
|
|
Params: form.Params, |
|
|
|
FlavorName: FlavorName, |
|
|
|
EngineName: EngineName, |
|
|
|
VersionCount: VersionCount, |
|
|
|
FlavorName: flavorName, |
|
|
|
EngineName: engineName, |
|
|
|
VersionCount: versionCount, |
|
|
|
TotalVersionCount: modelarts.TotalVersionCount, |
|
|
|
} |
|
|
|
|
|
|
|
//将params转换Parameters.Parameter,出错时返回给前端 |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err := json.Unmarshal([]byte(params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
err = grampus.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
@@ -517,25 +479,6 @@ func GrampusTrainJobShow(ctx *context.Context) { |
|
|
|
task.DatasetName = attachment.Name |
|
|
|
} |
|
|
|
|
|
|
|
if len(task.Parameters) > 0 { |
|
|
|
var parameters models.Parameters |
|
|
|
err := json.Unmarshal([]byte(task.Parameters), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) |
|
|
|
ctx.ServerError("system error", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if len(parameters.Parameter) > 0 { |
|
|
|
paramTemp := "" |
|
|
|
for _, Parameter := range parameters.Parameter { |
|
|
|
param := Parameter.Label + " = " + Parameter.Value + "; " |
|
|
|
paramTemp = paramTemp + param |
|
|
|
} |
|
|
|
task.Parameters = paramTemp[:len(paramTemp)-2] |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if task.DeletedAt.IsZero() { //normal record |
|
|
|
result, err := grampus.GetJob(task.JobID) |
|
|
|
if err != nil { |
|
|
|
@@ -565,6 +508,25 @@ func GrampusTrainJobShow(ctx *context.Context) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if len(task.Parameters) > 0 { |
|
|
|
var parameters models.Parameters |
|
|
|
err := json.Unmarshal([]byte(task.Parameters), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) |
|
|
|
ctx.ServerError("system error", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if len(parameters.Parameter) > 0 { |
|
|
|
paramTemp := "" |
|
|
|
for _, Parameter := range parameters.Parameter { |
|
|
|
param := Parameter.Label + " = " + Parameter.Value + "; " |
|
|
|
paramTemp = paramTemp + param |
|
|
|
} |
|
|
|
task.Parameters = paramTemp[:len(paramTemp)-2] |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
taskList := make([]*models.Cloudbrain, 0) |
|
|
|
taskList = append(taskList, task) |
|
|
|
ctx.Data["version_list_task"] = taskList |
|
|
|
@@ -595,3 +557,13 @@ func GrampusGetLog(ctx *context.Context) { |
|
|
|
|
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
// generateCommand builds the command string that is submitted with a Grampus
// training job.
//
// The function is currently a placeholder: none of the planned steps listed in
// the body are implemented, so every argument is ignored and the result is
// always the empty string with a nil error.
//
// Parameters (as passed by the visible caller in GrampusTrainJobNpuCreate):
//   - processorType: processor kind of the job (the caller passes
//     grampus.ProcessorTypeNPU).
//   - codePath: the caller's code OBS path.
//   - dataPath: the caller's dataset path.
//   - params: the raw run-parameter string from the create form.
//   - outputPath: the caller passes "" for now.
//
// NOTE(review): the exact meaning of each path for the finished command is not
// defined anywhere in this block — confirm before implementing the steps.
//
// Returns the generated command and an error; today that is always ("", nil).
func generateCommand(processorType, codePath, dataPath, params, outputPath string) (string, error) {
	// Planned pipeline, not implemented yet:
	//   1. download code
	//   2. download dataset
	//   3. unzip dataset
	//   4. exec code
	//   5. upload models

	// Zero value on purpose: no step contributes to the command yet.
	var command string

	return command, nil
}