|
|
|
@@ -834,84 +834,6 @@ func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
//can, err := canUserCreateTrainJob(ctx.User.ID) |
|
|
|
//if err != nil { |
|
|
|
// ctx.ServerError("canUserCreateTrainJob", err) |
|
|
|
// return |
|
|
|
//} |
|
|
|
// |
|
|
|
//if !can { |
|
|
|
// log.Error("the user can not create train-job") |
|
|
|
// ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job")) |
|
|
|
// return |
|
|
|
//} |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["display_job_name"] = displayJobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
prepareCloudbrainTwoTrainSpecs(ctx) |
|
|
|
|
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
_, datasetNames, err := models.GetDatasetInfo(form.Attachment) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
return nil |
|
|
|
} |
|
|
|
ctx.Data["dataset_name"] = datasetNames |
|
|
|
ctx.Data["branch_name"] = form.BranchName |
|
|
|
ctx.Data["datasetType"] = models.TypeCloudBrainTwo |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
setMultiNodeIfConfigureMatch(ctx) |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobNewVersion(ctx *context.Context) { |
|
|
|
|
|
|
|
err := trainJobNewVersionDataPrepare(ctx) |
|
|
|
@@ -1024,93 +946,6 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
// var versionName = ctx.Params(":version-name") |
|
|
|
var versionName = ctx.Query("version_name") |
|
|
|
|
|
|
|
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["job_name"] = task.JobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
prepareCloudbrainTwoTrainSpecs(ctx) |
|
|
|
|
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
|
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath |
|
|
|
ctx.Data["train_url"] = outputObsPath |
|
|
|
|
|
|
|
branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetBranches error:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["branches"] = branches |
|
|
|
ctx.Data["description"] = form.Description |
|
|
|
ctx.Data["dataset_name"] = task.DatasetName |
|
|
|
ctx.Data["work_server_number"] = form.WorkServerNumber |
|
|
|
ctx.Data["flavor_name"] = form.FlavorName |
|
|
|
ctx.Data["engine_name"] = form.EngineName |
|
|
|
ctx.Data["flavor_code"] = task.FlavorCode |
|
|
|
ctx.Data["engine_id"] = task.EngineID |
|
|
|
ctx.Data["version_name"] = form.VersionName |
|
|
|
|
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["branch_name"] = form.BranchName |
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
ctx.Data["datasetType"] = models.TypeCloudBrainTwo |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { |
|
|
|
ctx.Data["PageIsTrainJob"] = true |
|
|
|
VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) |
|
|
|
@@ -1138,7 +973,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
|
|
|
|
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) |
|
|
|
if errStr != "" { |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1146,13 +981,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1160,7 +995,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
|
|
|
|
if err := paramCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1168,7 +1003,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1179,7 +1014,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
Cluster: models.OpenICluster, |
|
|
|
AiCenterCode: models.AICenterOfCloudBrainTwo}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1188,14 +1023,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
if err == nil { |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1212,7 +1047,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
|
|
|
|
if err := downloadCode(repo, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1220,14 +1055,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1236,7 +1071,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1248,7 +1083,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1274,7 +1109,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to getDatasUrlListByUUIDS: %v", err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to getDatasUrlListByUUIDS:"+err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1282,7 +1117,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
jsondatas, err := json.Marshal(datasUrlList) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Marshal: %v", err) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("json error:"+err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1390,7 +1225,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
err = modelarts.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
trainJobErrorNewDataPrepare(ctx, form) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1475,7 +1310,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
|
|
|
|
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) |
|
|
|
if errStr != "" { |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1483,13 +1318,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1526,14 +1361,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
|
|
|
|
canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID) |
|
|
|
if !canNewJob { |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("user cann't new trainjob", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := paramCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1541,7 +1376,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) |
|
|
|
if err != nil || !bootFileExist { |
|
|
|
log.Error("Get bootfile error:", err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1552,7 +1387,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
Cluster: models.OpenICluster, |
|
|
|
AiCenterCode: models.AICenterOfCloudBrainTwo}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1567,7 +1402,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branchName) |
|
|
|
if err := downloadCode(repo, codeLocalPath, branchName); err != nil { |
|
|
|
log.Error("Failed git clone repo to local(!: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1575,14 +1410,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1592,7 +1427,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1606,7 +1441,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1632,7 +1467,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to getDatasUrlListByUUIDS: %v", err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to getDatasUrlListByUUIDS:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1640,7 +1475,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
jsondatas, err := json.Marshal(datasUrlList) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Marshal: %v", err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("json error:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
@@ -1749,7 +1584,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|