From d546653ab296d8862d108ee051a8a068b4434530 Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 23 Mar 2022 18:15:41 +0800 Subject: [PATCH 01/16] fix-1710 --- modules/modelarts/modelarts.go | 2 + routers/repo/modelarts.go | 120 ++++++++------------------------- 2 files changed, 30 insertions(+), 92 deletions(-) diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index b740b1167..e30d0100c 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -51,6 +51,8 @@ const ( DataUrl = "data_url" ResultUrl = "result_url" CkptUrl = "ckpt_url" + DeviceTarget = "device_target" + Ascend = "Ascend" PerPage = 10 IsLatestVersion = "1" NotLatestVersion = "0" diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 9c670e203..b37c7b3b6 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -859,7 +859,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) flavorCode := form.Flavor params := form.Params poolID := form.PoolID - isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath @@ -953,17 +952,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } - //todo: del local code? - var parameters models.Parameters param := make([]models.Parameter, 0) - param = append(param, models.Parameter{ - Label: modelarts.TrainUrl, - Value: outputObsPath, - }, models.Parameter{ - Label: modelarts.DataUrl, - Value: dataPath, - }) + existDeviceTarget := false if len(params) != 0 { err := json.Unmarshal([]byte(params), ¶meters) if err != nil { @@ -974,6 +965,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } for _, parameter := range parameters.Parameter { + if parameter.Label == modelarts.DeviceTarget { + existDeviceTarget = true + } if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { param = append(param, models.Parameter{ Label: parameter.Label, @@ -982,39 +976,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } } } - - //save param config - if isSaveParam == "on" { - if form.ParameterTemplateName == "" { - log.Error("ParameterTemplateName is empty") - trainJobNewDataPrepare(ctx) - ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form) - return - } - - _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ - ConfigName: form.ParameterTemplateName, - Description: form.PrameterDescription, - DataUrl: dataPath, - AppUrl: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - TrainUrl: outputObsPath, - Flavor: models.Flavor{ - Code: flavorCode, - }, - WorkServerNum: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Parameter: param, + if !existDeviceTarget { + param = append(param, models.Parameter{ + Label: modelarts.DeviceTarget, + Value: modelarts.Ascend, }) - - if err != nil { - log.Error("Failed to CreateTrainJobConfig: %v", err) - trainJobErrorNewDataPrepare(ctx, form) - ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form) - return - } } req := &modelarts.GenerateTrainJobReq{ @@ -1032,7 +998,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) LogUrl: logObsPath, PoolID: poolID, Uuid: uuid, - Parameters: parameters.Parameter, + Parameters: param, CommitID: commitID, IsLatestVersion: isLatestVersion, BranchName: branch_name, @@ -1096,7 +1062,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ flavorCode := form.Flavor params := form.Params poolID := form.PoolID - isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" @@ -1168,13 +1133,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ var parameters models.Parameters param := make([]models.Parameter, 0) - param = append(param, models.Parameter{ - Label: modelarts.TrainUrl, - Value: outputObsPath, - }, models.Parameter{ - Label: modelarts.DataUrl, - Value: dataPath, - }) + existDeviceTarget := true if len(params) != 0 { err := json.Unmarshal([]byte(params), ¶meters) if err != nil { @@ -1183,8 +1142,10 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) return } - for _, parameter := range parameters.Parameter { + if parameter.Label == modelarts.DeviceTarget { + existDeviceTarget = true + } if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { param = append(param, models.Parameter{ Label: parameter.Label, @@ -1193,45 +1154,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ } } } - - //save param config - if isSaveParam == "on" { - if form.ParameterTemplateName == "" { - log.Error("ParameterTemplateName is empty") - versionErrorDataPrepare(ctx, form) - ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) - return - } - - _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ - ConfigName: form.ParameterTemplateName, - Description: form.PrameterDescription, - DataUrl: dataPath, - AppUrl: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - TrainUrl: outputObsPath, - Flavor: models.Flavor{ - Code: flavorCode, - }, - WorkServerNum: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Parameter: parameters.Parameter, + if !existDeviceTarget { + param = append(param, models.Parameter{ + Label: modelarts.DeviceTarget, + Value: modelarts.Ascend, }) - - if err != nil { - log.Error("Failed to CreateTrainJobConfig: %v", err) - versionErrorDataPrepare(ctx, form) - ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) - return - } - } - - if err != nil { - log.Error("getFlavorNameByEngineID(%s) failed:%v", engineID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) - return } task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, PreVersionName) @@ -1257,7 +1184,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ PoolID: poolID, Uuid: uuid, Params: form.Params, - Parameters: parameters.Parameter, + Parameters: param, PreVersionId: task.VersionID, CommitID: commitID, BranchName: branch_name, @@ -1782,7 +1709,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference return } - //todo: del local code? var parameters models.Parameters param := make([]models.Parameter, 0) param = append(param, models.Parameter{ @@ -1792,6 +1718,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference Label: modelarts.CkptUrl, Value: "s3:/" + ckptUrl, }) + existDeviceTarget := false if len(params) != 0 { err := json.Unmarshal([]byte(params), ¶meters) if err != nil { @@ -1802,6 +1729,9 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } for _, parameter := range parameters.Parameter { + if parameter.Label == modelarts.DeviceTarget { + existDeviceTarget = true + } if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { param = append(param, models.Parameter{ Label: parameter.Label, @@ -1810,6 +1740,12 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } } } + if !existDeviceTarget { + param = append(param, models.Parameter{ + Label: modelarts.DeviceTarget, + Value: modelarts.Ascend, + }) + } req := &modelarts.GenerateInferenceJobReq{ JobName: jobName, From 8face5eb12d1a563fda3e9aac80863d441bf17de Mon Sep 17 00:00:00 2001 From: Chunxiang Xu Date: Thu, 24 Mar 2022 09:46:26 +0800 Subject: [PATCH 02/16] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20'README.md'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 061ece70c..99f6a6e8c 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ ## 授权许可 本项目采用 MIT 开源授权许可证,完整的授权说明已放置在 [LICENSE](https://git.openi.org.cn/OpenI/aiforge/src/branch/develop/LICENSE) 文件中。 + ## 需要帮助? 如果您在使用或者开发过程中遇到问题,可以在以下渠道咨询: - 点击[这里](https://git.openi.org.cn/OpenI/aiforge/issues)在线提交问题(点击页面右上角绿色按钮**创建任务**) @@ -49,3 +50,8 @@ ## 启智社区小白训练营: - 结合案例给大家详细讲解如何使用社区平台,帮助无技术背景的小白成长为启智社区达人 (https://git.openi.org.cn/zeizei/OpenI_Learning) + +## 平台引用 +如果本平台对您的科研工作提供了帮助,可在论文致谢中加入: +英文版:```Thanks for the support provided by OpenI Community (https://git.openi.org.cn).``` +中文版:```感谢启智社区提供的技术支持(https://git.openi.org.cn)。``` \ No newline at end of file From a765e65c4ba40c1363c9ca9b1d3b0c212cae2d20 Mon Sep 17 00:00:00 2001 From: wangjr Date: Fri, 25 Mar 2022 11:04:24 +0800 Subject: [PATCH 03/16] =?UTF-8?q?=E5=A4=9A=E5=85=AC=E5=91=8A=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E5=85=B3=E9=97=AD=E5=BC=B9=E7=AA=97=E5=90=8E=E6=96=B0?= =?UTF-8?q?=E5=85=AC=E5=91=8A=E5=BC=B9=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- templates/base/head.tmpl | 6 +++--- templates/base/head_fluid.tmpl | 6 +++--- templates/base/head_home.tmpl | 6 +++--- templates/base/head_pro.tmpl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/templates/base/head.tmpl b/templates/base/head.tmpl index 2cecee52b..937abd588 100755 --- a/templates/base/head.tmpl +++ b/templates/base/head.tmpl @@ -215,10 +215,10 @@ var _hmt = _hmt || []; localStorage.setItem("isCloseNotice",true) } function isShowNotice(){ - var current_notice = localStorage.getItem("notice") + var current_notice = localStorage.getItem("notices") - if (current_notice != "{{.notice.CommitId}}"){ - localStorage.setItem('notice',"{{.notice.CommitId}}"); + if (current_notice != "{{.notices.CommitId}}"){ + localStorage.setItem('notices',"{{.notices.CommitId}}"); isNewNotice=true; localStorage.setItem("isCloseNotice",false) }else{ diff --git a/templates/base/head_fluid.tmpl b/templates/base/head_fluid.tmpl index 59e542b0b..5340c7cb8 100644 --- a/templates/base/head_fluid.tmpl +++ b/templates/base/head_fluid.tmpl @@ -216,10 +216,10 @@ var _hmt = _hmt || []; localStorage.setItem("isCloseNotice",true) } function isShowNotice(){ - var current_notice = localStorage.getItem("notice") + var current_notice = localStorage.getItem("notices") - if (current_notice != "{{.notice.CommitId}}"){ - localStorage.setItem('notice',"{{.notice.CommitId}}"); + if (current_notice != "{{.notices.CommitId}}"){ + localStorage.setItem('notices',"{{.notices.CommitId}}"); isNewNotice=true; localStorage.setItem("isCloseNotice",false) }else{ diff --git a/templates/base/head_home.tmpl b/templates/base/head_home.tmpl index 561edd5ce..25d7a92ec 100644 --- a/templates/base/head_home.tmpl +++ b/templates/base/head_home.tmpl @@ -220,10 +220,10 @@ var _hmt = _hmt || []; localStorage.setItem("isCloseNotice",true) } function isShowNotice(){ - var current_notice = localStorage.getItem("notice") + var current_notice = localStorage.getItem("notices") - if (current_notice != "{{.notice.CommitId}}"){ - localStorage.setItem('notice',"{{.notice.CommitId}}"); + if (current_notice != "{{.notices.CommitId}}"){ + localStorage.setItem('notices',"{{.notices.CommitId}}"); isNewNotice=true; localStorage.setItem("isCloseNotice",false) }else{ diff --git a/templates/base/head_pro.tmpl b/templates/base/head_pro.tmpl index 82543ac61..75292b6fc 100644 --- a/templates/base/head_pro.tmpl +++ b/templates/base/head_pro.tmpl @@ -217,10 +217,10 @@ var _hmt = _hmt || []; localStorage.setItem("isCloseNotice",true) } function isShowNotice(){ - var current_notice = localStorage.getItem("notice") + var current_notice = localStorage.getItem("notices") - if (current_notice != "{{.notice.CommitId}}"){ - localStorage.setItem('notice',"{{.notice.CommitId}}"); + if (current_notice != "{{.notices.CommitId}}"){ + localStorage.setItem('notices',"{{.notices.CommitId}}"); isNewNotice=true; localStorage.setItem("isCloseNotice",false) }else{ From 624372f01b31a0758286a7e81668e2007a29c6a1 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 25 Mar 2022 11:46:57 +0800 Subject: [PATCH 04/16] fix bug --- models/repo.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/models/repo.go b/models/repo.go index 2d1fdacfb..5d662db45 100755 --- a/models/repo.go +++ b/models/repo.go @@ -2691,7 +2691,7 @@ func ReadLatestFileInRepo(userName, repoName, refName, treePath string) (*RepoFi log.Error("ReadLatestFileInRepo error when OpenRepository,error=%v", err) return nil, err } - commitID, err := gitRepo.GetBranchCommitID(refName) + _, err = gitRepo.GetBranchCommitID(refName) if err != nil { log.Error("ReadLatestFileInRepo error when GetBranchCommitID,error=%v", err) return nil, err @@ -2723,5 +2723,9 @@ func ReadLatestFileInRepo(userName, repoName, refName, treePath string) (*RepoFi if n >= 0 { buf = buf[:n] } - return &RepoFile{CommitId: commitID, Content: buf}, nil + commitId := "" + if blob != nil { + commitId = fmt.Sprint(blob.ID) + } + return &RepoFile{CommitId: commitId, Content: buf}, nil } From 2c677c8de84b1c5d3c90dba75ccd99292882774e Mon Sep 17 00:00:00 2001 From: liuzx Date: Fri, 25 Mar 2022 17:58:50 +0800 Subject: [PATCH 05/16] fix-bug --- routers/repo/modelarts.go | 70 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index b37c7b3b6..73d3172a5 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -859,6 +859,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) flavorCode := form.Flavor params := form.Params poolID := form.PoolID + isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath @@ -983,6 +984,40 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) }) } + //save param config + if isSaveParam == "on" { + if form.ParameterTemplateName == "" { + log.Error("ParameterTemplateName is empty") + trainJobNewDataPrepare(ctx) + ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form) + return + } + + _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ + ConfigName: form.ParameterTemplateName, + Description: form.PrameterDescription, + DataUrl: dataPath, + AppUrl: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + TrainUrl: outputObsPath, + Flavor: models.Flavor{ + Code: flavorCode, + }, + WorkServerNum: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Parameter: param, + }) + + if err != nil { + log.Error("Failed to CreateTrainJobConfig: %v", err) + trainJobErrorNewDataPrepare(ctx, form) + ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form) + return + } + } + req := &modelarts.GenerateTrainJobReq{ JobName: jobName, DisplayJobName: displayJobName, @@ -1062,6 +1097,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ flavorCode := form.Flavor params := form.Params poolID := form.PoolID + isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" @@ -1161,6 +1197,40 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ }) } + //save param config + if isSaveParam == "on" { + if form.ParameterTemplateName == "" { + log.Error("ParameterTemplateName is empty") + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) + return + } + + _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ + ConfigName: form.ParameterTemplateName, + Description: form.PrameterDescription, + DataUrl: dataPath, + AppUrl: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + TrainUrl: outputObsPath, + Flavor: models.Flavor{ + Code: flavorCode, + }, + WorkServerNum: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Parameter: parameters.Parameter, + }) + + if err != nil { + log.Error("Failed to CreateTrainJobConfig: %v", err) + versionErrorDataPrepare(ctx, form) + ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) + return + } + } + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, PreVersionName) if err != nil { log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) From d34517cc32c130151e432fce4a9c9d1fb947e765 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 25 Mar 2022 18:02:01 +0800 Subject: [PATCH 06/16] #1746 handle history data --- models/cloudbrain.go | 14 +++- modules/cloudbrain/cloudbrain.go | 2 + routers/private/internal.go | 2 + routers/repo/cloudbrain.go | 113 +++++++++++++++++++++++++++++-- 4 files changed, 124 insertions(+), 7 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index ea6d0338e..4cd02d7c6 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -87,6 +87,8 @@ const ( ModelArtsTrainJobCheckRunning ModelArtsJobStatus = "CHECK_RUNNING" //审核作业正在运行中 ModelArtsTrainJobCheckRunningCompleted ModelArtsJobStatus = "CHECK_RUNNING_COMPLETED" //审核作业已经完成 ModelArtsTrainJobCheckFailed ModelArtsJobStatus = "CHECK_FAILED" //审核作业失败 + + DURATION_STR_ZERO = "00:00:00" ) type Cloudbrain struct { @@ -174,7 +176,7 @@ func (task *Cloudbrain) ComputeAndSetDuration() { func ConvertDurationToStr(duration int64) string { if duration == 0 { - return "00:00:00" + return DURATION_STR_ZERO } return util.AddZero(duration/3600) + ":" + util.AddZero(duration%3600/60) + ":" + util.AddZero(duration%60) } @@ -1323,6 +1325,7 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e } func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { + cloudbrain.TrainJobDuration = DURATION_STR_ZERO if _, err = x.Insert(cloudbrain); err != nil { return err } @@ -1467,6 +1470,15 @@ func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) { Find(&cloudbrains) } +func GetStoppedJobWithNoDurationJob() ([]*Cloudbrain, error) { + cloudbrains := make([]*Cloudbrain, 0, 10) + return cloudbrains, x. + In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded). + Where("train_job_duration is null or train_job_duration = '' "). + Limit(100). + Find(&cloudbrains) +} + func GetCloudbrainCountByUserID(userID int64, jobType string) (int, error) { count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ? and type = ?", jobType, userID, TypeCloudBrainOne).Count(new(Cloudbrain)) return int(count), err diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 54ac0c7ac..9aae447b0 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -158,10 +158,12 @@ func GenerateTask(ctx *context.Context, displayJobName, jobName, image, command, if ResourceSpecs == nil { json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) } + for _, spec := range ResourceSpecs.ResourceSpec { if resourceSpecId == spec.Id { resourceSpec = spec } + } if resourceSpec == nil { diff --git a/routers/private/internal.go b/routers/private/internal.go index 0dd725ca3..d80a706cc 100755 --- a/routers/private/internal.go +++ b/routers/private/internal.go @@ -6,6 +6,7 @@ package private import ( + "code.gitea.io/gitea/routers/repo" "strings" "code.gitea.io/gitea/modules/log" @@ -45,6 +46,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/tool/update_all_repo_commit_cnt", UpdateAllRepoCommitCnt) m.Post("/tool/repo_stat/:date", RepoStatisticManually) m.Post("/tool/update_repo_visit/:date", UpdateRepoVisit) + m.Post("/task/history_handle/duration", repo.HandleTaskWithNoDuration) }, CheckInternalToken) } diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index d444ea73f..084dbcbe9 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -417,13 +417,16 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName) { } } if task.TrainJobDuration == "" { - var duration int64 - if task.Status == string(models.JobRunning) { - duration = time.Now().Unix() - int64(task.CreatedUnix) - } else { - duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) + if task.Duration == 0 { + var duration int64 + if task.Status == string(models.JobRunning) { + duration = time.Now().Unix() - int64(task.CreatedUnix) + } else { + duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) + } + task.Duration = duration } - task.TrainJobDuration = models.ConvertDurationToStr(duration) + task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) } ctx.Data["duration"] = task.TrainJobDuration ctx.Data["task"] = task @@ -1060,6 +1063,104 @@ func SyncCloudbrainStatus() { return } +func HandleTaskWithNoDuration(ctx *context.Context) { + log.Info("HandleTaskWithNoDuration start") + cloudBrains, err := models.GetStoppedJobWithNoDurationJob() + if err != nil { + log.Error("HandleTaskWithNoTrainJobDuration failed:", err.Error()) + return + } + if len(cloudBrains) == 0 { + log.Info("HandleTaskWithNoTrainJobDuration:no task need handle") + return + } + + for _, task := range cloudBrains { + log.Info("Handle job ,%+v", task) + if task.Type == models.TypeCloudBrainOne { + result, err := cloudbrain.GetJob(task.JobID) + if err != nil { + log.Error("GetJob(%s) failed:%v", task.JobName, err) + continue + } + + if result != nil { + jobRes, _ := models.ConvertToJobResultPayload(result.Payload) + taskRoles := jobRes.TaskRoles + taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) + task.Status = taskRes.TaskStatuses[0].State + startTime := taskRes.TaskStatuses[0].StartAt.Unix() + endTime := taskRes.TaskStatuses[0].FinishedAt.Unix() + log.Info("task startTime = %v endTime= %v", startTime, endTime) + if startTime > 0 && endTime > 0 && endTime-startTime > 0 { + task.StartTime = timeutil.TimeStamp(startTime) + task.EndTime = timeutil.TimeStamp(endTime) + } else { + task.StartTime = task.CreatedUnix + task.EndTime = task.UpdatedUnix + } + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } + } + } else if task.Type == models.TypeCloudBrainTwo { + if task.JobType == string(models.JobTypeDebug) { + //result, err := modelarts.GetJob(task.JobID) + result, err := modelarts.GetNotebook2(task.JobID) + if err != nil { + log.Error("GetJob(%s) failed:%v", task.JobName, err) + continue + } + + if result != nil { + task.Status = result.Status + startTime := result.Lease.CreateTime + duration := result.Lease.Duration / 1000 + if startTime > 0 { + task.StartTime = timeutil.TimeStamp(startTime) + task.EndTime = task.StartTime.Add(duration) + } + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + continue + } + } + } else if task.JobType == string(models.JobTypeTrain) { + result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) + if err != nil { + log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) + continue + } + + if result != nil { + startTime := result.StartTime / 1000 + if startTime > 0 { + task.StartTime = timeutil.TimeStamp(startTime) + task.EndTime = task.StartTime.Add(result.Duration / 1000) + } + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + continue + } + } + } else { + log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType) + } + + } else { + log.Error("task.Type(%s) is error:%d", task.JobName, task.Type) + } + } + + return +} + func CloudBrainBenchmarkIndex(ctx *context.Context) { MustEnableCloudbrain(ctx) repo := ctx.Repo.Repository From 1400ba501c380b722b4bcb946df6881a0f9a9a25 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 25 Mar 2022 18:09:42 +0800 Subject: [PATCH 07/16] #1627 fix bug --- options/locale/locale_en-US.ini | 2 +- templates/repo/cloudbrain/benchmark/index.tmpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 3ebe5a9b5..42ea439c9 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -870,7 +870,7 @@ gpu_type_all=All model_download=Model Download submit_image=Submit Image download=Download -score=score +score=Score cloudbrain=Cloudbrain cloudbrain.new=New cloudbrain diff --git a/templates/repo/cloudbrain/benchmark/index.tmpl b/templates/repo/cloudbrain/benchmark/index.tmpl index 989e3bfd2..4e7d5b4e5 100755 --- a/templates/repo/cloudbrain/benchmark/index.tmpl +++ b/templates/repo/cloudbrain/benchmark/index.tmpl @@ -155,7 +155,7 @@ {{end}} - {{$.i18n.Tr "repo.stop"}} + {{$.i18n.Tr "repo.score"}} From 2c51360819b24d2a4e472fc780b6a078ff7883b1 Mon Sep 17 00:00:00 2001 From: liuzx Date: Mon, 28 Mar 2022 09:50:37 +0800 Subject: [PATCH 08/16] update --- routers/repo/modelarts.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 73d3172a5..5f9b6bc9c 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -986,6 +986,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) //save param config if isSaveParam == "on" { + saveparams := append(param, models.Parameter{ + Label: modelarts.TrainUrl, + Value: outputObsPath, + }, models.Parameter{ + Label: modelarts.DataUrl, + Value: dataPath, + }) if form.ParameterTemplateName == "" { log.Error("ParameterTemplateName is empty") trainJobNewDataPrepare(ctx) @@ -1007,7 +1014,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) EngineID: int64(engineID), LogUrl: logObsPath, PoolID: poolID, - Parameter: param, + Parameter: saveparams, }) if err != nil { @@ -1199,6 +1206,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ //save param config if isSaveParam == "on" { + saveparams := append(param, models.Parameter{ + Label: modelarts.TrainUrl, + Value: outputObsPath, + }, models.Parameter{ + Label: modelarts.DataUrl, + Value: dataPath, + }) if form.ParameterTemplateName == "" { log.Error("ParameterTemplateName is empty") versionErrorDataPrepare(ctx, form) @@ -1220,7 +1234,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ EngineID: int64(engineID), LogUrl: logObsPath, PoolID: poolID, - Parameter: parameters.Parameter, + Parameter: saveparams, }) if err != nil { From 23fb767efa64cc0b64260edb1702e56e83e0921a Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 10:18:18 +0800 Subject: [PATCH 09/16] #1654 fix bug --- models/cloudbrain.go | 2 +- routers/repo/cloudbrain.go | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 4cd02d7c6..17761a1dc 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1471,7 +1471,7 @@ func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) { } func GetStoppedJobWithNoDurationJob() ([]*Cloudbrain, error) { - cloudbrains := make([]*Cloudbrain, 0, 10) + cloudbrains := make([]*Cloudbrain, 0) return cloudbrains, x. In("status", ModelArtsTrainJobCompleted, ModelArtsTrainJobFailed, ModelArtsTrainJobKilled, ModelArtsStopped, JobStopped, JobFailed, JobSucceeded). Where("train_job_duration is null or train_job_duration = '' "). diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 084dbcbe9..7053177ac 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1091,14 +1091,24 @@ func HandleTaskWithNoDuration(ctx *context.Context) { task.Status = taskRes.TaskStatuses[0].State startTime := taskRes.TaskStatuses[0].StartAt.Unix() endTime := taskRes.TaskStatuses[0].FinishedAt.Unix() - log.Info("task startTime = %v endTime= %v", startTime, endTime) - if startTime > 0 && endTime > 0 && endTime-startTime > 0 { + log.Info("task startTime = %v endTime= %v ,jobId=%d", startTime, endTime, task.ID) + if startTime > 0 { task.StartTime = timeutil.TimeStamp(startTime) - task.EndTime = timeutil.TimeStamp(endTime) } else { task.StartTime = task.CreatedUnix + } + if endTime > 0 { + task.EndTime = timeutil.TimeStamp(endTime) + } else { task.EndTime = task.UpdatedUnix } + + if task.EndTime < task.StartTime { + log.Info("endTime[%v] is less than starTime[%v],jobId=%d", task.EndTime, task.StartTime, task.ID) + st := task.StartTime + task.StartTime = task.EndTime + task.EndTime = st + } task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { From 389ab81ceb8cd4d6caee4f9e5e593c10e47cd863 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 10:30:44 +0800 Subject: [PATCH 10/16] #1654 fix bug --- routers/repo/cloudbrain.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 7053177ac..ee44df4f9 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1081,6 +1081,13 @@ func HandleTaskWithNoDuration(ctx *context.Context) { result, err := cloudbrain.GetJob(task.JobID) if err != nil { log.Error("GetJob(%s) failed:%v", task.JobName, err) + task.StartTime = task.CreatedUnix + task.EndTime = task.UpdatedUnix + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } continue } @@ -1121,6 +1128,13 @@ func HandleTaskWithNoDuration(ctx *context.Context) { result, err := modelarts.GetNotebook2(task.JobID) if err != nil { log.Error("GetJob(%s) failed:%v", task.JobName, err) + task.StartTime = task.CreatedUnix + task.EndTime = task.UpdatedUnix + task.ComputeAndSetDuration() + err = models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } continue } From 82787ddc2ed001f6c517b39692cb9cab1a4d1500 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 10:39:36 +0800 Subject: [PATCH 11/16] #1654 fix bug --- routers/repo/cloudbrain.go | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ee44df4f9..fb40c8dec 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1065,16 +1065,29 @@ func SyncCloudbrainStatus() { func HandleTaskWithNoDuration(ctx *context.Context) { log.Info("HandleTaskWithNoDuration start") - cloudBrains, err := models.GetStoppedJobWithNoDurationJob() - if err != nil { - log.Error("HandleTaskWithNoTrainJobDuration failed:", err.Error()) - return - } - if len(cloudBrains) == 0 { - log.Info("HandleTaskWithNoTrainJobDuration:no task need handle") - return + count := 0 + for { + cloudBrains, err := models.GetStoppedJobWithNoDurationJob() + if err != nil { + log.Error("HandleTaskWithNoTrainJobDuration failed:", err.Error()) + break + } + if len(cloudBrains) == 0 { + log.Info("HandleTaskWithNoTrainJobDuration:no task need handle") + break + } + handleNoDurationTask(cloudBrains) + count += len(cloudBrains) + if len(cloudBrains) < 100 { + log.Info("HandleTaskWithNoTrainJobDuration:task less than 100") + break + } } + log.Info("HandleTaskWithNoTrainJobDuration:count=%d", count) +} + +func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { for _, task := range cloudBrains { log.Info("Handle job ,%+v", task) if task.Type == models.TypeCloudBrainOne { @@ -1181,8 +1194,6 @@ func HandleTaskWithNoDuration(ctx *context.Context) { log.Error("task.Type(%s) is error:%d", task.JobName, task.Type) } } - - return } func CloudBrainBenchmarkIndex(ctx *context.Context) { From 9f13443fa783314ef11e99445d3eac8e512ff3e5 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 10:56:37 +0800 Subject: [PATCH 12/16] #1654 fix bug --- routers/repo/cloudbrain.go | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index fb40c8dec..a1a8d4a12 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1094,17 +1094,15 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { result, err := cloudbrain.GetJob(task.JobID) if err != nil { log.Error("GetJob(%s) failed:%v", task.JobName, err) - task.StartTime = task.CreatedUnix - task.EndTime = task.UpdatedUnix - task.ComputeAndSetDuration() - err = models.UpdateJob(task) - if err != nil { - log.Error("UpdateJob(%s) failed:%v", task.JobName, err) - } + updateDefaultDuration(task) continue } if result != nil { + if result.Msg != "success" { + updateDefaultDuration(task) + continue + } jobRes, _ := models.ConvertToJobResultPayload(result.Payload) taskRoles := jobRes.TaskRoles taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) @@ -1196,6 +1194,17 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { } } +func updateDefaultDuration(task *models.Cloudbrain) { + log.Info("updateDefaultDuration: taskId=%d", task.ID) + task.StartTime = task.CreatedUnix + task.EndTime = task.UpdatedUnix + task.ComputeAndSetDuration() + err := models.UpdateJob(task) + if err != nil { + log.Error("UpdateJob(%s) failed:%v", task.JobName, err) + } +} + func CloudBrainBenchmarkIndex(ctx *context.Context) { MustEnableCloudbrain(ctx) repo := ctx.Repo.Repository From 8acf96236f86e554b483dbbf77f616cf9d201ce8 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 11:13:37 +0800 Subject: [PATCH 13/16] #1654 fix bug --- routers/repo/cloudbrain.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index a1a8d4a12..a0bed615c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1105,7 +1105,15 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { } jobRes, _ := models.ConvertToJobResultPayload(result.Payload) taskRoles := jobRes.TaskRoles + if len(taskRoles) == 0 { + updateDefaultDuration(task) + continue + } taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) + if len(taskRes.TaskStatuses) == 0 { + updateDefaultDuration(task) + continue + } task.Status = taskRes.TaskStatuses[0].State startTime := taskRes.TaskStatuses[0].StartAt.Unix() endTime := taskRes.TaskStatuses[0].FinishedAt.Unix() From f58583c46e71f22c9f831aae5a55eff7b3b8d15f Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 11:24:43 +0800 Subject: [PATCH 14/16] #1654 fix bug --- routers/repo/cloudbrain.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index a0bed615c..2d4b4f279 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1103,14 +1103,14 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { updateDefaultDuration(task) continue } - jobRes, _ := models.ConvertToJobResultPayload(result.Payload) - taskRoles := jobRes.TaskRoles - if len(taskRoles) == 0 { + jobRes, err := models.ConvertToJobResultPayload(result.Payload) + if err != nil || len(jobRes.TaskRoles) == 0 { updateDefaultDuration(task) continue } - taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) - if len(taskRes.TaskStatuses) == 0 { + taskRoles := jobRes.TaskRoles + taskRes, err := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) + if err != nil || len(taskRes.TaskStatuses) == 0 { updateDefaultDuration(task) continue } From 215b67061fa500858a627fc514f33f4a57e2bc7d Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 28 Mar 2022 11:45:52 +0800 Subject: [PATCH 15/16] #1654 fix bug --- routers/repo/cloudbrain.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 2d4b4f279..1b83c86ab 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -1084,7 +1084,7 @@ func HandleTaskWithNoDuration(ctx *context.Context) { } } log.Info("HandleTaskWithNoTrainJobDuration:count=%d", count) - + ctx.JSON(200, "success") } func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { @@ -1241,13 +1241,16 @@ func CloudBrainBenchmarkIndex(ctx *context.Context) { ciTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) ciTasks[i].Cloudbrain.ComputeResource = task.ComputeResource if ciTasks[i].TrainJobDuration == "" { - var duration int64 - if task.Status == string(models.JobRunning) { - duration = time.Now().Unix() - int64(task.Cloudbrain.CreatedUnix) - } else { - duration = int64(task.Cloudbrain.UpdatedUnix) - int64(task.Cloudbrain.CreatedUnix) + if ciTasks[i].Duration == 0 { + var duration int64 + if task.Status == string(models.JobRunning) { + duration = time.Now().Unix() - int64(task.Cloudbrain.CreatedUnix) + } else { + duration = int64(task.Cloudbrain.UpdatedUnix) - int64(task.Cloudbrain.CreatedUnix) + } + ciTasks[i].Duration = duration } - ciTasks[i].TrainJobDuration = models.ConvertDurationToStr(duration) + ciTasks[i].TrainJobDuration = models.ConvertDurationToStr(ciTasks[i].Duration) } ciTasks[i].BenchmarkTypeName = "" From 1894cc135002310ec842072fcfed11d8c452f94d Mon Sep 17 00:00:00 2001 From: wangjr Date: Mon, 28 Mar 2022 16:01:24 +0800 Subject: [PATCH 16/16] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=95=9C=E5=83=8F?= =?UTF-8?q?=E5=92=8C=E6=B4=BE=E7=94=9F=E5=8F=8D=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- web_src/js/components/ProAnalysis.vue | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/web_src/js/components/ProAnalysis.vue b/web_src/js/components/ProAnalysis.vue index d92eb6df9..bdc874c27 100755 --- a/web_src/js/components/ProAnalysis.vue +++ b/web_src/js/components/ProAnalysis.vue @@ -150,21 +150,21 @@ align="center"> - + {{scope.row.isFork|changeType}} + + - +