Browse Source

Merge pull request '修复任务运行时长不准的问题' (#1789) from fix-1654 into V20220328

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/1789
Reviewed-by: lewis <747342561@qq.com>
tags/v1.22.3.2^2
lewis 4 years ago
parent
commit
2c1e9ef253
7 changed files with 95 additions and 55 deletions
  1. +22
    -2
      models/cloudbrain.go
  2. +3
    -12
      routers/api/v1/repo/cloudbrain.go
  3. +27
    -34
      routers/repo/cloudbrain.go
  4. +14
    -2
      templates/repo/cloudbrain/benchmark/show.tmpl
  5. +10
    -2
      templates/repo/cloudbrain/show.tmpl
  6. +7
    -1
      templates/repo/modelarts/inferencejob/show.tmpl
  7. +12
    -2
      templates/repo/modelarts/trainjob/show.tmpl

+ 22
- 2
models/cloudbrain.go View File

@@ -170,7 +170,9 @@ func (task *Cloudbrain) ComputeAndSetDuration() {
if task.StartTime == 0 {
d = 0
} else if task.EndTime == 0 {
d = time.Now().Unix() - task.StartTime.AsTime().Unix()
if !task.IsTerminal() {
d = time.Now().Unix() - task.StartTime.AsTime().Unix()
}
} else {
d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
}
@@ -182,6 +184,11 @@ func (task *Cloudbrain) ComputeAndSetDuration() {
task.TrainJobDuration = ConvertDurationToStr(d)
}

// IsTerminal reports whether task.Status equals one of the finished-job
// statuses enumerated below (ModelArts train-job completed/failed/killed,
// ModelArts stopped, and Cloudbrain job stopped/failed/succeeded).
func (task *Cloudbrain) IsTerminal() bool {
	// A slice scan is used instead of a switch: several of these constants may
	// share the same string value, and duplicate constant cases do not compile.
	finished := []string{
		string(ModelArtsTrainJobCompleted),
		string(ModelArtsTrainJobFailed),
		string(ModelArtsTrainJobKilled),
		string(ModelArtsStopped),
		string(JobStopped),
		string(JobFailed),
		string(JobSucceeded),
	}
	for _, candidate := range finished {
		if task.Status == candidate {
			return true
		}
	}
	return false
}

func ConvertDurationToStr(duration int64) string {
if duration == 0 {
return DURATION_STR_ZERO
@@ -201,6 +208,19 @@ func IsCloudBrainOneDebugJobTerminal(status string) bool {
return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded)
}

// ParseAndSetDurationFromCloudBrainOne back-fills task.StartTime and
// task.EndTime from the job result when they are still zero, then recomputes
// the task duration via ComputeAndSetDuration.
//
// Nothing is back-filled unless result.JobStatus.CreatedTime is positive.
// EndTime is additionally only set when the task status is terminal according
// to IsCloudBrainOneDebugJobTerminal and a positive CompletedTime is reported.
// The duration is recomputed in every case.
func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) {
	createdAt := result.JobStatus.CreatedTime
	completedAt := result.JobStatus.CompletedTime

	if createdAt > 0 {
		// NOTE(review): the /1000 presumably converts milliseconds to the
		// second-resolution TimeStamp — confirm against the Cloudbrain API.
		if task.StartTime == 0 {
			task.StartTime = timeutil.TimeStamp(createdAt / 1000)
		}
		if task.EndTime == 0 && IsCloudBrainOneDebugJobTerminal(task.Status) && completedAt > 0 {
			task.EndTime = timeutil.TimeStamp(completedAt / 1000)
		}
	}

	task.ComputeAndSetDuration()
}

type CloudbrainInfo struct {
Cloudbrain `xorm:"extends"`
User `xorm:"extends"`
@@ -368,7 +388,7 @@ type JobResultPayload struct {
AppProgress string `json:"appProgress"`
AppTrackingURL string `json:"appTrackingUrl"`
AppLaunchedTime int64 `json:"appLaunchedTime"`
AppCompletedTime interface{} `json:"appCompletedTime"`
AppCompletedTime int64 `json:"appCompletedTime"`
AppExitCode int `json:"appExitCode"`
AppExitDiagnostics string `json:"appExitDiagnostics"`
AppExitType interface{} `json:"appExitType"`


+ 3
- 12
routers/api/v1/repo/cloudbrain.go View File

@@ -17,7 +17,6 @@ import (
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
routerRepo "code.gitea.io/gitea/routers/repo"
)

@@ -74,24 +73,16 @@ func GetCloudbrainTask(ctx *context.APIContext) {
}

job.Status = result.JobStatus.State
taskRoles := result.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
taskRoles := result.TaskRoles
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))

job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
job.ContainerID = taskRes.TaskStatuses[0].ContainerID
job.Status = taskRes.TaskStatuses[0].State

if job.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
job.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
}

if result.JobStatus.State != string(models.JobWaiting) {
if job.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(job.Status) {
job.EndTime = timeutil.TimeStampNow()
}
job.ComputeAndSetDuration()
models.ParseAndSetDurationFromCloudBrainOne(result, job)
err = models.UpdateJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)


+ 27
- 34
routers/repo/cloudbrain.go View File

@@ -435,9 +435,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo
task.Status = taskRes.TaskStatuses[0].State
task.ContainerID = taskRes.TaskStatuses[0].ContainerID
task.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
models.ParseAndSetDurationFromCloudBrainOne(jobRes, task)
err = models.UpdateJob(task)
if err != nil {
ctx.Data["error"] = err.Error()
@@ -1048,14 +1046,7 @@ func SyncCloudbrainStatus() {
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
task.Status = taskRes.TaskStatuses[0].State
if task.Status != string(models.JobWaiting) {
task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix()
if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() {
task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix())
}
if task.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
models.ParseAndSetDurationFromCloudBrainOne(jobRes, task)
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
@@ -1111,7 +1102,7 @@ func SyncCloudbrainStatus() {
continue
}
}
} else if task.JobType == string(models.JobTypeTrain) {
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
@@ -1152,6 +1143,7 @@ func SyncCloudbrainStatus() {
func HandleTaskWithNoDuration(ctx *context.Context) {
log.Info("HandleTaskWithNoDuration start")
count := 0
start := time.Now().Unix()
for {
cloudBrains, err := models.GetStoppedJobWithNoDurationJob()
if err != nil {
@@ -1168,13 +1160,19 @@ func HandleTaskWithNoDuration(ctx *context.Context) {
log.Info("HandleTaskWithNoTrainJobDuration:task less than 100")
break
}
if time.Now().Unix()-start > 600 {
log.Info("HandleTaskWithNoDuration : time out")
ctx.JSON(200, fmt.Sprintf("task stop for time out,count=%d", count))
return
}
}
log.Info("HandleTaskWithNoTrainJobDuration:count=%d", count)
ctx.JSON(200, "success")
ctx.JSON(200, fmt.Sprintf("success,count=%d", count))
}

func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
for _, task := range cloudBrains {
time.Sleep(time.Millisecond * 100)
log.Info("Handle job ,%+v", task)
if task.Type == models.TypeCloudBrainOne {
result, err := cloudbrain.GetJob(task.JobID)
@@ -1201,18 +1199,17 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
continue
}
task.Status = taskRes.TaskStatuses[0].State
startTime := taskRes.TaskStatuses[0].StartAt.Unix()
endTime := taskRes.TaskStatuses[0].FinishedAt.Unix()
log.Info("task startTime = %v endTime= %v ,jobId=%d", startTime, endTime, task.ID)
if startTime > 0 {
task.StartTime = timeutil.TimeStamp(startTime)
} else {
task.StartTime = task.CreatedUnix
}
if endTime > 0 {
task.EndTime = timeutil.TimeStamp(endTime)
log.Info("task startTime = %v endTime= %v ,jobId=%d", jobRes.JobStatus.StartTime, jobRes.JobStatus.EndTime, task.ID)
if jobRes.JobStatus.CreatedTime > 0 {
task.StartTime = timeutil.TimeStamp(jobRes.JobStatus.CreatedTime / 1000)
if jobRes.JobStatus.CompletedTime > 0 {
task.EndTime = timeutil.TimeStamp(jobRes.JobStatus.CompletedTime / 1000)
} else {
task.EndTime = task.UpdatedUnix
}
} else {
task.EndTime = task.UpdatedUnix
task.StartTime = 0
task.EndTime = 0
}

if task.EndTime < task.StartTime {
@@ -1221,7 +1218,8 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
task.StartTime = task.EndTime
task.EndTime = st
}
task.ComputeAndSetDuration()
task.Duration = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix()
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
@@ -1233,13 +1231,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
result, err := modelarts.GetNotebook2(task.JobID)
if err != nil {
log.Error("GetJob(%s) failed:%v", task.JobName, err)
task.StartTime = task.CreatedUnix
task.EndTime = task.UpdatedUnix
task.ComputeAndSetDuration()
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
}
updateDefaultDuration(task)
continue
}

@@ -1248,7 +1240,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
startTime := result.Lease.CreateTime
duration := result.Lease.Duration / 1000
if startTime > 0 {
task.StartTime = timeutil.TimeStamp(startTime)
task.StartTime = timeutil.TimeStamp(startTime / 1000)
task.EndTime = task.StartTime.Add(duration)
}
task.ComputeAndSetDuration()
@@ -1258,10 +1250,11 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) {
continue
}
}
} else if task.JobType == string(models.JobTypeTrain) {
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
updateDefaultDuration(task)
continue
}



+ 14
- 2
templates/repo/cloudbrain/benchmark/show.tmpl View File

@@ -196,7 +196,13 @@ td, th {
<span class="accordion-panel-title-content">
<span>
<div class="ac-display-inblock title_text acc-margin-bottom">
<span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{if not (eq .StartTime 0)}}
<td>{{TimeSinceUnix1 .StartTime}}</td>
{{else}}
<td>{{TimeSinceUnix1 .CreatedUnix}}</td>
{{end}}
</span>

<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
<span id="{{.VersionName}}-status-span"><i id="icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span>
@@ -252,7 +258,13 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
{{TimeSinceUnix1 .CreatedUnix}}
{{end}}
</span>
</div>
</td>
</tr>


+ 10
- 2
templates/repo/cloudbrain/show.tmpl View File

@@ -74,11 +74,19 @@
</tr>
<tr>
<td> 开始时间 </td>
<td>{{.JobStatus.StartTime}}</td>
{{if not (eq $.task.StartTime 0)}}
<td>{{TimeSinceUnix1 $.task.StartTime}}</td>
{{else}}
<td>无</td>
{{end}}
</tr>
<tr>
<td> 结束时间 </td>
<td>{{.JobStatus.EndTime}}</td>
{{if not (eq $.task.EndTime 0)}}
<td>{{TimeSinceUnix1 $.task.EndTime}}</td>
{{else}}
<td>无</td>
{{end}}
</tr>
<tr>
<td> ExitCode </td>


+ 7
- 1
templates/repo/modelarts/inferencejob/show.tmpl View File

@@ -232,7 +232,13 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .StartTime 0)}}
{{TimeSinceUnix1 .StartTime}}
{{else}}
{{TimeSinceUnix1 .CreatedUnix}}
{{end}}
</span>
</div>
</td>
</tr>


+ 12
- 2
templates/repo/modelarts/trainjob/show.tmpl View File

@@ -223,7 +223,12 @@ td, th {
</div>
<div class="ac-display-inblock title_text acc-margin-bottom">

<span class="cti-mgRight-sm">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span>
<span class="cti-mgRight-sm">
{{if not (eq .Cloudbrain.StartTime 0)}}
{{TimeSinceUnix1 .Cloudbrain.StartTime}}
{{else}}
{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}
{{end}}</span>
<span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span>
<span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span>
<span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}:
@@ -293,7 +298,12 @@ td, th {

<td class="ti-text-form-content">
<div class="text-span text-span-w">
<span style="font-size: 12px;" class="">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span>
<span style="font-size: 12px;" class="">
{{if not (eq .Cloudbrain.StartTime 0)}}
{{TimeSinceUnix1 .Cloudbrain.StartTime}}
{{else}}
{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}
{{end}}</span>
</div>
</td>
</tr>


Loading…
Cancel
Save