Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/1789 Reviewed-by: lewis <747342561@qq.com>tags/v1.22.3.2^2
| @@ -170,7 +170,9 @@ func (task *Cloudbrain) ComputeAndSetDuration() { | |||
| if task.StartTime == 0 { | |||
| d = 0 | |||
| } else if task.EndTime == 0 { | |||
| d = time.Now().Unix() - task.StartTime.AsTime().Unix() | |||
| if !task.IsTerminal() { | |||
| d = time.Now().Unix() - task.StartTime.AsTime().Unix() | |||
| } | |||
| } else { | |||
| d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix() | |||
| } | |||
| @@ -182,6 +184,11 @@ func (task *Cloudbrain) ComputeAndSetDuration() { | |||
| task.TrainJobDuration = ConvertDurationToStr(d) | |||
| } | |||
| // IsTerminal reports whether the task's Status matches any of the | |||
| // finished-state constants listed below. | |||
| // | |||
| // A membership loop is used rather than a switch: the constants come from | |||
| // two different status families and a switch would refuse to compile if any | |||
| // two of them turned out to share the same string value. | |||
| func (task *Cloudbrain) IsTerminal() bool { | |||
| finished := []string{ | |||
| string(ModelArtsTrainJobCompleted), | |||
| string(ModelArtsTrainJobFailed), | |||
| string(ModelArtsTrainJobKilled), | |||
| string(ModelArtsStopped), | |||
| string(JobStopped), | |||
| string(JobFailed), | |||
| string(JobSucceeded), | |||
| } | |||
| for _, state := range finished { | |||
| if task.Status == state { | |||
| return true | |||
| } | |||
| } | |||
| return false | |||
| } | |||
| func ConvertDurationToStr(duration int64) string { | |||
| if duration == 0 { | |||
| return DURATION_STR_ZERO | |||
| @@ -201,6 +208,19 @@ func IsCloudBrainOneDebugJobTerminal(status string) bool { | |||
| return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded) | |||
| } | |||
| // ParseAndSetDurationFromCloudBrainOne fills in task.StartTime and | |||
| // task.EndTime from the job status carried in result, then recomputes the | |||
| // task's duration via ComputeAndSetDuration. | |||
| // | |||
| // Neither timestamp is touched unless result.JobStatus.CreatedTime is | |||
| // positive, and a value already stored on the task is never overwritten. | |||
| // EndTime is additionally only set when the task's status is terminal | |||
| // according to IsCloudBrainOneDebugJobTerminal and a positive CompletedTime | |||
| // is present. Both payload values are divided by 1000 before being stored | |||
| // (presumably milliseconds to seconds; confirm against the API). | |||
| func ParseAndSetDurationFromCloudBrainOne(result JobResultPayload, task *Cloudbrain) { | |||
| created := result.JobStatus.CreatedTime | |||
| if created > 0 { | |||
| if task.StartTime == 0 { | |||
| task.StartTime = timeutil.TimeStamp(created / 1000) | |||
| } | |||
| completed := result.JobStatus.CompletedTime | |||
| if task.EndTime == 0 && completed > 0 && IsCloudBrainOneDebugJobTerminal(task.Status) { | |||
| task.EndTime = timeutil.TimeStamp(completed / 1000) | |||
| } | |||
| } | |||
| // Always refresh the derived duration fields, even when nothing changed. | |||
| task.ComputeAndSetDuration() | |||
| } | |||
| type CloudbrainInfo struct { | |||
| Cloudbrain `xorm:"extends"` | |||
| User `xorm:"extends"` | |||
| @@ -368,7 +388,7 @@ type JobResultPayload struct { | |||
| AppProgress string `json:"appProgress"` | |||
| AppTrackingURL string `json:"appTrackingUrl"` | |||
| AppLaunchedTime int64 `json:"appLaunchedTime"` | |||
| AppCompletedTime interface{} `json:"appCompletedTime"` | |||
| AppCompletedTime int64 `json:"appCompletedTime"` | |||
| AppExitCode int `json:"appExitCode"` | |||
| AppExitDiagnostics string `json:"appExitDiagnostics"` | |||
| AppExitType interface{} `json:"appExitType"` | |||
| @@ -17,7 +17,6 @@ import ( | |||
| "code.gitea.io/gitea/modules/context" | |||
| "code.gitea.io/gitea/modules/log" | |||
| "code.gitea.io/gitea/modules/storage" | |||
| "code.gitea.io/gitea/modules/timeutil" | |||
| routerRepo "code.gitea.io/gitea/routers/repo" | |||
| ) | |||
| @@ -74,24 +73,16 @@ func GetCloudbrainTask(ctx *context.APIContext) { | |||
| } | |||
| job.Status = result.JobStatus.State | |||
| taskRoles := result.TaskRoles | |||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
| if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) { | |||
| taskRoles := result.TaskRoles | |||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
| job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP | |||
| job.ContainerID = taskRes.TaskStatuses[0].ContainerID | |||
| job.Status = taskRes.TaskStatuses[0].State | |||
| if job.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { | |||
| job.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) | |||
| } | |||
| } | |||
| if result.JobStatus.State != string(models.JobWaiting) { | |||
| if job.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(job.Status) { | |||
| job.EndTime = timeutil.TimeStampNow() | |||
| } | |||
| job.ComputeAndSetDuration() | |||
| models.ParseAndSetDurationFromCloudBrainOne(result, job) | |||
| err = models.UpdateJob(job) | |||
| if err != nil { | |||
| log.Error("UpdateJob failed:", err) | |||
| @@ -435,9 +435,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo | |||
| task.Status = taskRes.TaskStatuses[0].State | |||
| task.ContainerID = taskRes.TaskStatuses[0].ContainerID | |||
| task.ContainerIp = taskRes.TaskStatuses[0].ContainerIP | |||
| if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { | |||
| task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) | |||
| } | |||
| models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) | |||
| err = models.UpdateJob(task) | |||
| if err != nil { | |||
| ctx.Data["error"] = err.Error() | |||
| @@ -1048,14 +1046,7 @@ func SyncCloudbrainStatus() { | |||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
| task.Status = taskRes.TaskStatuses[0].State | |||
| if task.Status != string(models.JobWaiting) { | |||
| task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix() | |||
| if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { | |||
| task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) | |||
| } | |||
| if task.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(task.Status) { | |||
| task.EndTime = timeutil.TimeStampNow() | |||
| } | |||
| task.ComputeAndSetDuration() | |||
| models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) | |||
| err = models.UpdateJob(task) | |||
| if err != nil { | |||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||
| @@ -1111,7 +1102,7 @@ func SyncCloudbrainStatus() { | |||
| continue | |||
| } | |||
| } | |||
| } else if task.JobType == string(models.JobTypeTrain) { | |||
| } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | |||
| result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||
| if err != nil { | |||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||
| @@ -1152,6 +1143,7 @@ func SyncCloudbrainStatus() { | |||
| func HandleTaskWithNoDuration(ctx *context.Context) { | |||
| log.Info("HandleTaskWithNoDuration start") | |||
| count := 0 | |||
| start := time.Now().Unix() | |||
| for { | |||
| cloudBrains, err := models.GetStoppedJobWithNoDurationJob() | |||
| if err != nil { | |||
| @@ -1168,13 +1160,19 @@ func HandleTaskWithNoDuration(ctx *context.Context) { | |||
| log.Info("HandleTaskWithNoTrainJobDuration:task less than 100") | |||
| break | |||
| } | |||
| if time.Now().Unix()-start > 600 { | |||
| log.Info("HandleTaskWithNoDuration : time out") | |||
| ctx.JSON(200, fmt.Sprintf("task stop for time out,count=%d", count)) | |||
| return | |||
| } | |||
| } | |||
| log.Info("HandleTaskWithNoTrainJobDuration:count=%d", count) | |||
| ctx.JSON(200, "success") | |||
| ctx.JSON(200, fmt.Sprintf("success,count=%d", count)) | |||
| } | |||
| func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| for _, task := range cloudBrains { | |||
| time.Sleep(time.Millisecond * 100) | |||
| log.Info("Handle job ,%+v", task) | |||
| if task.Type == models.TypeCloudBrainOne { | |||
| result, err := cloudbrain.GetJob(task.JobID) | |||
| @@ -1201,18 +1199,17 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| continue | |||
| } | |||
| task.Status = taskRes.TaskStatuses[0].State | |||
| startTime := taskRes.TaskStatuses[0].StartAt.Unix() | |||
| endTime := taskRes.TaskStatuses[0].FinishedAt.Unix() | |||
| log.Info("task startTime = %v endTime= %v ,jobId=%d", startTime, endTime, task.ID) | |||
| if startTime > 0 { | |||
| task.StartTime = timeutil.TimeStamp(startTime) | |||
| } else { | |||
| task.StartTime = task.CreatedUnix | |||
| } | |||
| if endTime > 0 { | |||
| task.EndTime = timeutil.TimeStamp(endTime) | |||
| log.Info("task startTime = %v endTime= %v ,jobId=%d", jobRes.JobStatus.StartTime, jobRes.JobStatus.EndTime, task.ID) | |||
| if jobRes.JobStatus.CreatedTime > 0 { | |||
| task.StartTime = timeutil.TimeStamp(jobRes.JobStatus.CreatedTime / 1000) | |||
| if jobRes.JobStatus.CompletedTime > 0 { | |||
| task.EndTime = timeutil.TimeStamp(jobRes.JobStatus.CompletedTime / 1000) | |||
| } else { | |||
| task.EndTime = task.UpdatedUnix | |||
| } | |||
| } else { | |||
| task.EndTime = task.UpdatedUnix | |||
| task.StartTime = 0 | |||
| task.EndTime = 0 | |||
| } | |||
| if task.EndTime < task.StartTime { | |||
| @@ -1221,7 +1218,8 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| task.StartTime = task.EndTime | |||
| task.EndTime = st | |||
| } | |||
| task.ComputeAndSetDuration() | |||
| task.Duration = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix() | |||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
| err = models.UpdateJob(task) | |||
| if err != nil { | |||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||
| @@ -1233,13 +1231,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| result, err := modelarts.GetNotebook2(task.JobID) | |||
| if err != nil { | |||
| log.Error("GetJob(%s) failed:%v", task.JobName, err) | |||
| task.StartTime = task.CreatedUnix | |||
| task.EndTime = task.UpdatedUnix | |||
| task.ComputeAndSetDuration() | |||
| err = models.UpdateJob(task) | |||
| if err != nil { | |||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||
| } | |||
| updateDefaultDuration(task) | |||
| continue | |||
| } | |||
| @@ -1248,7 +1240,7 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| startTime := result.Lease.CreateTime | |||
| duration := result.Lease.Duration / 1000 | |||
| if startTime > 0 { | |||
| task.StartTime = timeutil.TimeStamp(startTime) | |||
| task.StartTime = timeutil.TimeStamp(startTime / 1000) | |||
| task.EndTime = task.StartTime.Add(duration) | |||
| } | |||
| task.ComputeAndSetDuration() | |||
| @@ -1258,10 +1250,11 @@ func handleNoDurationTask(cloudBrains []*models.Cloudbrain) { | |||
| continue | |||
| } | |||
| } | |||
| } else if task.JobType == string(models.JobTypeTrain) { | |||
| } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | |||
| result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||
| if err != nil { | |||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||
| updateDefaultDuration(task) | |||
| continue | |||
| } | |||
| @@ -196,7 +196,13 @@ td, th { | |||
| <span class="accordion-panel-title-content"> | |||
| <span> | |||
| <div class="ac-display-inblock title_text acc-margin-bottom"> | |||
| <span class="cti-mgRight-sm">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
| <span class="cti-mgRight-sm"> | |||
| {{if not (eq .StartTime 0)}} | |||
| <td>{{TimeSinceUnix1 .StartTime}}</td> | |||
| {{else}} | |||
| <td>{{TimeSinceUnix1 .CreatedUnix}}</td> | |||
| {{end}} | |||
| </span> | |||
| <span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}: | |||
| <span id="{{.VersionName}}-status-span"><i id="icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||
| @@ -252,7 +258,13 @@ td, th { | |||
| <td class="ti-text-form-content"> | |||
| <div class="text-span text-span-w"> | |||
| <span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
| <span style="font-size: 12px;" class=""> | |||
| {{if not (eq .StartTime 0)}} | |||
| {{TimeSinceUnix1 .StartTime}} | |||
| {{else}} | |||
| {{TimeSinceUnix1 .CreatedUnix}} | |||
| {{end}} | |||
| </span> | |||
| </div> | |||
| </td> | |||
| </tr> | |||
| @@ -74,11 +74,19 @@ | |||
| </tr> | |||
| <tr> | |||
| <td> 开始时间 </td> | |||
| <td>{{.JobStatus.StartTime}}</td> | |||
| {{if not (eq $.task.StartTime 0)}} | |||
| <td>{{TimeSinceUnix1 $.task.StartTime}}</td> | |||
| {{else}} | |||
| <td>无</td> | |||
| {{end}} | |||
| </tr> | |||
| <tr> | |||
| <td> 结束时间 </td> | |||
| <td>{{.JobStatus.EndTime}}</td> | |||
| {{if not (eq $.task.EndTime 0)}} | |||
| <td>{{TimeSinceUnix1 $.task.EndTime}}</td> | |||
| {{else}} | |||
| <td>无</td> | |||
| {{end}} | |||
| </tr> | |||
| <tr> | |||
| <td> ExitCode </td> | |||
| @@ -232,7 +232,13 @@ td, th { | |||
| <td class="ti-text-form-content"> | |||
| <div class="text-span text-span-w"> | |||
| <span style="font-size: 12px;" class="">{{TimeSinceUnix1 .CreatedUnix}}</span> | |||
| <span style="font-size: 12px;" class=""> | |||
| {{if not (eq .StartTime 0)}} | |||
| {{TimeSinceUnix1 .StartTime}} | |||
| {{else}} | |||
| {{TimeSinceUnix1 .CreatedUnix}} | |||
| {{end}} | |||
| </span> | |||
| </div> | |||
| </td> | |||
| </tr> | |||
| @@ -223,7 +223,12 @@ td, th { | |||
| </div> | |||
| <div class="ac-display-inblock title_text acc-margin-bottom"> | |||
| <span class="cti-mgRight-sm">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span> | |||
| <span class="cti-mgRight-sm"> | |||
| {{if not (eq .Cloudbrain.StartTime 0)}} | |||
| {{TimeSinceUnix1 .Cloudbrain.StartTime}} | |||
| {{else}} | |||
| {{TimeSinceUnix1 .Cloudbrain.CreatedUnix}} | |||
| {{end}}</span> | |||
| <span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.current_version"}}:{{.VersionName}}</span> | |||
| <span class="cti-mgRight-sm"> {{$.i18n.Tr "repo.modelarts.parent_version"}}:{{.PreVersionName}}</span> | |||
| <span class="cti-mgRight-sm">{{$.i18n.Tr "repo.modelarts.status"}}: | |||
| @@ -293,7 +298,12 @@ td, th { | |||
| <td class="ti-text-form-content"> | |||
| <div class="text-span text-span-w"> | |||
| <span style="font-size: 12px;" class="">{{TimeSinceUnix1 .Cloudbrain.CreatedUnix}}</span> | |||
| <span style="font-size: 12px;" class=""> | |||
| {{if not (eq .Cloudbrain.StartTime 0)}} | |||
| {{TimeSinceUnix1 .Cloudbrain.StartTime}} | |||
| {{else}} | |||
| {{TimeSinceUnix1 .Cloudbrain.CreatedUnix}} | |||
| {{end}}</span> | |||
| </div> | |||
| </td> | |||
| </tr> | |||