diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 6b5f9c1ad..4c55ffe87 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1,6 +1,7 @@ package models import ( + "code.gitea.io/gitea/modules/util" "encoding/json" "fmt" "strconv" @@ -110,15 +111,15 @@ type Cloudbrain struct { ContainerIp string CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` - Duration int64 - TrainJobDuration string - Image string //镜像名称 - GpuQueue string //GPU类型即GPU队列 - ResourceSpecId int //GPU规格id - DeletedAt time.Time `xorm:"deleted"` - CanDebug bool `xorm:"-"` - CanDel bool `xorm:"-"` - CanModify bool `xorm:"-"` + Duration int64 `xorm:"DEFAULT 0"` //运行时长 单位秒 + TrainJobDuration string `xorm:"DEFAULT '00:00:00'"` + Image string //镜像名称 + GpuQueue string //GPU类型即GPU队列 + ResourceSpecId int //GPU规格id + DeletedAt time.Time `xorm:"deleted"` + CanDebug bool `xorm:"-"` + CanDel bool `xorm:"-"` + CanModify bool `xorm:"-"` Type int BenchmarkTypeID int BenchmarkChildTypeID int @@ -158,6 +159,44 @@ type Cloudbrain struct { Repo *Repository `xorm:"-"` BenchmarkTypeName string `xorm:"-"` BenchmarkTypeRankLink string `xorm:"-"` + StartTime timeutil.TimeStamp + EndTime timeutil.TimeStamp +} + +func (task *Cloudbrain) ComputeAndSetDuration() { + var d int64 + if task.StartTime == 0 { + d = 0 + } else if task.EndTime == 0 { + d = time.Now().Unix() - task.StartTime.AsTime().Unix() + } else { + d = task.EndTime.AsTime().Unix() - task.StartTime.AsTime().Unix() + } + + if d < 0 { + d = 0 + } + task.Duration = d + task.TrainJobDuration = ConvertDurationToStr(d) +} + +func ConvertDurationToStr(duration int64) string { + if duration == 0 { + return "00:00:00" + } + return util.AddZero(duration/3600) + ":" + util.AddZero(duration%3600/60) + ":" + util.AddZero(duration%60) +} + +func IsTrainJobTerminal(status string) bool { + return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == 
string(ModelArtsTrainJobKilled) +} + +func IsModelArtsDebugJobTerminal(status string) bool { + return status == string(ModelArtsStopped) +} + +func IsCloudBrainOneDebugJobTerminal(status string) bool { + return status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded) } type CloudbrainInfo struct { @@ -1027,6 +1066,7 @@ type GetTrainJobResult struct { NasShareAddr string `json:"nas_share_addr"` DatasetName string ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话 + StartTime int64 `json:"start_time"` //训练作业开始时间。 } type GetTrainJobLogResult struct { @@ -1335,13 +1375,13 @@ func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) - err := x.Cols("job_id", "status", "type", "job_type", "version_id").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains) + err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains) return cloudBrains, err } func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) - err := x.Cols("job_id", "status", "type", "job_type", "version_id").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains) + err := x.Cols("job_id", "status", "type", "job_type", "version_id", "start_time").Where("repo_id=? AND status !=?", repoID, string(JobStopped)).Find(&cloudBrains) return cloudBrains, err } @@ -1385,7 +1425,7 @@ func UpdateTrainJobVersion(job *Cloudbrain) error { func updateJobTrainVersion(e Engine, job *Cloudbrain) error { var sess *xorm.Session sess = e.Where("job_id = ? 
AND version_name=?", job.JobID, job.VersionName) - _, err := sess.Cols("status", "train_job_duration").Update(job) + _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time").Update(job) return err } @@ -1465,7 +1505,7 @@ func UpdateInferenceJob(job *Cloudbrain) error { func updateInferenceJob(e Engine, job *Cloudbrain) error { var sess *xorm.Session sess = e.Where("job_id = ?", job.JobID) - _, err := sess.Cols("status", "train_job_duration").Update(job) + _, err := sess.Cols("status", "train_job_duration", "duration", "start_time", "end_time").Update(job) return err } func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index e0e229606..d31943d42 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/timeutil" routerRepo "code.gitea.io/gitea/routers/repo" ) @@ -80,9 +81,17 @@ func GetCloudbrainTask(ctx *context.APIContext) { job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP job.ContainerID = taskRes.TaskStatuses[0].ContainerID job.Status = taskRes.TaskStatuses[0].State + + if job.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { + job.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) + } } if result.JobStatus.State != string(models.JobWaiting) { + if job.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(job.Status) { + job.EndTime = timeutil.TimeStampNow() + } + job.ComputeAndSetDuration() err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 0f6f74827..dc60eb3ff 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -16,7 +16,7 @@ import ( 
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/storage" - "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/timeutil" routerRepo "code.gitea.io/gitea/routers/repo" ) @@ -67,8 +67,14 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { ctx.NotFound(err) return } - + if job.StartTime == 0 && result.Lease.CreateTime > 0 { + job.StartTime = timeutil.TimeStamp(result.Lease.CreateTime / 1000) + } job.Status = result.Status + if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) { + job.EndTime = timeutil.TimeStampNow() + } + job.ComputeAndSetDuration() err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) @@ -166,15 +172,17 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { return } + if job.StartTime == 0 && result.StartTime > 0 { + job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) + } job.Status = modelarts.TransTrainJobStatus(result.IntStatus) - job.Duration = result.Duration + job.Duration = result.Duration / 1000 job.TrainJobDuration = result.TrainJobDuration - if result.Duration != 0 { - job.TrainJobDuration = util.AddZero(result.Duration/3600000) + ":" + util.AddZero(result.Duration%3600000/60000) + ":" + util.AddZero(result.Duration%60000/1000) + job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) - } else { - job.TrainJobDuration = "00:00:00" + if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { + job.EndTime = job.StartTime.Add(job.Duration) } err = models.UpdateTrainJobVersion(job) @@ -399,16 +407,17 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) { ctx.NotFound(err) return } - + if job.StartTime == 0 && result.StartTime > 0 { + job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) + } job.Status = modelarts.TransTrainJobStatus(result.IntStatus) - job.Duration = result.Duration + job.Duration = result.Duration / 1000 job.TrainJobDuration = result.TrainJobDuration 
- if result.Duration != 0 { - job.TrainJobDuration = util.AddZero(result.Duration/3600000) + ":" + util.AddZero(result.Duration%3600000/60000) + ":" + util.AddZero(result.Duration%60000/1000) + /* job.Duration is already in seconds; result.Duration is milliseconds */ job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) - } else { - job.TrainJobDuration = "00:00:00" + if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { + job.EndTime = job.StartTime.Add(job.Duration) } err = models.UpdateInferenceJob(job) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index e4abed413..770d02ce0 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,6 +2,7 @@ package repo import ( "bufio" + "code.gitea.io/gitea/modules/timeutil" "encoding/json" "errors" "fmt" @@ -422,6 +423,9 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo task.Status = taskRes.TaskStatuses[0].State task.ContainerID = taskRes.TaskStatuses[0].ContainerID task.ContainerIp = taskRes.TaskStatuses[0].ContainerIP + if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { + task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) + } err = models.UpdateJob(task) if err != nil { ctx.Data["error"] = err.Error() @@ -447,12 +451,6 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo task.User = user } - var duration int64 - if task.Status == string(models.JobRunning) { - duration = time.Now().Unix() - int64(task.CreatedUnix) - } else { - duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) - } if task.BenchmarkTypeID > 0 { for _, benchmarkType := range GetBenchmarkTypes(ctx).BenchmarkType { if task.BenchmarkTypeID == benchmarkType.Id { @@ -467,8 +465,16 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo } } } - - ctx.Data["duration"] = util.AddZero(duration/3600000) + ":" + util.AddZero(duration%3600000/60000) + ":" + util.AddZero(duration%60000/1000) + if 
task.TrainJobDuration == "" { + var duration int64 + if task.Status == string(models.JobRunning) { + duration = time.Now().Unix() - int64(task.CreatedUnix) + } else { + duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) + } + task.TrainJobDuration = models.ConvertDurationToStr(duration) + } + ctx.Data["duration"] = task.TrainJobDuration ctx.Data["task"] = task ctx.Data["jobName"] = task.JobName ctx.Data["displayJobName"] = task.DisplayJobName @@ -532,6 +538,10 @@ func CloudBrainStop(ctx *context.Context) { } task.Status = string(models.JobStopped) + if task.EndTime == 0 { + task.EndTime = timeutil.TimeStampNow() + } + task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) @@ -626,6 +636,10 @@ func logErrorAndUpdateJobStatus(err error, taskInfo *models.Cloudbrain) { log.Warn("Failed to stop cloudBrain job:"+taskInfo.JobID, err) } else { taskInfo.Status = string(models.JobStopped) + if taskInfo.EndTime == 0 { + taskInfo.EndTime = timeutil.TimeStampNow() + } + taskInfo.ComputeAndSetDuration() err = models.UpdateJob(taskInfo) if err != nil { log.Warn("UpdateJob failed", err) @@ -997,6 +1011,13 @@ func SyncCloudbrainStatus() { task.Status = taskRes.TaskStatuses[0].State if task.Status != string(models.JobWaiting) { task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix() + if task.StartTime == 0 && !taskRes.TaskStatuses[0].StartAt.IsZero() { + task.StartTime = timeutil.TimeStamp(taskRes.TaskStatuses[0].StartAt.Unix()) + } + if task.EndTime == 0 && models.IsCloudBrainOneDebugJobTerminal(task.Status) { + task.EndTime = timeutil.TimeStampNow() + } + task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1017,6 +1038,10 @@ func SyncCloudbrainStatus() { continue } task.Status = string(models.JobStopped) + if task.EndTime == 0 { + task.EndTime = timeutil.TimeStampNow() + } + 
task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1035,7 +1060,13 @@ func SyncCloudbrainStatus() { if result != nil { task.Status = result.Status - + if task.StartTime == 0 && result.Lease.CreateTime > 0 { + task.StartTime = timeutil.TimeStamp(result.Lease.CreateTime / 1000) + } + if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { + task.EndTime = timeutil.TimeStampNow() + } + task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1051,14 +1082,15 @@ func SyncCloudbrainStatus() { if result != nil { task.Status = modelarts.TransTrainJobStatus(result.IntStatus) - task.Duration = result.Duration + task.Duration = result.Duration / 1000 task.TrainJobDuration = result.TrainJobDuration - if result.Duration != 0 { - task.TrainJobDuration = util.AddZero(result.Duration/3600000) + ":" + util.AddZero(result.Duration%3600000/60000) + ":" + util.AddZero(result.Duration%60000/1000) - - } else { - task.TrainJobDuration = "00:00:00" + if task.StartTime == 0 && result.StartTime > 0 { + task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) + } + task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) + if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { + task.EndTime = task.StartTime.Add(task.Duration) } err = models.UpdateJob(task) @@ -1106,13 +1138,16 @@ func CloudBrainBenchmarkIndex(ctx *context.Context) { for i, task := range ciTasks { ciTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) ciTasks[i].Cloudbrain.ComputeResource = task.ComputeResource - var duration int64 - if task.Status == string(models.JobRunning) { - duration = time.Now().Unix() - int64(task.Cloudbrain.CreatedUnix) - } else { - duration = int64(task.Cloudbrain.UpdatedUnix) - int64(task.Cloudbrain.CreatedUnix) + if ciTasks[i].TrainJobDuration == "" { + var 
duration int64 + if task.Status == string(models.JobRunning) { + duration = time.Now().Unix() - int64(task.Cloudbrain.CreatedUnix) + } else { + duration = int64(task.Cloudbrain.UpdatedUnix) - int64(task.Cloudbrain.CreatedUnix) + } + ciTasks[i].TrainJobDuration = models.ConvertDurationToStr(duration) } - ciTasks[i].TrainJobDuration = util.AddZero(duration/3600000) + ":" + util.AddZero(duration%3600000/60000) + ":" + util.AddZero(duration%60000/1000) + ciTasks[i].BenchmarkTypeName = "" if task.BenchmarkTypeID > 0 { for _, benchmarkType := range GetBenchmarkTypes(ctx).BenchmarkType { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 88ed7c4b0..47f160e06 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -2,6 +2,7 @@ package repo import ( "archive/zip" + "code.gitea.io/gitea/modules/timeutil" "encoding/json" "errors" "io" @@ -414,6 +415,10 @@ func NotebookManage(ctx *context.Context) { } task.Status = res.Status + if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { + task.EndTime = timeutil.TimeStampNow() + } + task.ComputeAndSetDuration() err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])