Browse Source

状态同步

tags/v1.22.12.1^2
ychao_1983 3 years ago
parent
commit
1da1785dbc
1 changed files with 37 additions and 33 deletions
  1. +37
    -33
      routers/repo/cloudbrain.go

+ 37
- 33
routers/repo/cloudbrain.go View File

@@ -1895,44 +1895,48 @@ func SyncCloudbrainStatus() {
log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType) log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType)
} }
} else if task.Type == models.TypeC2Net { } else if task.Type == models.TypeC2Net {
result, err := grampus.GetJob(task.JobID)
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
continue
}

if result != nil {
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
if task.JobType == string(models.JobTypeDebug) {
cloudbrainTask.SyncGrampusNotebookStatus(task)
} else {
result, err := grampus.GetJob(task.JobID)
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
continue
} }
oldStatus := task.Status
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
task.Duration = result.JobInfo.RunSec


if task.Duration < 0 {
task.Duration = 0
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
if result != nil {
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 {
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
}
oldStatus := task.Status
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
task.Duration = result.JobInfo.RunSec


if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
if models.IsTrainJobTerminal(task.Status) && task.ComputeResource == models.NPUResource {
if len(result.JobInfo.Tasks[0].CenterID) == 1 {
urchin.GetBackNpuModel(task.ID, grampus.GetRemoteEndPoint(result.JobInfo.Tasks[0].CenterID[0]), grampus.BucketRemote, grampus.GetNpuModelObjectKey(task.JobName), grampus.GetCenterProxy(setting.Grampus.LocalCenterID))
if task.Duration < 0 {
task.Duration = 0
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)

if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
}
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
if models.IsTrainJobTerminal(task.Status) && task.ComputeResource == models.NPUResource {
if len(result.JobInfo.Tasks[0].CenterID) == 1 {
urchin.GetBackNpuModel(task.ID, grampus.GetRemoteEndPoint(result.JobInfo.Tasks[0].CenterID[0]), grampus.BucketRemote, grampus.GetNpuModelObjectKey(task.JobName), grampus.GetCenterProxy(setting.Grampus.LocalCenterID))
}
} }
} }
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
continue
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
continue
}
} }
} }
} else { } else {


Loading…
Cancel
Save