|
|
|
@@ -849,7 +849,7 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
|
if isTempJob(task.JobID, task.Status) { |
|
|
|
if task.VersionCount > VersionCountOne { |
|
|
|
//multi version |
|
|
|
result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix)) |
|
|
|
result, err := GetTrainJobVersionList(1000, 1, task.JobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobVersionList failed:%v", err) |
|
|
|
return err |
|
|
|
@@ -1043,3 +1043,149 @@ func isTempJob(jobID, status string) bool { |
|
|
|
} |
|
|
|
return false |
|
|
|
} |
|
|
|
|
|
|
|
func SyncTempStatusJob() { |
|
|
|
jobs, err := models.GetCloudBrainTempJobs() |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudBrainTempJobs failed:%v", err.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
for _, temp := range jobs { |
|
|
|
task, err := models.GetCloudbrainByID(strconv.FormatInt(temp.CloudbrainID, 10)) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByID failed:%v", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
if temp.Type == models.TypeCloudBrainTwo { |
|
|
|
if temp.JobType == string(models.JobTypeDebug) { |
|
|
|
result, err := GetNotebookList(1000, 0, "createTime", "DESC", temp.JobName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetNotebookList failed:%v", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.IncreaseCloudbrainTempQueryTimes(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("IncreaseCloudbrainTempQueryTimes failed:%v", err) |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
count, err := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainCountByJobName failed:%v", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
if len(result.NotebookList) == count { |
|
|
|
if result.NotebookList[0].JobName == temp.JobName { |
|
|
|
log.Info("find the record(%s)", temp.JobName) |
|
|
|
task.Status = result.NotebookList[0].Status |
|
|
|
task.JobID = result.NotebookList[0].JobID |
|
|
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", temp.JobName) |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", temp.JobName) |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", temp.JobName) |
|
|
|
} |
|
|
|
} else if temp.JobType == string(models.JobTypeTrain) || temp.JobType == string(models.JobTypeInference) { |
|
|
|
if task.VersionCount > VersionCountOne { |
|
|
|
//multi version |
|
|
|
result, err := GetTrainJobVersionList(1000, 1, task.JobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobVersionList failed:%v", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.IncreaseCloudbrainTempQueryTimes(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("IncreaseCloudbrainTempQueryTimes failed:%v", err) |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
if strconv.FormatInt(result.JobID, 10) == task.JobID && result.JobName == task.JobName { |
|
|
|
if result.VersionCount == int64(task.VersionCount) { |
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
task.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) |
|
|
|
task.VersionName = result.JobVersionList[0].VersionName |
|
|
|
task.VersionID = result.JobVersionList[0].VersionID |
|
|
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
//inference or one version |
|
|
|
result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobList failed:%v", err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.IncreaseCloudbrainTempQueryTimes(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("IncreaseCloudbrainTempQueryTimes failed:%v", err) |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
for _, job := range result.JobList { |
|
|
|
if task.JobName == job.JobName { |
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
task.Status = TransTrainJobStatus(job.IntStatus) |
|
|
|
task.JobID = strconv.FormatInt(job.JobID, 10) |
|
|
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
continue |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
log.Error("invalid job_type(%d)", temp.Type) |
|
|
|
continue |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return |
|
|
|
} |