| @@ -1110,7 +1110,8 @@ func UpdateJob(job *Cloudbrain) error { | |||||
// updateJob persists job through the given engine, targeting the rows whose
// job_id column equals job.JobID, and returns whatever error the update yields
// (the affected-row count is discarded).
//
// This span is a diff view: the first Update row below is the removed version,
// which limited the write to the status, container_id and container_ip
// columns; the rows after it are the replacement, which calls Update with no
// explicit column list.
//
// NOTE(review): presumably xorm's struct-based Update without Cols/AllCols
// skips zero-valued fields, so a field reset to 0 or "" may not be written —
// confirm against the xorm version in use.
| func updateJob(e Engine, job *Cloudbrain) error { | func updateJob(e Engine, job *Cloudbrain) error { | ||||
| var sess *xorm.Session | var sess *xorm.Session | ||||
| sess = e.Where("job_id = ?", job.JobID) | sess = e.Where("job_id = ?", job.JobID) | ||||
| _, err := sess.Cols("status", "container_id", "container_ip").Update(job) | |||||
| //_, err := sess.Cols("status", "container_id", "container_ip").Update(job) | |||||
| _, err := sess.Update(job) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -134,7 +134,7 @@ func registerHandleBlockChainUnSuccessRepos() { | |||||
| RegisterTaskFatal("handle_blockchain_unsuccess_repos", &BaseConfig{ | RegisterTaskFatal("handle_blockchain_unsuccess_repos", &BaseConfig{ | ||||
| Enabled: true, | Enabled: true, | ||||
| RunAtStart: true, | RunAtStart: true, | ||||
| Schedule: "@every 1m", | |||||
| Schedule: "@every 10m", | |||||
| }, func(ctx context.Context, _ *models.User, _ Config) error { | }, func(ctx context.Context, _ *models.User, _ Config) error { | ||||
| repo.HandleBlockChainUnSuccessRepos() | repo.HandleBlockChainUnSuccessRepos() | ||||
| return nil | return nil | ||||
| @@ -145,7 +145,7 @@ func registerHandleBlockChainMergedPulls() { | |||||
| RegisterTaskFatal("handle_blockchain_merged_pull", &BaseConfig{ | RegisterTaskFatal("handle_blockchain_merged_pull", &BaseConfig{ | ||||
| Enabled: true, | Enabled: true, | ||||
| RunAtStart: true, | RunAtStart: true, | ||||
| Schedule: "@every 1m", | |||||
| Schedule: "@every 10m", | |||||
| }, func(ctx context.Context, _ *models.User, _ Config) error { | }, func(ctx context.Context, _ *models.User, _ Config) error { | ||||
| repo.HandleBlockChainMergedPulls() | repo.HandleBlockChainMergedPulls() | ||||
| return nil | return nil | ||||
| @@ -156,7 +156,7 @@ func registerHandleBlockChainUnSuccessCommits() { | |||||
| RegisterTaskFatal("handle_blockchain_unsuccess_commits", &BaseConfig{ | RegisterTaskFatal("handle_blockchain_unsuccess_commits", &BaseConfig{ | ||||
| Enabled: true, | Enabled: true, | ||||
| RunAtStart: true, | RunAtStart: true, | ||||
| Schedule: "@every 3m", | |||||
| Schedule: "@every 10m", | |||||
| }, func(ctx context.Context, _ *models.User, _ Config) error { | }, func(ctx context.Context, _ *models.User, _ Config) error { | ||||
| repo.HandleBlockChainUnSuccessCommits() | repo.HandleBlockChainUnSuccessCommits() | ||||
| return nil | return nil | ||||
| @@ -448,23 +448,24 @@ var ( | |||||
| GpuTypes string | GpuTypes string | ||||
| DebugServerHost string | DebugServerHost string | ||||
| ResourceSpecs string | ResourceSpecs string | ||||
| MaxDuration int64 | |||||
| //benchmark config | //benchmark config | ||||
| IsBenchmarkEnabled bool | IsBenchmarkEnabled bool | ||||
| BenchmarkOwner string | |||||
| BenchmarkOwner string | |||||
| BenchmarkName string | BenchmarkName string | ||||
| BenchmarkServerHost string | BenchmarkServerHost string | ||||
| BenchmarkCategory string | BenchmarkCategory string | ||||
| //snn4imagenet config | //snn4imagenet config | ||||
| IsSnn4imagenetEnabled bool | IsSnn4imagenetEnabled bool | ||||
| Snn4imagenetOwner string | |||||
| Snn4imagenetOwner string | |||||
| Snn4imagenetName string | Snn4imagenetName string | ||||
| Snn4imagenetServerHost string | Snn4imagenetServerHost string | ||||
| //brainscore config | //brainscore config | ||||
| IsBrainScoreEnabled bool | IsBrainScoreEnabled bool | ||||
| BrainScoreOwner string | |||||
| BrainScoreOwner string | |||||
| BrainScoreName string | BrainScoreName string | ||||
| BrainScoreServerHost string | BrainScoreServerHost string | ||||
| @@ -1238,6 +1239,7 @@ func NewContext() { | |||||
| JobType = sec.Key("GPU_TYPE_DEFAULT").MustString("openidebug") | JobType = sec.Key("GPU_TYPE_DEFAULT").MustString("openidebug") | ||||
| GpuTypes = sec.Key("GPU_TYPES").MustString("") | GpuTypes = sec.Key("GPU_TYPES").MustString("") | ||||
| ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") | ResourceSpecs = sec.Key("RESOURCE_SPECS").MustString("") | ||||
| MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400) | |||||
| sec = Cfg.Section("benchmark") | sec = Cfg.Section("benchmark") | ||||
| IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) | IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false) | ||||
| @@ -822,10 +822,24 @@ func SyncCloudbrainStatus() { | |||||
| taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | ||||
| task.Status = taskRes.TaskStatuses[0].State | task.Status = taskRes.TaskStatuses[0].State | ||||
| if task.Status != string(models.JobWaiting) { | if task.Status != string(models.JobWaiting) { | ||||
| task.Duration = time.Now().Unix() - taskRes.TaskStatuses[0].StartAt.Unix() | |||||
| err = models.UpdateJob(task) | err = models.UpdateJob(task) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | ||||
| continue | |||||
| } | |||||
| if task.Duration >= setting.MaxDuration { | |||||
| log.Info("begin to stop job(%s), because of the duration", task.JobName) | |||||
| err = cloudbrain.StopJob(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("StopJob(%s) failed:%v", task.JobName, err) | |||||
| continue | |||||
| } | |||||
| task.Status = string(models.JobStopped) | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||