Browse Source

debug

tags/v1.22.8.1^2
lewis 3 years ago
parent
commit
1e516cc458
2 changed files with 28 additions and 11 deletions
  1. modules/modelarts/modelarts.go (+27, -10)
  2. modules/setting/setting.go (+1, -1)

+ 27
- 10
modules/modelarts/modelarts.go View File

@@ -963,13 +963,30 @@ func handleTempNotebook(temp *models.CloudbrainTemp) error {
} }


if isExist { if isExist {
log.Info("find the record(%s)", temp.JobName)
_, err := ManageNotebook2(temp.JobID, models.NotebookAction{Action: models.ActionStop})
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", temp.JobName, err)
break
log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
if temp.Status == string(models.ModelArtsCreateFailed) {
err = models.UpdateCloudbrainTemp(temp)
if err != nil {
log.Error("UpdateCloudbrainTemp failed:%v", err)
break
}

_, err := DelNotebook2(temp.JobID)
if err != nil {
log.Error("DelNotebook2 failed:%v", err)
break
}

temp.Status = string(models.ModelArtsDeleted)
} else {
_, err := ManageNotebook2(temp.JobID, models.NotebookAction{Action: models.ActionStop})
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", temp.JobName, err)
break
}
temp.Status = string(models.ModelArtsStopping)
} }
temp.Status = string(models.ModelArtsStopping)
models.UpdateCloudbrainTemp(temp) models.UpdateCloudbrainTemp(temp)
} else { } else {
log.Error("can not find the record(%s) till now", temp.JobName) log.Error("can not find the record(%s) till now", temp.JobName)
@@ -1100,12 +1117,12 @@ func handleTempTrainJobMultiVersion(temp *models.CloudbrainTemp) error {
if result != nil { if result != nil {
count, _ := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType, temp.Type) count, _ := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType, temp.Type)
if result.VersionCount == int64(count+1) { if result.VersionCount == int64(count+1) {
log.Info("find the record(%s)", temp.JobName)

isExist = true isExist = true
temp.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) temp.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus)
temp.VersionID = strconv.FormatInt(result.JobVersionList[0].VersionID, 10) temp.VersionID = strconv.FormatInt(result.JobVersionList[0].VersionID, 10)


log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)

_, err := StopTrainJob(temp.JobID, temp.VersionID) _, err := StopTrainJob(temp.JobID, temp.VersionID)
if err != nil { if err != nil {
log.Error("StopTrainJob failed:%v", err) log.Error("StopTrainJob failed:%v", err)
@@ -1161,13 +1178,13 @@ func handleTempTrainJob(temp *models.CloudbrainTemp) error {
if result != nil { if result != nil {
for _, job := range result.JobList { for _, job := range result.JobList {
if temp.JobName == job.JobName && TransTrainJobStatus(job.IntStatus) != string(models.ModelArtsTrainJobFailed) { if temp.JobName == job.JobName && TransTrainJobStatus(job.IntStatus) != string(models.ModelArtsTrainJobFailed) {
log.Info("find the record(%s)", temp.JobName)

isExist = true isExist = true
temp.Status = TransTrainJobStatus(job.IntStatus) temp.Status = TransTrainJobStatus(job.IntStatus)
temp.JobID = strconv.FormatInt(job.JobID, 10) temp.JobID = strconv.FormatInt(job.JobID, 10)
temp.VersionID = strconv.FormatInt(job.VersionID, 10) temp.VersionID = strconv.FormatInt(job.VersionID, 10)


log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)

_, err = StopTrainJob(temp.JobID, temp.VersionID) _, err = StopTrainJob(temp.JobID, temp.VersionID)
if err != nil { if err != nil {
log.Error("StopTrainJob(%s) failed:%v", temp.JobName, err) log.Error("StopTrainJob(%s) failed:%v", temp.JobName, err)


+ 1
- 1
modules/setting/setting.go View File

@@ -1419,7 +1419,7 @@ func NewContext() {
Flavor = sec.Key("FLAVOR").MustString("") Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("") ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100) Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(10)
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("") ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("") Engines = sec.Key("Engines").MustString("")
EngineVersions = sec.Key("Engine_Versions").MustString("") EngineVersions = sec.Key("Engine_Versions").MustString("")


Loading…
Cancel
Save