Browse Source

调整GPU训练任务的日志显示逻辑。#2706

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.8.2^2
zouap 3 years ago
parent
commit
5fbc6b1862
2 changed files with 44 additions and 1 deletions
  1. +43
    -0
      routers/repo/cloudbrain.go
  2. +1
    -1
      routers/routes/routes.go

+ 43
- 0
routers/repo/cloudbrain.go View File

@@ -2885,3 +2885,46 @@ func getFlavorNameByFlavorCode(flavorCode string) string {
"核" + strconv.Itoa(cardNum*256) + "GB"
return cloudbrainTwoFlavorName
}

// GetLogFromModelDir responds with a slice of a training job's log as JSON.
//
// It lists the objects directly under the job's code path prefix in the
// attachment Minio bucket, takes the first one whose name ends in "log.txt",
// and reads it from the path returned by storage.GetMinioPath. Lines whose
// zero-based index falls in [startLine, endLine) are concatenated into the
// "Content" field of the response; both bounds are read with ctx.QueryInt.
// When no matching file exists or it cannot be opened, "Content" is empty.
func GetLogFromModelDir(ctx *context.Context) {
	prefix := "/" + setting.CBCodePathPrefix + ctx.Cloudbrain.JobName
	files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
	if err != nil {
		log.Error("query cloudbrain model failed: %v", err)
		// NOTE(review): nothing is written to the response on this path, so
		// the client gets no JSON body. Kept as in the original; confirm
		// whether an explicit error response is wanted here.
		return
	}

	startLine := ctx.QueryInt("startLine")
	endLine := ctx.QueryInt("endLine")

	content := ""
	for _, file := range files {
		if !strings.HasSuffix(file.FileName, "log.txt") {
			continue
		}
		// Only the first matching log file is served.
		content = readLogLineRange(storage.GetMinioPath(ctx.Cloudbrain.JobName, file.FileName), startLine, endLine)
		break
	}

	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobName": ctx.Cloudbrain.JobName,
		"Content": content,
	})
}

// readLogLineRange opens the file at path and returns the concatenation of
// its lines with zero-based index in [startLine, endLine). It returns an
// empty string if the file cannot be opened.
func readLogLineRange(path string, startLine, endLine int) string {
	f, err := os.Open(path)
	if err != nil {
		log.Error("open log file failed: %v", err)
		return ""
	}
	// Registered only after a successful open, and scoped to this helper so
	// the file is closed as soon as reading finishes.
	defer f.Close()

	var sb strings.Builder
	r := bufio.NewReader(f)
	for i := 0; i < endLine; i++ {
		line, readErr := r.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			log.Info("read file error." + readErr.Error())
			break
		}
		// ReadString returns the data read so far together with io.EOF, so a
		// final line without a trailing newline must still be kept.
		if i >= startLine {
			sb.WriteString(line)
		}
		if readErr == io.EOF {
			log.Info("read file completed.")
			break
		}
	}
	return sb.String()
}

+ 1
- 1
routers/routes/routes.go View File

@@ -1100,7 +1100,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainTrainJobDel)
//m.Get("/models", reqRepoCloudBrainReader, repo.CloudBrainShowModels)
m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel)
//m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion)
m.Get("/get_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo.GetLogFromModelDir)
//m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew)


Loading…
Cancel
Save