diff --git a/modules/grampus/resty.go b/modules/grampus/resty.go
index f36721c85..a9e1aed5c 100755
--- a/modules/grampus/resty.go
+++ b/modules/grampus/resty.go
@@ -245,6 +245,38 @@ func GetTrainJobLog(jobID string) (string, error) {
 	return logContent, nil
 }
 
+// GetGrampusMetrics requests the metrics of task 0 / replica 0 of the train
+// job jobID from the Grampus service and decodes the JSON response.
+//
+// It returns a non-nil error when the request fails, the body cannot be
+// decoded, the HTTP status is not 200, or the result's IsSuccess flag is
+// false; the partially filled result is returned alongside the error.
+func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, error) {
+	checkSetting()
+	client := getRestyClient()
+	var result models.GetTrainJobMetricStatisticResult
+	res, err := client.R().
+		SetAuthToken(TOKEN).
+		Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics")
+
+	if err != nil {
+		return result, fmt.Errorf("resty GetGrampusMetrics: %v", err)
+	}
+	if err = json.Unmarshal([]byte(res.String()), &result); err != nil {
+		log.Error("GetGrampusMetrics json.Unmarshal failed(%s): %v", res.String(), err.Error())
+		return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+	}
+	if res.StatusCode() != http.StatusOK {
+		log.Error("Call GrampusMetrics failed(%d):%v(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg)
+		return result, fmt.Errorf("Call GrampusMetrics failed(%d):%v(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg)
+	}
+	if !result.IsSuccess {
+		log.Error("GetGrampusMetrics(%s) failed", jobID)
+		return result, fmt.Errorf("GetGrampusMetrics failed:%s", result.ErrorMsg)
+	}
+	return result, nil
+}
+
 func StopJob(jobID string) (*models.GrampusStopJobResponse, error) {
 	checkSetting()
 	client := getRestyClient()
diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go
index 813b77c8c..5533a4c9e 100755
--- a/routers/api/v1/api.go
+++ b/routers/api/v1/api.go
@@ -1048,6 +1048,7 @@ func RegisterRoutes(m *macaron.Macaron) {
 				m.Get("", repo.GetModelArtsTrainJobVersion)
 				m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob)
 				m.Get("/log", repo_ext.GrampusGetLog)
+				m.Get("/metrics", repo_ext.GrampusMetrics)
 				m.Get("/download_log", cloudbrain.AdminOrJobCreaterRightForTrain, 
repo_ext.GrampusDownloadLog)
 			})
 		})
diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go
index cc17773b0..581a1fbfb 100755
--- a/routers/repo/grampus.go
+++ b/routers/repo/grampus.go
@@ -957,6 +957,31 @@ func GrampusGetLog(ctx *context.Context) {
 	return
 }
 
+// GrampusMetrics responds with the metrics of the Grampus train job named by
+// the ":jobid" route parameter, as a JSON object carrying JobID, Interval
+// and MetricsInfo.
+func GrampusMetrics(ctx *context.Context) {
+	jobID := ctx.Params(":jobid")
+	job, err := models.GetCloudbrainByJobID(jobID)
+	if err != nil {
+		log.Error("GetCloudbrainByJobID failed: %v", err, ctx.Data["MsgID"])
+		ctx.ServerError(err.Error(), err)
+		return
+	}
+
+	result, err := grampus.GetGrampusMetrics(job.JobID)
+	if err != nil {
+		// Best effort: the failure is only logged; the response below is
+		// still sent with status 200 and whatever result holds.
+		log.Error("GetGrampusMetrics failed: %v", err, ctx.Data["MsgID"])
+	}
+	ctx.JSON(http.StatusOK, map[string]interface{}{
+		"JobID":       jobID,
+		"Interval":    result.Interval,
+		"MetricsInfo": result.MetricsInfo,
+	})
+}
+
 func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName, modelRemoteObsUrl string) (string, error) {
 	var command string