diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 6cbb97999..92113c149 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -142,8 +142,8 @@ func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { - var ID = ctx.Params(":id") - job, err := models.GetCloudbrainByID(ID) + var id = ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) @@ -158,8 +158,8 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { func AdminOrJobCreaterRight(ctx *context.Context) { - var ID = ctx.Params(":id") - job, err := models.GetCloudbrainByID(ID) + var id = ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { log.Error("GetCloudbrainByID failed:%v", err.Error()) ctx.NotFound(ctx.Req.URL.RequestURI(), nil) @@ -547,7 +547,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e GPUNumber: resourceSpec.GpuNum, MemoryMB: resourceSpec.MemMiB, ShmMB: resourceSpec.ShareMemMiB, - Command: GetCloudbrainDebugCommand(),//Command, + Command: GetCloudbrainDebugCommand(), //Command, NeedIBDevice: false, IsMainRole: false, UseNNI: false, diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 3d9452f93..9dff94135 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -938,11 +938,10 @@ func RegisterRoutes(m *macaron.Macaron) { }, reqRepoReader(models.UnitTypeModelManage)) m.Group("/modelarts", func() { m.Group("/notebook", func() { - //m.Get("/:jobid", repo.GetModelArtsNotebook) m.Get("/:id", repo.GetModelArtsNotebook2) }) m.Group("/train-job", func() { - m.Group("/:jobid", func() { + m.Group("/:id", func() { m.Get("", repo.GetModelArtsTrainJobVersion) m.Get("/log", repo.TrainJobGetLog) m.Post("/del_version", repo.DelTrainJobVersion) @@ -952,7 +951,7 @@ func RegisterRoutes(m *macaron.Macaron) { }) }) m.Group("/inference-job", func() { - m.Group("/:jobid", func() { + m.Group("/:id", func() { m.Get("", repo.GetModelArtsInferenceJob) m.Get("/log", repo.TrainJobGetLog) m.Post("/del_version", repo.DelTrainJobVersion) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 7f2b30d81..4bdd73a19 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -30,8 +30,8 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { err error ) - ID := ctx.Params(":id") - job, err := models.GetCloudbrainByID(ID) + id := ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { ctx.NotFound(err) return @@ -43,7 +43,7 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "ID": ID, + "ID": id, "JobName": job.JobName, "JobStatus": job.Status, }) @@ -56,9 +56,8 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { aiCenterName string ) - jobID := ctx.Params(":jobid") - versionName := ctx.Query("version_name") - job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + id := ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { ctx.NotFound(err) return @@ -102,7 +101,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { return } } else if job.Type == models.TypeC2Net { - result, err := grampus.GetJob(jobID) + result, err := grampus.GetJob(job.JobID) if err != nil { log.Error("GetJob(%s) failed:%v", job.JobName, err) ctx.NotFound(err) @@ -141,7 +140,8 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, + "JobID": job.JobID, + "ID": id, "JobStatus": job.Status, "JobDuration": job.TrainJobDuration, "AiCenter": aiCenterName, @@ -223,7 +223,7 @@ func TrainJobGetLog(ctx *context.APIContext) { err error ) - var jobID = ctx.Params(":jobid") + var id = ctx.Params(":id") var versionName = ctx.Query("version_name") var baseLine = ctx.Query("base_line") var order = ctx.Query("order") @@ -241,14 +241,14 @@ func TrainJobGetLog(ctx *context.APIContext) { return } - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) + log.Error("GetCloudbrainByJobID(%s) failed:%v", id, err.Error()) return } - resultLogFile, result, err := trainJobGetLogContent(jobID, task.VersionID, baseLine, order, lines_int) + resultLogFile, result, err := trainJobGetLogContent(task.JobID, task.VersionID, baseLine, order, lines_int) if err != nil { - log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) + log.Error("trainJobGetLog(%s) failed:%v", task.JobID, err.Error()) // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } @@ -265,7 +265,7 @@ func TrainJobGetLog(ctx *context.APIContext) { ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, + "ID": id, "LogFileName": resultLogFile.LogFileList[0], "StartLine": result.StartLine, "EndLine": result.EndLine, @@ -297,17 +297,16 @@ func DelTrainJobVersion(ctx *context.APIContext) { err error ) - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + var id = ctx.Params(":id") + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) ctx.NotFound(err) return } //删除modelarts上的记录 - _, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10)) + _, err = modelarts.DelTrainJobVersion(task.JobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error()) ctx.NotFound(err) @@ -330,7 +329,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { RepoID: repo.ID, Type: models.TypeCloudBrainTwo, JobTypes: jobTypes, - JobID: jobID, + JobID: task.JobID, }) if err != nil { ctx.ServerError("get VersionListCount failed", err) @@ -356,8 +355,8 @@ func DelTrainJobVersion(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, - "VersionName": versionName, + "ID": id, + "VersionName": task.VersionName, "StatusOK": 0, "VersionListCount": VersionListCount, }) @@ -367,23 +366,23 @@ func StopTrainJobVersion(ctx *context.APIContext) { var ( err error ) - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + var id = ctx.Params(":id") + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) + ctx.NotFound(err) return } - _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) + _, err = modelarts.StopTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) return } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, - "VersionName": versionName, + "ID": id, + "VersionName": task.VersionName, "StatusOK": 0, }) } @@ -393,19 +392,19 @@ func ModelList(ctx *context.APIContext) { err error ) - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") parentDir := ctx.Query("parentDir") dirArray := strings.Split(parentDir, "/") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + var id = ctx.Params(":id") + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) + ctx.NotFound(err) return } var fileInfos []storage.FileInfo if task.ComputeResource == models.NPUResource { - fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, versionName) + fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, task.VersionName) if err != nil { log.Info("get TrainJobListModel failed:", err) ctx.ServerError("GetObsListObject:", err) @@ -428,8 +427,8 @@ func ModelList(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, - "VersionName": versionName, + "ID": id, + "VersionName": task.VersionName, "StatusOK": 0, "Path": dirArray, "Dirs": fileInfos, @@ -443,8 +442,8 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) { err error ) - jobID := ctx.Params(":jobid") - job, err := models.GetCloudbrainByJobID(jobID) + id := ctx.Params(":id") + job, err := models.GetCloudbrainByID(id) if err != nil { ctx.NotFound(err) return @@ -456,7 +455,7 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, + "ID": id, "JobStatus": job.Status, "JobDuration": job.TrainJobDuration, }) @@ -468,16 +467,15 @@ func ResultList(ctx *context.APIContext) { err error ) - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") + var id = ctx.Params(":id") parentDir := ctx.Query("parentDir") dirArray := strings.Split(parentDir, "/") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) return } - models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, versionName) + models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, task.VersionName) if err != nil { log.Info("get TrainJobListModel failed:", err) ctx.ServerError("GetObsListObject:", err) @@ -485,8 +483,8 @@ func ResultList(ctx *context.APIContext) { } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, - "VersionName": versionName, + "ID": id, + "VersionName": task.VersionName, "StatusOK": 0, "Path": dirArray, "Dirs": models, @@ -500,38 +498,37 @@ func TrainJobGetMetricStatistic(ctx *context.APIContext) { err error ) - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query("version_name") + var id = ctx.Params(":id") - result, err := trainJobGetMetricStatistic(jobID, versionName) + result, err := trainJobGetMetricStatistic(id) if err != nil { - log.Error("trainJobGetMetricStatistic(%s) failed:%v", jobID, err.Error()) + log.Error("trainJobGetMetricStatistic(%s) failed:%v", id, err.Error()) return } ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, + "ID": id, "Interval": result.Interval, "MetricsInfo": result.MetricsInfo, }) } -func trainJobGetMetricStatistic(jobID string, versionName string) (*models.GetTrainJobMetricStatisticResult, error) { - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) +func trainJobGetMetricStatistic(id string) (*models.GetTrainJobMetricStatisticResult, error) { + task, err := models.GetCloudbrainByID(id) if err != nil { - log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) + log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) return nil, err } - resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10)) + resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.JobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { - log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) + log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.JobID, err.Error()) return nil, err } - result, err := modelarts.GetTrainJobMetricStatistic(jobID, strconv.FormatInt(task.VersionID, 10), resultLogFile.LogFileList[0]) + result, err := modelarts.GetTrainJobMetricStatistic(task.JobID, strconv.FormatInt(task.VersionID, 10), resultLogFile.LogFileList[0]) if err != nil { - log.Error("GetTrainJobMetricStatistic(%s) failed:%v", jobID, err.Error()) + log.Error("GetTrainJobMetricStatistic(%s) failed:%v", task.JobID, err.Error()) return nil, err } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 032ff806b..dc1f1804b 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1266,7 +1266,7 @@ func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, s func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true - var jobID = ctx.Params(":jobid") + var jobID = ctx.Cloudbrain.JobID count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) if err != nil { @@ -1638,7 +1638,13 @@ func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) err func TrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true - var jobID = ctx.Params(":jobid") + var id = ctx.Params(":id") + + job, err := models.GetCloudbrainByID(id) + if err != nil { + log.Error("GetCloudbrainByID failed:%v", err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + } repo := ctx.Repo.Repository page := ctx.QueryInt("page") @@ -1656,11 +1662,11 @@ func TrainJobShow(ctx *context.Context) { RepoID: repo.ID, Type: models.TypeCloudBrainTwo, JobTypes: jobTypes, - JobID: jobID, + JobID: job.JobID, }) if err != nil { - log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) + log.Error("GetVersionListTasks(%s) failed:%v", job.DisplayJobName, err.Error()) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } @@ -1702,7 +1708,7 @@ func TrainJobShow(ctx *context.Context) { pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) pager.SetDefaultParams(ctx) ctx.Data["Page"] = pager - ctx.Data["jobID"] = jobID + ctx.Data["jobID"] = job.JobID ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName ctx.Data["version_list_task"] = VersionListTasks ctx.Data["version_list_count"] = VersionListCount @@ -1765,7 +1771,7 @@ func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *model } func TrainJobDel(ctx *context.Context) { - var jobID = ctx.Params(":jobid") + var jobID = ctx.Cloudbrain.JobID var listType = ctx.Query("listType") repo := ctx.Repo.Repository @@ -1816,11 +1822,10 @@ func TrainJobDel(ctx *context.Context) { } func TrainJobStop(ctx *context.Context) { - var jobID = ctx.Params(":jobid") var listType = ctx.Query("listType") task := ctx.Cloudbrain - _, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) + _, err := modelarts.StopTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) if err != nil { log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) @@ -2369,15 +2374,10 @@ func ModelDownload(ctx *context.Context) { err error ) - jobID := ctx.Params(":jobid") versionName := ctx.Query("version_name") parentDir := ctx.Query("parent_dir") fileName := ctx.Query("file_name") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) - if err != nil { - log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error()) - return - } + task := ctx.Cloudbrain var url string if task.ComputeResource == models.NPUResource { @@ -2527,19 +2527,13 @@ func TrainJobDownloadLogFile(ctx *context.Context) { err error ) - var jobID = ctx.Params(":jobid") versionName := ctx.Query("version_name") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) - if err != nil { - log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error(), ctx.Data["msgID"]) - ctx.ServerError("GetCloudbrainByJobIDAndVersionName", err) - return - } + task := ctx.Cloudbrain prefix := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.LogPath, versionName), "/") + "/job" key, err := storage.GetObsLogFileName(prefix) if err != nil { - log.Error("GetObsLogFileName(%s) failed:%v", jobID, err.Error(), ctx.Data["msgID"]) + log.Error("GetObsLogFileName(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["msgID"]) ctx.ServerError("GetObsLogFileName", err) return } diff --git a/routers/routes/routes.go b/routers/routes/routes.go index d433335f4..7196180cf 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1192,14 +1192,14 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/train-job", func() { m.Get("", reqRepoCloudBrainReader, repo.TrainJobIndex) - m.Group("/:jobid", func() { + m.Group("/:id", func() { m.Get("", reqRepoCloudBrainReader, repo.TrainJobShow) - m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobStop) - m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobDel) - m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) - m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobDownloadLogFile) - m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion) - m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) + m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.TrainJobStop) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.TrainJobDel) + m.Get("/model_download", cloudbrain.AdminOrJobCreaterRight, repo.ModelDownload) + m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRight, repo.TrainJobDownloadLogFile) + m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, repo.TrainJobNewVersion) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.TrainJobNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 3e6645727..c9e2c8bc7 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -112,7 +112,7 @@