From d5bd1847fdc8c6af8b39d2a5f00d43c10008940c Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 16 Nov 2021 17:09:32 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8E=86=E5=8F=B2=E6=80=BB?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 31 +++++++------ modules/modelarts/modelarts.go | 78 +++++++++++++++++--------------- routers/api/v1/repo/modelarts.go | 4 +- routers/repo/modelarts.go | 65 +++++++++++++++----------- 4 files changed, 97 insertions(+), 81 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index d25ba0dd9..a45b5834f 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -80,18 +80,19 @@ type Cloudbrain struct { ComputeResource string EngineID int64 - TrainUrl string - BranchName string - Parameters string - BootFile string - DataUrl string - LogUrl string - PreVersionId int64 - FlavorCode string - Description string - WorkServerNumber int - FlavorName string - EngineName string + TrainUrl string + BranchName string + Parameters string + BootFile string + DataUrl string + LogUrl string + PreVersionId int64 + FlavorCode string + Description string + WorkServerNumber int + FlavorName string + EngineName string + TotalVersionCount int User *User `xorm:"-"` Repo *Repository `xorm:"-"` @@ -1112,9 +1113,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train return } -func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string) (err error) { - cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion} - _, err = x.Cols("version_Count", "is_latest_version").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) +func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { + cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} + _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) return } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 1835770fe..9648d7447 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -35,24 +35,24 @@ const ( // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + // "]}" - CodePath = "/code/" - OutputPath = "/output/" - LogPath = "/log/" - JobPath = "/job/" - OrderDesc = "desc" //向下查询 - OrderAsc = "asc" //向上查询 - Lines = 500 - TrainUrl = "train_url" - DataUrl = "data_url" - PerPage = 10 - IsLatestVersion = "1" - NotLatestVersion = "0" - ComputeResource = "NPU" - InitFatherVersionName = "V0001" - VersionCount = 1 - - SortByCreateTime = "create_time" - ConfigTypeCustom = "custom" + CodePath = "/code/" + OutputPath = "/output/" + LogPath = "/log/" + JobPath = "/job/" + OrderDesc = "desc" //向下查询 + OrderAsc = "asc" //向上查询 + Lines = 500 + TrainUrl = "train_url" + DataUrl = "data_url" + PerPage = 10 + IsLatestVersion = "1" + NotLatestVersion = "0" + ComputeResource = "NPU" + VersionCount = 1 + + SortByCreateTime = "create_time" + ConfigTypeCustom = "custom" + TotalVersionCount = 1 ) var ( @@ -83,6 +83,7 @@ type GenerateTrainJobReq struct { FlavorName string VersionCount int EngineName string + TotalVersionCount int } type GenerateTrainJobVersionReq struct { @@ -107,6 +108,7 @@ type GenerateTrainJobVersionReq struct { FlavorName string EngineName string FatherVersionName string + TotalVersionCount int } type VersionInfo struct { @@ -256,22 +258,22 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: strconv.FormatInt(jobResult.JobID, 10), - JobName: req.JobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: attach.Name, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: ComputeResource, - EngineID: req.EngineID, - FatherVersionName: req.FatherVersionName, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: ComputeResource, + EngineID: req.EngineID, + // FatherVersionName: req.FatherVersionName, TrainUrl: req.TrainUrl, BranchName: req.BranchName, Parameters: req.Params, @@ -284,6 +286,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error FlavorName: req.FlavorName, EngineName: req.EngineName, VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, }) if err != nil { @@ -352,6 +355,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR WorkServerNumber: req.WorkServerNumber, FlavorName: req.FlavorName, EngineName: req.EngineName, + TotalVersionCount: req.TotalVersionCount, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) @@ -384,14 +388,14 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) return err } - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion, req.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return err } - //将当前版本的isLatestVersion设置为"1"和任务数量更新 - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) + //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion, req.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return err diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 1a257cd66..6137eb6b7 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -267,7 +267,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { //判断当前的任务是否是最新版本,若是,将排序后的第一个版本设置为最新版本,若不是,最新版本不变,更改最新版本的版本数。 if task.IsLatestVersion == modelarts.IsLatestVersion { - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion) + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return @@ -278,7 +278,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) return } - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion) + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 89bf143d4..bbe350ac2 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -491,6 +491,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true + VersionOutputPath := "V" + strconv.Itoa(modelarts.TotalVersionCount) jobName := form.JobName uuid := form.Attachment description := form.Description @@ -504,8 +505,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath - outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath - logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName isLatestVersion := modelarts.IsLatestVersion @@ -554,14 +555,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } //todo: upload code (send to file_server todo this work?) - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) return } - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) @@ -640,28 +641,29 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } req := &modelarts.GenerateTrainJobReq{ - JobName: jobName, - DataUrl: dataPath, - Description: description, - CodeObsPath: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - BootFile: bootFile, - TrainUrl: outputObsPath, - FlavorCode: flavorCode, - WorkServerNumber: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Uuid: uuid, - Parameters: parameters.Parameter, - CommitID: commitID, - IsLatestVersion: isLatestVersion, - BranchName: branch_name, - Params: form.Params, - FatherVersionName: modelarts.InitFatherVersionName, + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: outputObsPath, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: parameters.Parameter, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branch_name, + Params: form.Params, + // FatherVersionName: InitVersionName, FlavorName: FlavorName, EngineName: EngineName, VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, } err = modelarts.GenerateTrainJob(ctx, req) @@ -683,6 +685,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.Data["PageIsTrainJob"] = true var jobID = ctx.Params(":jobid") + latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(jobID, modelarts.IsLatestVersion) + if err != nil { + ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) + return + } + + VersionOutputPath := "V" + strconv.Itoa(latestTask.TotalVersionCount+1) + jobName := form.JobName uuid := form.Attachment description := form.Description @@ -695,8 +705,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ isSaveParam := form.IsSaveParam repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath - codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath - outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName @@ -743,14 +753,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ } //todo: upload code (send to file_server todo this work?) - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) return } - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) @@ -861,6 +871,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ FlavorName: FlavorName, EngineName: EngineName, FatherVersionName: fatherVersionName, + TotalVersionCount: latestTask.TotalVersionCount + 1, } err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) if err != nil {