diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 0df6c2145..81c3df2af 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -69,11 +69,13 @@ type Cloudbrain struct { CanDel bool `xorm:"-"` Type int `xorm:"INDEX DEFAULT 0"` - VersionID int64 `xorm:"INDEX DEFAULT 0"` - VersionName string - Uuid string - DatasetName string - VersionCount int64 `xorm:"INDEX DEFAULT 1"` + VersionID int64 `xorm:"INDEX DEFAULT 0"` + VersionName string + Uuid string + DatasetName string + VersionCount int64 `xorm:"INDEX DEFAULT 1"` + IsLatestVersion string + CommitID string User *User `xorm:"-"` Repo *Repository `xorm:"-"` @@ -89,11 +91,11 @@ type TrainjobConfigDetail struct { BootFile string `xorm:"INDEX"` Uuid string `xorm:"INDEX"` DatasetName string `xorm:"INDEX"` - Params string `xorm:"deleted"` + Params string `xorm:"INDEX"` BranchName string `xorm:"INDEX"` - // User *User `xorm:"-"` - // Repo *Repository `xorm:"-"` + User *User `xorm:"-"` + Repo *Repository `xorm:"-"` } type CloudbrainInfo struct { @@ -173,9 +175,10 @@ type CloudbrainsOptions struct { SortType string CloudbrainIDs []int64 // JobStatus CloudbrainStatus - Type int - JobType string - VersionName string + Type int + JobType string + VersionName string + IsLatestVersion string } type TaskPod struct { @@ -903,9 +906,9 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { ) } - if (opts.VersionName) != "" { + if (opts.IsLatestVersion) != "" { cond = cond.And( - builder.Eq{"cloudbrain.version_name": opts.VersionName}, + builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, ) } @@ -1056,6 +1059,11 @@ func GetCloudbrainByJobIDAndVersionName(jobID string, versionName string) (*Clou return getRepoCloudBrain(cb) } +func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string) (*Cloudbrain, error) { + cb := &Cloudbrain{JobID: jobID, IsLatestVersion: isLatestVersion} + return getRepoCloudBrain(cb) +} + func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) { cloudBrains := make([]*Cloudbrain, 0) err := x.Cols("job_id", "status", "type").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains) @@ -1080,9 +1088,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train return } -func SetVersionCountByJobID(jobID string, versionName string, versionCount int64) (err error) { - cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount} - _, err = x.Cols("version_Count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) +func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int64, isLatestVersion string) (err error) { + cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion} + _, err = x.Cols("version_Count", "is_latest_version").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) return } diff --git a/models/models.go b/models/models.go index 696d0949b..7ec021223 100755 --- a/models/models.go +++ b/models/models.go @@ -133,6 +133,7 @@ func init() { new(FileChunk), new(BlockChain), new(RecommendOrg), + new(TrainjobConfigDetail), ) tablesStatistic = append(tablesStatistic, diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 4bff6a347..fcf1e8829 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -35,16 +35,18 @@ const ( // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + // "]}" - CodePath = "/code/" - OutputPath = "/output/" - LogPath = "/log/" - JobPath = "/job/" - OrderDesc = "desc" //向下查询 - OrderAsc = "asc" //向上查询 - Lines = 20 - TrainUrl = "train_url" - DataUrl = "data_url" - PerPage = 10 + CodePath = "/code/" + OutputPath = "/output/" + LogPath = "/log/" + JobPath = "/job/" + OrderDesc = "desc" //向下查询 + OrderAsc = "asc" //向上查询 + Lines = 20 + TrainUrl = "train_url" + DataUrl = "data_url" + PerPage = 10 + IsLatestVersion = "1" + NotLatestVersion = "0" SortByCreateTime = "create_time" ConfigTypeCustom = "custom" @@ -69,6 +71,8 @@ type GenerateTrainJobReq struct { WorkServerNumber int EngineID int64 Parameters []models.Parameter + CommitID string + IsLatestVersion string } type GenerateTrainJobVersionReq struct { @@ -86,6 +90,7 @@ type GenerateTrainJobVersionReq struct { EngineID int64 Parameters []models.Parameter PreVersionId int64 + CommitID string } type VersionInfo struct { @@ -219,17 +224,19 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: strconv.FormatInt(jobResult.JobID, 10), - JobName: req.JobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: attach.Name, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, }) if err != nil { @@ -282,6 +289,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR VersionName: jobResult.VersionName, Uuid: req.Uuid, DatasetName: attach.Name, + CommitID: req.CommitID, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) @@ -307,8 +315,29 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR ctx.ServerError("Cloudbrain", err) return nil } - versionName := "V0001" - err = models.SetVersionCountByJobID(strconv.FormatInt(jobResult.JobID, 10), versionName, VersionListCount) + + //将训练任务的上一版本的isLatestVersion设置为"0" + latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(strconv.FormatInt(jobResult.JobID, 10), IsLatestVersion) + if err != nil { + ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) + return nil + } + + // lastVersionNum := jobResult.VersionName[1:] + // lastVersionNumToInt64, err := strconv.ParseInt(lastVersionNum, 10, 64) + // if err != nil { + // ctx.ServerError("lastVersionNumToInt64 faild:", err) + // return nil + // } + // lastVersionName := "V" + strconv.FormatInt(lastVersionNumToInt64-1, 10) + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) + if err != nil { + ctx.ServerError("UpdateJobVersionCount failed", err) + return nil + } + + //将当前版本的isLatestVersion和任务数量更新 + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return nil diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 5f0b8c4f9..71efd233a 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -505,10 +505,10 @@ func TrainJobIndex(ctx *context.Context) { Page: page, PageSize: setting.UI.IssuePagingNum, }, - RepoID: repo.ID, - Type: models.TypeCloudBrainTwo, - JobType: string(models.JobTypeTrain), - VersionName: string(models.JobVersionName), + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + IsLatestVersion: modelarts.IsLatestVersion, }) if err != nil { ctx.ServerError("Cloudbrain", err) @@ -614,13 +614,14 @@ func TrainJobNewVersion(ctx *context.Context) { ctx.ServerError("get new train-job info failed", err) return } - ctx.HTML(200, tplModelArtsTrainJobVersionNew) + ctx.HTML(200, tplModelArtsTrainJobNew) } func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") var versionName = ctx.Query("versionName") + jobID = "19373" t := time.Now() var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] @@ -703,6 +704,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName + isLatestVersion := modelarts.IsLatestVersion if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) @@ -723,6 +725,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) os.RemoveAll(codeLocalPath) } + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branch_name) + if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ Branch: branch_name, }); err != nil { @@ -841,6 +846,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) PoolID: poolID, Uuid: uuid, Parameters: parameters.Parameter, + CommitID: commitID, + IsLatestVersion: isLatestVersion, } err = modelarts.GenerateTrainJob(ctx, req) @@ -862,6 +869,9 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.Data["PageIsTrainJob"] = true var jobID = ctx.Params(":jobid") var versionName = ctx.Query("versionName") + jobID = "19373" + versionName = "V0009" + jobName := form.JobName uuid := form.Attachment description := form.Description @@ -883,7 +893,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } @@ -899,6 +909,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ os.RemoveAll(codeLocalPath) } + gitRepo, _ := git.OpenRepository(repo.RepoPath()) + commitID, _ := gitRepo.GetBranchCommitID(branch_name) if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ Branch: branch_name, }); err != nil { @@ -911,7 +923,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.Data["params"] = form.Params ctx.Data["branch_name"] = branch_name // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) - ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form) return } @@ -920,21 +932,21 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) return } if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) return } if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form) return } @@ -954,7 +966,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err != nil { log.Error("Failed to Unmarshal params: %s (%v)", params, err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form) return } @@ -973,7 +985,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if form.ParameterTemplateName == "" { log.Error("ParameterTemplateName is empty") trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form) return } @@ -997,7 +1009,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ if err != nil { log.Error("Failed to CreateTrainJobConfig: %v", err) trainJobNewVersionDataPrepare(ctx) - ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form) return } } @@ -1006,7 +1018,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) if err != nil { log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } req := &modelarts.GenerateTrainJobVersionReq{ @@ -1024,6 +1036,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ Uuid: uuid, Parameters: parameters.Parameter, PreVersionId: task.VersionID, + CommitID: commitID, } err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) if err != nil { @@ -1036,32 +1049,32 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } - //保存openi创建训练任务界面的参数 - // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ + // 保存openi创建训练任务界面的参数 + err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - // JobName: req.JobName, - // ResourcePools: form.PoolID, - // EngineVersions: form.EngineID, - // FlavorInfos: form.Flavor, - // TrainUrl: outputObsPath, - // BootFile: form.BootFile, - // Uuid: form.Attachment, - // DatasetName: attach.Name, - // Params: form.Params, - // BranchName: branch_name, - // }) + JobName: req.JobName, + ResourcePools: form.PoolID, + EngineVersions: form.EngineID, + FlavorInfos: form.Flavor, + TrainUrl: outputObsPath, + BootFile: form.BootFile, + Uuid: form.Attachment, + DatasetName: attach.Name, + Params: form.Params, + BranchName: branch_name, + }) - // if err != nil { - // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) - // trainJobNewVersionDataPrepare(ctx) - // ctx.Data["bootFile"] = form.BootFile - // ctx.Data["uuid"] = form.Attachment - // ctx.Data["datasetName"] = attach.Name - // ctx.Data["params"] = form.Params - // ctx.Data["branch_name"] = branch_name - // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) - // return - // } + if err != nil { + log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) + trainJobNewVersionDataPrepare(ctx) + ctx.Data["bootFile"] = form.BootFile + ctx.Data["uuid"] = form.Attachment + ctx.Data["datasetName"] = attach.Name + ctx.Data["params"] = form.Params + ctx.Data["branch_name"] = branch_name + ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) + return + } ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 0a5464065..37807cf31 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -999,6 +999,9 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNew) m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) + // m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNewVersion) + // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) + m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) }, context.RepoRef())