diff --git a/models/cloudbrain.go b/models/cloudbrain.go
index 90b2433ad..04c4dbac3 100755
--- a/models/cloudbrain.go
+++ b/models/cloudbrain.go
@@ -69,13 +69,16 @@ type Cloudbrain struct {
 	CanDel    bool  `xorm:"-"`
 	Type      int   `xorm:"INDEX DEFAULT 0"`
 
-	VersionID       int64 `xorm:"INDEX DEFAULT 0"`
-	VersionName     string
-	Uuid            string
-	DatasetName     string
-	VersionCount    int64 `xorm:"INDEX DEFAULT 1"`
-	IsLatestVersion string
-	CommitID        string
+	VersionID         int64 `xorm:"INDEX DEFAULT 0"`
+	VersionName       string
+	Uuid              string
+	DatasetName       string
+	VersionCount      int64 `xorm:"INDEX DEFAULT 1"`
+	IsLatestVersion   string
+	CommitID          string
+	FatherVersionName string
+	ComputeResource   string
+	EngineID          int64
 
 	User *User       `xorm:"-"`
 	Repo *Repository `xorm:"-"`
diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go
index 1eb214392..a53661b74 100755
--- a/modules/auth/modelarts.go
+++ b/modules/auth/modelarts.go
@@ -38,7 +38,8 @@ type CreateModelArtsTrainJobForm struct {
 	IsSaveParam           string `form:"is_save_para"`
 	ParameterTemplateName string `form:"parameter_template_name"`
 	PrameterDescription   string `form:"parameter_description"`
-	BranchName            string `form:"branch_name"`
+	BranchName            string `form:"branch_name" binding:"Required"`
+	VersionName           string `form:"version_name" binding:"Required"`
 }
 
 func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index f75bf571b..9f7b67c06 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -47,6 +47,7 @@ const (
 	PerPage          = 10
 	IsLatestVersion  = "1"
 	NotLatestVersion = "0"
+	ComputeResource  = "NPU"
 
 	SortByCreateTime = "create_time"
 	ConfigTypeCustom = "custom"
@@ -237,6 +238,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult
 		DatasetName:     attach.Name,
 		CommitID:        req.CommitID,
 		IsLatestVersion: req.IsLatestVersion,
+		ComputeResource: ComputeResource,
+		EngineID:        req.EngineID,
 	})
 
 	if err != nil {
@@ -247,7 +250,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult
 	return jobResult, nil
 }
 
-func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string) (jobresult *models.CreateTrainJobResult, err error) {
+func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (jobresult *models.CreateTrainJobResult, err error) {
 	jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
 		Description: req.Description,
 		Config: models.TrainJobVersionConfig{
@@ -278,18 +281,21 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
 	}
 
 	err = models.CreateCloudbrain(&models.Cloudbrain{
-		Status:      TransTrainJobStatus(jobResult.Status),
-		UserID:      ctx.User.ID,
-		RepoID:      ctx.Repo.Repository.ID,
-		JobID:       strconv.FormatInt(jobResult.JobID, 10),
-		JobName:     req.JobName,
-		JobType:     string(models.JobTypeTrain),
-		Type:        models.TypeCloudBrainTwo,
-		VersionID:   jobResult.VersionID,
-		VersionName: jobResult.VersionName,
-		Uuid:        req.Uuid,
-		DatasetName: attach.Name,
-		CommitID:    req.CommitID,
+		Status:            TransTrainJobStatus(jobResult.Status),
+		UserID:            ctx.User.ID,
+		RepoID:            ctx.Repo.Repository.ID,
+		JobID:             strconv.FormatInt(jobResult.JobID, 10),
+		JobName:           req.JobName,
+		JobType:           string(models.JobTypeTrain),
+		Type:              models.TypeCloudBrainTwo,
+		VersionID:         jobResult.VersionID,
+		VersionName:       jobResult.VersionName,
+		Uuid:              req.Uuid,
+		DatasetName:       attach.Name,
+		CommitID:          req.CommitID,
+		FatherVersionName: fatherVersionName,
+		ComputeResource:   ComputeResource,
+		EngineID:          req.EngineID,
 	})
 	if err != nil {
 		log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
@@ -322,6 +328,11 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
 		ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err)
 		return nil, err
 	}
+	err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion)
+	if err != nil {
+		ctx.ServerError("UpdateJobVersionCount failed", err)
+		return nil, err
+	}
 
 	// lastVersionNum := jobResult.VersionName[1:]
 	// lastVersionNumToInt64, err := strconv.ParseInt(lastVersionNum, 10, 64)
@@ -330,11 +341,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
 	// 	return nil
 	// }
 	// lastVersionName := "V" + strconv.FormatInt(lastVersionNumToInt64-1, 10)
-	err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion)
-	if err != nil {
-		ctx.ServerError("UpdateJobVersionCount failed", err)
-		return nil, err
-	}
+
 	//将训练任务的本版本的isLatestVersion设置为"0"
 	//将当前版本的isLatestVersion和任务数量更新
 	err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion)
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index 28b66e59e..677c9530b 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -896,7 +896,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
 func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
 	ctx.Data["PageIsTrainJob"] = true
 	var jobID = ctx.Params(":jobid")
-	var versionName = ctx.Query("versionName")
+	// var fatherVersionName = ctx.Query("versionName")
 	// jobID = "19373"
 	// versionName = "V0009"
 
@@ -917,6 +917,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
 	logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
 	dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
 	branch_name := form.BranchName
+	fatherVersionName := form.VersionName
 
 	if err := paramCheckCreateTrainJob(form); err != nil {
 		log.Error("paramCheckCreateTrainJob failed:(%v)", err)
@@ -1043,7 +1044,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
 	}
 	// JobVersionName := "V0001"
 	// PreVersionId := int64(67646)
-	task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
+	task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, fatherVersionName)
 	if err != nil {
 		log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
 		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
@@ -1066,7 +1067,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
 		PreVersionId: task.VersionID,
 		CommitID:     commitID,
 	}
-	jobResult, err := modelarts.GenerateTrainJobVersion(ctx, req, jobID)
+	jobResult, err := modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName)
 	if err != nil {
 		log.Error("GenerateTrainJob failed:%v", err.Error())
 		trainJobNewVersionDataPrepare(ctx)
@@ -1105,7 +1106,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
 		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
 		return
 	}
-	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+	// ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+	ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
 }
 
 // readDir reads the directory named by dirname and returns
@@ -1383,6 +1385,76 @@ func TrainJobStop(ctx *context.Context) {
 	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
 }
 
+// TrainJobVersionDel deletes a train job on ModelArts and removes the
+// Cloudbrain record of the requested version, then redirects to the
+// train-job list.
+// NOTE(review): DelTrainJob receives only the job ID, so it presumably deletes
+// the whole job rather than a single version — confirm this is intended.
+func TrainJobVersionDel(ctx *context.Context) {
+	var jobID = ctx.Params(":jobid")
+	var versionName = ctx.Params(":versionName")
+	if versionName == "" {
+		// The "/del_version" route added in this patch has no ":versionName"
+		// segment, so fall back to the submitted form/query value.
+		// NOTE(review): key assumed to match the form's "version_name" — confirm.
+		versionName = ctx.Query("version_name")
+	}
+	task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
+	if err != nil {
+		// Log the request identifiers: task holds no usable value when the
+		// lookup fails, so task.JobName must not be read here.
+		log.Error("GetCloudbrainByJobIDAndVersionName(%s, %s) failed:%v", jobID, versionName, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
+		return
+	}
+
+	_, err = modelarts.DelTrainJob(jobID)
+	if err != nil {
+		log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
+		return
+	}
+
+	err = models.DeleteJob(task)
+	if err != nil {
+		ctx.ServerError("DeleteJob failed", err)
+		return
+	}
+
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+}
+
+// TrainJobVersionStop asks ModelArts to stop one version of a train job and
+// then redirects to the train-job list. The version is resolved through the
+// Cloudbrain record matching the ":jobid" route parameter and version name.
+func TrainJobVersionStop(ctx *context.Context) {
+	var jobID = ctx.Params(":jobid")
+	var versionName = ctx.Params(":versionName")
+	if versionName == "" {
+		// The "/stop_version" route added in this patch has no ":versionName"
+		// segment, so fall back to the submitted form/query value.
+		// NOTE(review): key assumed to match the form's "version_name" — confirm.
+		versionName = ctx.Query("version_name")
+	}
+	task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
+	if err != nil {
+		// Log the request identifiers: task holds no usable value when the
+		// lookup fails, so task.JobName must not be read here.
+		log.Error("GetCloudbrainByJobIDAndVersionName(%s, %s) failed:%v", jobID, versionName, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
+		return
+	}
+
+	_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
+	if err != nil {
+		log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
+		return
+	}
+
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
+}
+
 func canUserCreateTrainJob(uid int64) (bool, error) {
 	org, err := models.GetOrgByName(setting.AllowedOrg)
 	if err != nil {
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index c3dde2274..c1702ebe1 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -985,6 +985,8 @@ func RegisterRoutes(m *macaron.Macaron) {
 					m.Get("/download_model", reqRepoCloudBrainReader, repo.TrainJobDownloadModel)
 					m.Get("/create_version", reqRepoCloudBrainReader, repo.TrainJobNewVersion)
 					m.Post("/create_version", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion)
+					m.Post("/stop_version", reqRepoCloudBrainWriter, repo.TrainJobVersionStop)
+					m.Post("/del_version", reqRepoCloudBrainWriter, repo.TrainJobVersionDel)
 				})
 				m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNew)
 				m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate)