diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 20e643884..5625ece95 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -30,7 +30,6 @@ const ( JobTypeSnn4imagenet JobType = "SNN4IMAGENET" JobTypeBrainScore JobType = "BRAINSCORE" JobTypeTrain JobType = "TRAIN" - JobVersionName JobType = "V0001" ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 ModelArtsCreating ModelArtsJobStatus = "CREATING" //创建中 @@ -63,35 +62,36 @@ type Cloudbrain struct { CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` Duration int64 `xorm:"INDEX duration"` - TrainJobDuration string - DeletedAt time.Time `xorm:"deleted"` - CanDebug bool `xorm:"-"` - CanDel bool `xorm:"-"` - Type int `xorm:"INDEX DEFAULT 0"` + TrainJobDuration string `xorm:"INDEX DEFAULT '00:00:00'"` + DeletedAt time.Time `xorm:"deleted"` + CanDebug bool `xorm:"-"` + CanDel bool `xorm:"-"` + Type int `xorm:"INDEX DEFAULT 0"` VersionID int64 `xorm:"INDEX DEFAULT 0"` VersionName string `xorm:"INDEX"` - Uuid string + Uuid string //数据集id DatasetName string - VersionCount int `xorm:"INDEX DEFAULT 1"` - IsLatestVersion string - CommitID string - FatherVersionName string - ComputeResource string - EngineID int64 - - TrainUrl string - BranchName string - Parameters string - BootFile string - DataUrl string - LogUrl string - PreVersionId int64 - FlavorCode string - Description string - WorkServerNumber int - FlavorName string - EngineName string + VersionCount int `xorm:"INDEX DEFAULT 1"` //任务的当前版本数量,不包括删除的 + IsLatestVersion string //是否是最新版本,1是,0否 + CommitID string //提交的仓库代码id + FatherVersionName string //父版本名称 + ComputeResource string //计算资源,例如npu + EngineID int64 //引擎id + + TrainUrl string //输出的obs路径 + BranchName string //分支名称 + Parameters string //传给modelarts的param参数 + BootFile string //启动文件 + DataUrl string //数据集的obs路径 + LogUrl string //日志输出的obs路径 + PreVersionId int64 //父版本的版本id + FlavorCode string //modelarts上的规格id + Description string + WorkServerNumber int //节点数 + FlavorName string //规格名称 + EngineName string //引擎名称 + TotalVersionCount int //任务的所有版本数量,包括删除的 User *User `xorm:"-"` Repo *Repository `xorm:"-"` @@ -1112,9 +1112,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train return } -func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string) (err error) { - cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion} - _, err = x.Cols("version_Count", "is_latest_version").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) +func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { + cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} + _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) return } @@ -1124,8 +1124,8 @@ func UpdateJob(job *Cloudbrain) error { func updateJob(e Engine, job *Cloudbrain) error { var sess *xorm.Session - sess = e.Where("job_id = ?", job.JobID) - _, err := sess.Cols("status", "container_id", "container_ip").Update(job) + sess = e.Where("job_id = ? AND version_name=?", job.JobID, job.VersionName) + _, err := sess.Cols("status", "train_job_duration", "container_id", "container_ip").Update(job) return err } @@ -1149,6 +1149,15 @@ func deleteJob(e Engine, job *Cloudbrain) error { return err } +func DeleteJobVersion(job *Cloudbrain) error { + return deleteJobVersion(x, job) +} + +func deleteJobVersion(e Engine, job *Cloudbrain) error { + _, err := e.ID(job.ID).Delete(job) + return err +} + func GetCloudbrainByName(jobName string) (*Cloudbrain, error) { cb := &Cloudbrain{JobName: jobName} return getRepoCloudBrain(cb) diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 99ca262f1..9648d7447 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -35,24 +35,24 @@ const ( // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + // "]}" - CodePath = "/code/" - OutputPath = "/output/" - LogPath = "/log/" - JobPath = "/job/" - OrderDesc = "desc" //向下查询 - OrderAsc = "asc" //向上查询 - Lines = 20 - TrainUrl = "train_url" - DataUrl = "data_url" - PerPage = 10 - IsLatestVersion = "1" - NotLatestVersion = "0" - ComputeResource = "NPU" - InitFatherVersionName = "V0001" - VersionCount = 1 - - SortByCreateTime = "create_time" - ConfigTypeCustom = "custom" + CodePath = "/code/" + OutputPath = "/output/" + LogPath = "/log/" + JobPath = "/job/" + OrderDesc = "desc" //向下查询 + OrderAsc = "asc" //向上查询 + Lines = 500 + TrainUrl = "train_url" + DataUrl = "data_url" + PerPage = 10 + IsLatestVersion = "1" + NotLatestVersion = "0" + ComputeResource = "NPU" + VersionCount = 1 + + SortByCreateTime = "create_time" + ConfigTypeCustom = "custom" + TotalVersionCount = 1 ) var ( @@ -83,29 +83,32 @@ type GenerateTrainJobReq struct { FlavorName string VersionCount int EngineName string + TotalVersionCount int } type GenerateTrainJobVersionReq struct { - JobName string - Uuid string - Description string - CodeObsPath string - BootFile string - BootFileUrl string - DataUrl string - TrainUrl string - FlavorCode string - LogUrl string - PoolID string - WorkServerNumber int - EngineID int64 - Parameters []models.Parameter - Params string - PreVersionId int64 - CommitID string - BranchName string - FlavorName string - EngineName string + JobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + FlavorCode string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + Params string + PreVersionId int64 + CommitID string + BranchName string + FlavorName string + EngineName string + FatherVersionName string + TotalVersionCount int } type VersionInfo struct { @@ -255,22 +258,22 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error } err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: strconv.FormatInt(jobResult.JobID, 10), - JobName: req.JobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: attach.Name, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: ComputeResource, - EngineID: req.EngineID, - FatherVersionName: req.FatherVersionName, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: strconv.FormatInt(jobResult.JobID, 10), + JobName: req.JobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: attach.Name, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: ComputeResource, + EngineID: req.EngineID, + // FatherVersionName: req.FatherVersionName, TrainUrl: req.TrainUrl, BranchName: req.BranchName, Parameters: req.Params, @@ -283,6 +286,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error FlavorName: req.FlavorName, EngineName: req.EngineName, VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, }) if err != nil { @@ -293,7 +297,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error return nil } -func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (err error) { +func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string) (err error) { jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{ Description: req.Description, Config: models.TrainJobVersionConfig{ @@ -336,7 +340,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR Uuid: req.Uuid, DatasetName: attach.Name, CommitID: req.CommitID, - FatherVersionName: fatherVersionName, + FatherVersionName: req.FatherVersionName, ComputeResource: ComputeResource, EngineID: req.EngineID, TrainUrl: req.TrainUrl, @@ -351,6 +355,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR WorkServerNumber: req.WorkServerNumber, FlavorName: req.FlavorName, EngineName: req.EngineName, + TotalVersionCount: req.TotalVersionCount, }) if err != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) @@ -383,14 +388,14 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) return err } - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion, req.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return err } - //将当前版本的isLatestVersion设置为"1"和任务数量更新 - err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) + //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion, req.TotalVersionCount) if err != nil { ctx.ServerError("UpdateJobVersionCount failed", err) return err diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index c967c0eda..8d043eebd 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -814,3 +814,44 @@ sendjob: return &result, nil } + +func DelTrainJobVersion(jobID string, versionID string) (*models.TrainJobResult, error) { + checkSetting() + client := getRestyClient() + var result models.TrainJobResult + + retry := 0 + +sendjob: + res, err := client.R(). + SetAuthToken(TOKEN). + SetResult(&result). + Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID) + + if err != nil { + return &result, fmt.Errorf("resty DelTrainJobVersion: %v", err) + } + + if res.StatusCode() == http.StatusUnauthorized && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + if res.StatusCode() != http.StatusOK { + var temp models.ErrorResult + if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { + log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) + } + log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + return &result, fmt.Errorf("删除训练作业版本失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) + } + + if !result.IsSuccess { + log.Error("DelTrainJob(%s) failed", jobID) + return &result, fmt.Errorf("删除训练作业版本失败:%s", result.ErrorMsg) + } + + return &result, nil +} diff --git a/modules/storage/obs.go b/modules/storage/obs.go index 7a065636d..09ec4a846 100755 --- a/modules/storage/obs.go +++ b/modules/storage/obs.go @@ -431,10 +431,10 @@ func GetObsListObject(jobName, parentDir string) ([]FileInfo, error) { } } -func GetVersionObsListObject(jobName, parentDir string) ([]FileInfo, error) { +func GetObsListObjectVersion(jobName, parentDir string, VersionOutputPath string) ([]FileInfo, error) { input := &obs.ListObjectsInput{} input.Bucket = setting.Bucket - input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, parentDir), "/") + input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, VersionOutputPath, parentDir), "/") strPrefix := strings.Split(input.Prefix, "/") output, err := ObsCli.ListObjects(input) fileInfos := make([]FileInfo, 0) @@ -478,6 +478,9 @@ func GetVersionObsListObject(jobName, parentDir string) ([]FileInfo, error) { } fileInfos = append(fileInfos, fileInfo) } + sort.Slice(fileInfos, func(i, j int) bool { + return fileInfos[i].ModTime > fileInfos[j].ModTime + }) return fileInfos, err } else { if obsError, ok := err.(obs.ObsError); ok { @@ -558,6 +561,27 @@ func GetObsCreateSignedUrl(jobName, parentDir, fileName string) (string, error) // return output.SignedUrl, nil } +func GetObsCreateVersionSignedUrl(jobName, parentDir, fileName string, VersionOutputPath string) (string, error) { + input := &obs.CreateSignedUrlInput{} + input.Bucket = setting.Bucket + input.Key = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, VersionOutputPath, parentDir, fileName), "/") + + input.Expires = 60 * 60 + input.Method = obs.HttpMethodGet + + reqParams := make(map[string]string) + fileName = url.QueryEscape(fileName) + reqParams["response-content-disposition"] = "attachment; filename=\"" + fileName + "\"" + input.QueryParams = reqParams + output, err := ObsCli.CreateSignedUrl(input) + if err != nil { + log.Error("CreateSignedUrl failed:", err.Error()) + return "", err + } + + return output.SignedUrl, nil +} + func ObsGetPreSignedUrl(uuid, fileName string) (string, error) { input := &obs.CreateSignedUrlInput{} input.Method = obs.HttpMethodGet diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index e09a18fbf..661b25895 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -816,6 +816,10 @@ get_repo_info_error=Can not get the information of the repository. generate_statistic_file_error=Fail to generate file. repo_stat_inspect=ProjectAnalysis all=All +modelarts.status=Status +modelarts.createtime=CreateTime +modelarts.version_nums = Version Nums +modelarts.computing_resources=compute Resources modelarts.notebook=Debug Task modelarts.train_job=Train Task modelarts.train_job.new_debug= New Debug Task @@ -845,6 +849,7 @@ modelarts.train_job.start_file=Start File modelarts.train_job.boot_file_helper=The startup file is the entry file that your program executes, and it must be a file ending in .py modelarts.train_job.dataset=Dataset modelarts.code_version = Code Version +modelarts.parents_version = Parents Version modelarts.train_job.run_parameter=Run Parameter modelarts.train_job.add_run_parameter=Add Run Parameter modelarts.train_job.parameter_name=Parameter Name diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index b57acc683..03d9a03e7 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -819,6 +819,11 @@ get_repo_info_error=查询当前仓库信息失败。 generate_statistic_file_error=生成文件失败。 repo_stat_inspect=项目分析 all=所有 + +modelarts.status=状态 +modelarts.createtime=创建时间 +modelarts.version_nums=版本数 +modelarts.computing_resources=计算资源 modelarts.notebook=调试任务 modelarts.train_job=训练任务 modelarts.train_job.new_debug=新建调试任务 @@ -848,7 +853,9 @@ modelarts.train_job.start_file=启动文件 modelarts.train_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。 modelarts.train_job.boot_file_place=填写启动文件路径,默认为train.py modelarts.train_job.dataset=数据集 -modelarts.code_version=代码版本 +modelarts.code_version=代码分支 +modelarts.parents_version=基于版本 + modelarts.train_job.run_parameter=运行参数 modelarts.train_job.add_run_parameter=增加运行参数 modelarts.train_job.parameter_name=参数名 diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index f2ff19217..b20713bca 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -874,13 +874,12 @@ func RegisterRoutes(m *macaron.Macaron) { }) m.Group("/train-job", func() { m.Group("/:jobid", func() { - // m.Get("", repo.GetModelArtsTrainJob) m.Get("", repo.GetModelArtsTrainJobVersion) - // m.Get("/log", repo.TrainJobGetLog) m.Get("/log", repo.TrainJobGetLog) - // m.Group("/:version-name", func() { - // m.Get("", repo.GetModelArtsTrainJobVersion) - // }) + m.Post("/del_version", repo.DelTrainJobVersion) + m.Post("/stop_version", repo.StopTrainJobVersion) + m.Get("/model_list", repo.ModelList) + m.Get("/model_download", repo.ModelDownload) }) }) }, reqRepoReader(models.UnitTypeCloudBrain)) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 181d97983..ba4015d3b 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -8,11 +8,14 @@ package repo import ( "net/http" "strconv" + "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" ) func GetModelArtsNotebook(ctx *context.APIContext) { @@ -102,6 +105,14 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { job.Status = modelarts.TransTrainJobStatus(result.IntStatus) job.Duration = result.Duration job.TrainJobDuration = result.TrainJobDuration + + if result.Duration != 0 { + job.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000) + + } else { + job.TrainJobDuration = "00:00:00" + } + err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) @@ -110,23 +121,35 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { ctx.JSON(http.StatusOK, map[string]interface{}{ "JobID": jobID, "JobStatus": job.Status, - "JobDuration": job.Duration, + "JobDuration": job.TrainJobDuration, }) } +func addZero(t int64) (m string) { + if t < 10 { + m = "0" + strconv.FormatInt(t, 10) + return m + } else { + return strconv.FormatInt(t, 10) + } +} + func TrainJobGetLog(ctx *context.APIContext) { var ( err error ) - log.Info("test") - var jobID = ctx.Params(":jobid") var versionName = ctx.Query("version_name") - var logFileName = ctx.Query("file_name") + // var logFileName = ctx.Query("file_name") var baseLine = ctx.Query("base_line") var order = ctx.Query("order") + var lines = ctx.Query("lines") + lines_int, err := strconv.Atoi(lines) + if err != nil { + log.Error("change lines(%d) string to int failed", lines_int) + } if order != modelarts.OrderDesc && order != modelarts.OrderAsc { log.Error("order(%s) check failed", order) @@ -136,29 +159,202 @@ func TrainJobGetLog(ctx *context.APIContext) { return } - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + resultLogFile, result, err := trainJobGetLogContent(jobID, versionName, baseLine, order, lines_int) if err != nil { - log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) - ctx.JSON(http.StatusInternalServerError, map[string]interface{}{ - "err_msg": "GetCloudbrainByJobIDAndVersionName failed", - }) + log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) + // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } - result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines) + ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "LogFileName": resultLogFile.LogFileList[0], + "StartLine": result.StartLine, + "EndLine": result.EndLine, + "Content": result.Content, + "Lines": result.Lines, + }) +} + +func trainJobGetLogContent(jobID string, versionName string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) { + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) + return nil, nil, err + } + + resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10)) + if err != nil { + log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) + return nil, nil, err + } + + result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, resultLogFile.LogFileList[0], order, lines) if err != nil { log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) - ctx.JSON(http.StatusInternalServerError, map[string]interface{}{ - "err_msg": "GetTrainJobLog failed", - }) + return nil, nil, err + } + + return resultLogFile, result, err +} + +func DelTrainJobVersion(ctx *context.APIContext) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + ctx.NotFound(err) + return + } + + _, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10)) + if err != nil { + log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error()) + ctx.NotFound(err) + return + } + + err = models.DeleteJobVersion(task) + if err != nil { + ctx.ServerError("DeleteJobVersion failed", err) + ctx.NotFound(err) return } + //获取删除后的版本数量 + repo := ctx.Repo.Repository + page := ctx.QueryInt("page") + if page <= 0 { + page = 1 + } + VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ + ListOptions: models.ListOptions{ + Page: page, + PageSize: setting.UI.IssuePagingNum, + }, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + JobID: jobID, + }) + if err != nil { + ctx.ServerError("get VersionListCount faild", err) + return + } + + //判断当前的任务是否是最新版本,若是,将排序后的第一个版本设置为最新版本,若不是,最新版本不变,更改最新版本的版本数。 + if task.IsLatestVersion == modelarts.IsLatestVersion { + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) + if err != nil { + ctx.ServerError("UpdateJobVersionCount failed", err) + return + } + } else { + latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(jobID, modelarts.IsLatestVersion) + if err != nil { + ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) + return + } + err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) + if err != nil { + ctx.ServerError("UpdateJobVersionCount failed", err) + return + } + } + ctx.JSON(http.StatusOK, map[string]interface{}{ - "JobID": jobID, - "StartLine": result.StartLine, - "EndLine": result.EndLine, - "Content": result.Content, - "Lines": result.Lines, + "JobID": jobID, + "VersionName": versionName, + "StatusOK": 0, + }) +} + +func StopTrainJobVersion(ctx *context.APIContext) { + var ( + err error + ) + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + + _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) + if err != nil { + log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) + return + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "VersionName": versionName, + "StatusOK": 0, + }) +} + +func ModelList(ctx *context.APIContext) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + var versionName = ctx.Query("version_name") + parentDir := ctx.Query("parentDir") + dirArray := strings.Split(parentDir, "/") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + VersionOutputPath := "V" + strconv.Itoa(task.TotalVersionCount) + models, err := storage.GetObsListObjectVersion(task.JobName, parentDir, VersionOutputPath) + if err != nil { + log.Info("get TrainJobListModel failed:", err) + ctx.ServerError("GetObsListObject:", err) + return + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "VersionName": versionName, + "StatusOK": 0, + "Path": dirArray, + "Dirs": models, + "task": task, + "PageIsCloudBrain": true, }) } + +func ModelDownload(ctx *context.APIContext) { + var ( + err error + ) + + var jobID = ctx.Params(":jobid") + versionName := ctx.Query("version_name") + parentDir := ctx.Query("parent_dir") + fileName := ctx.Query("file_name") + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + VersionOutputPath := "V" + strconv.Itoa(task.TotalVersionCount) + + url, err := storage.GetObsCreateVersionSignedUrl(task.JobName, parentDir, fileName, VersionOutputPath) + if err != nil { + log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("GetObsCreateSignedUrl", err) + return + } + http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) +} diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 0a20cf576..8824c584d 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -288,6 +288,17 @@ func TrainJobIndex(ctx *context.Context) { return } + // for i, task := range tasks { + // result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) + // if err != nil { + // log.Error("GetJob(%s) failed:%v", task.JobID, err.Error()) + // return + // } + // // tasks[i].Status = modelarts.TransTrainJobStatus(result.Status) + // tasks[i].Status = result.Status + // tasks[i].Duration = result.Duration + // } + pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) pager.SetDefaultParams(ctx) ctx.Data["Page"] = pager @@ -463,7 +474,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["dataset_name"] = task.DatasetName ctx.Data["work_server_number"] = task.WorkServerNumber ctx.Data["flavor_name"] = task.FlavorName - ctx.Data["engine_name"] = task.FlavorName + ctx.Data["engine_name"] = task.EngineName ctx.Data["uuid"] = task.Uuid ctx.Data["flavor_code"] = task.FlavorCode ctx.Data["engine_id"] = task.EngineID @@ -480,6 +491,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true + VersionOutputPath := "V" + strconv.Itoa(modelarts.TotalVersionCount) jobName := form.JobName uuid := form.Attachment description := form.Description @@ -493,8 +505,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath - outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath - logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName isLatestVersion := modelarts.IsLatestVersion @@ -543,14 +555,15 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } //todo: upload code (send to file_server todo this work?) - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { + // if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) return } - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) @@ -629,28 +642,29 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) } req := &modelarts.GenerateTrainJobReq{ - JobName: jobName, - DataUrl: dataPath, - Description: description, - CodeObsPath: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - BootFile: bootFile, - TrainUrl: outputObsPath, - FlavorCode: flavorCode, - WorkServerNumber: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Uuid: uuid, - Parameters: parameters.Parameter, - CommitID: commitID, - IsLatestVersion: isLatestVersion, - BranchName: branch_name, - Params: form.Params, - FatherVersionName: modelarts.InitFatherVersionName, + JobName: jobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: outputObsPath, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Parameters: parameters.Parameter, + CommitID: commitID, + IsLatestVersion: isLatestVersion, + BranchName: branch_name, + Params: form.Params, + // FatherVersionName: InitVersionName, FlavorName: FlavorName, EngineName: EngineName, VersionCount: VersionCount, + TotalVersionCount: modelarts.TotalVersionCount, } err = modelarts.GenerateTrainJob(ctx, req) @@ -665,42 +679,20 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } - // // 保存openi创建训练任务界面的参数 - // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - - // JobName: req.JobName, - // JobID: strconv.FormatInt(jobResult.JobID, 10), - // VersionName: jobResult.VersionName, - // ResourcePools: form.PoolID, - // EngineVersions: form.EngineID, - // FlavorInfos: form.Flavor, - // TrainUrl: outputObsPath, - // BootFile: form.BootFile, - // Uuid: form.Attachment, - // DatasetName: attach.Name, - // Params: form.Params, - // BranchName: branch_name, - // }) - - // if err != nil { - // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) - // trainJobNewVersionDataPrepare(ctx) - // ctx.Data["bootFile"] = form.BootFile - // ctx.Data["uuid"] = form.Attachment - // ctx.Data["datasetName"] = attach.Name - // ctx.Data["params"] = form.Params - // ctx.Data["branch_name"] = branch_name - // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) - // return - // } ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true var jobID = ctx.Params(":jobid") - // var versionName = ctx.Params(":version-name") - var versionName = ctx.Query("version_name") + + latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(jobID, modelarts.IsLatestVersion) + if err != nil { + ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) + return + } + + VersionOutputPath := "V" + strconv.Itoa(latestTask.TotalVersionCount+1) jobName := form.JobName uuid := form.Attachment @@ -715,11 +707,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath - outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath - logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" branch_name := form.BranchName - fatherVersionName := versionName + fatherVersionName := form.VersionName FlavorName := form.FlavorName EngineName := form.EngineName @@ -762,14 +754,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ } //todo: upload code (send to file_server todo this work?) - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) return } - if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) @@ -860,27 +852,30 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } req := &modelarts.GenerateTrainJobVersionReq{ - JobName: task.JobName, - DataUrl: dataPath, - Description: description, - CodeObsPath: codeObsPath, - BootFileUrl: codeObsPath + bootFile, - BootFile: bootFile, - TrainUrl: outputObsPath, - FlavorCode: flavorCode, - WorkServerNumber: workServerNumber, - EngineID: int64(engineID), - LogUrl: logObsPath, - PoolID: poolID, - Uuid: uuid, - Params: form.Params, - PreVersionId: task.VersionID, - CommitID: commitID, - BranchName: branch_name, - FlavorName: FlavorName, - EngineName: EngineName, + JobName: task.JobName, + DataUrl: dataPath, + Description: description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + bootFile, + BootFile: bootFile, + TrainUrl: outputObsPath, + FlavorCode: flavorCode, + WorkServerNumber: workServerNumber, + EngineID: int64(engineID), + LogUrl: logObsPath, + PoolID: poolID, + Uuid: uuid, + Params: form.Params, + Parameters: parameters.Parameter, + PreVersionId: task.VersionID, + CommitID: commitID, + BranchName: branch_name, + FlavorName: FlavorName, + EngineName: EngineName, + FatherVersionName: fatherVersionName, + TotalVersionCount: latestTask.TotalVersionCount + 1, } - err = modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) + err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) trainJobNewVersionDataPrepare(ctx) @@ -891,36 +886,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } - // 保存openi创建训练任务界面的参数 - // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ - - // JobName: req.JobName, - // JobID: strconv.FormatInt(jobResult.JobID, 10), - // VersionName: jobResult.VersionName, - // ResourcePools: form.PoolID, - // EngineVersions: form.EngineID, - // FlavorInfos: form.Flavor, - // TrainUrl: outputObsPath, - // BootFile: form.BootFile, - // Uuid: form.Attachment, - // DatasetName: attach.Name, - // Params: form.Params, - // BranchName: branch_name, - // }) - - // if err != nil { - // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) - // trainJobNewVersionDataPrepare(ctx) - // ctx.Data["bootFile"] = form.BootFile - // ctx.Data["uuid"] = form.Attachment - // ctx.Data["datasetName"] = attach.Name - // ctx.Data["params"] = form.Params - // ctx.Data["branch_name"] = branch_name - // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) - // return - // } - // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") - ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") + // ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } // readDir reads the directory named by dirname and returns @@ -1014,11 +981,6 @@ func TrainJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true var jobID = ctx.Params(":jobid") - task, err := models.GetCloudbrainByJobID(jobID) - if err != nil { - ctx.ServerError("GetCloudbrainByJobID faild", err) - return - } repo := ctx.Repo.Repository page := ctx.QueryInt("page") @@ -1035,74 +997,48 @@ func TrainJobShow(ctx *context.Context) { JobType: string(models.JobTypeTrain), JobID: jobID, }) - if err != nil { - ctx.ServerError("Cloudbrain", err) - return - } if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) + log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) return } + //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 + for i, _ := range VersionListTasks { - // attach, err := models.GetAttachmentByUUID(task.Uuid) - // if err != nil { - // log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error()) - // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - // return - // } + var parameters models.Parameters - result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) - if err != nil { - log.Error("GetJob(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - return - } - - if result != nil { - result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05") - if result.Duration != 0 { - result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000) - - } else { - result.TrainJobDuration = "00:00:00" - } - result.Status = modelarts.TransTrainJobStatus(result.IntStatus) - err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration)) + err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) if err != nil { - ctx.ServerError("UpdateJob failed", err) + log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) + trainJobNewDataPrepare(ctx) return } - result.DatasetName = task.DatasetName - } - - resultLogFile, resultLog, err := trainJobGetLog(jobID) - if err != nil { - log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - return + if len(parameters.Parameter) > 0 { + paramTemp := "" + for _, Parameter := range parameters.Parameter { + param := Parameter.Label + " = " + Parameter.Value + ", " + paramTemp = paramTemp + param + } + VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] + } } - ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] - ctx.Data["log"] = resultLog - ctx.Data["task"] = task ctx.Data["jobID"] = jobID - ctx.Data["result"] = result ctx.Data["version_list_task"] = VersionListTasks ctx.Data["version_list_count"] = VersionListCount ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) } -func addZero(t int64) (m string) { - if t < 10 { - m = "0" + strconv.FormatInt(t, 10) - return m - } else { - return strconv.FormatInt(t, 10) - } -} +// func addZero(t int64) (m string) { +// if t < 10 { +// m = "0" + strconv.FormatInt(t, 10) +// return m +// } else { +// return strconv.FormatInt(t, 10) +// } +// } func TrainJobGetLog(ctx *context.Context) { ctx.Data["PageIsTrainJob"] = true @@ -1160,26 +1096,40 @@ func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *model func TrainJobDel(ctx *context.Context) { var jobID = ctx.Params(":jobid") - task, err := models.GetCloudbrainByJobID(jobID) + repo := ctx.Repo.Repository + page := ctx.QueryInt("page") + + if page <= 0 { + page = 1 + } + VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ + ListOptions: models.ListOptions{ + Page: page, + PageSize: setting.UI.IssuePagingNum, + }, + RepoID: repo.ID, + Type: models.TypeCloudBrainTwo, + JobType: string(models.JobTypeTrain), + JobID: jobID, + }) if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) + ctx.ServerError("get VersionListTasks failed", err) return } - + for _, task := range VersionListTasks { + err = models.DeleteJobVersion(&task.Cloudbrain) + if err != nil { + ctx.ServerError("DeleteJobVersion failed", err) + return + } + } _, err = modelarts.DelTrainJob(jobID) if err != nil { - log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error()) + log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error()) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) return } - err = models.DeleteJob(task) - if err != nil { - ctx.ServerError("DeleteJob failed", err) - return - } - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } @@ -1202,54 +1152,6 @@ func TrainJobStop(ctx *context.Context) { ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } -func TrainJobVersionDel(ctx *context.Context) { - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query(":versionName") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) - if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - return - } - - _, err = modelarts.DelTrainJob(jobID) - if err != nil { - log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) - return - } - - err = models.DeleteJob(task) - if err != nil { - ctx.ServerError("DeleteJob failed", err) - return - } - - // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") - ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) -} - -func TrainJobVersionStop(ctx *context.Context) { - var jobID = ctx.Params(":jobid") - var versionName = ctx.Query(":versionName") - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) - if err != nil { - log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) - return - } - - _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) - if err != nil { - log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) - return - } - - // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") - ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) -} - func canUserCreateTrainJob(uid int64) (bool, error) { org, err := models.GetOrgByName(setting.AllowedOrg) if err != nil { @@ -1350,15 +1252,17 @@ func TrainJobVersionShowModels(ctx *context.Context) { jobID := ctx.Params(":jobid") parentDir := ctx.Query("parentDir") versionName := ctx.Query("version_name") - dirArray := strings.Split(parentDir, "/") + // dirArray := strings.Split(parentDir, "/") task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) if err != nil { log.Error("no such job!", ctx.Data["msgID"]) ctx.ServerError("no such job:", err) return } - parentDir = versionName - models, err := storage.GetVersionObsListObject(task.JobName, parentDir) + // parentDir = versionName + VersionOutputPath := "V" + strconv.Itoa(task.TotalVersionCount) + dirArray := strings.Split(VersionOutputPath, "/") + models, err := storage.GetObsListObjectVersion(task.JobName, parentDir, VersionOutputPath) if err != nil { log.Info("get TrainJobListModel failed:", err) ctx.ServerError("GetVersionObsListObject:", err) @@ -1366,6 +1270,7 @@ func TrainJobVersionShowModels(ctx *context.Context) { } ctx.Data["Path"] = dirArray + // ctx.Data["Path"] = VersionOutputPath ctx.Data["Dirs"] = models ctx.Data["task"] = task ctx.Data["JobID"] = jobID @@ -1384,3 +1289,26 @@ func TrainJobDownloadModel(ctx *context.Context) { } http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) } + +func TrainJobVersionDownloadModel(ctx *context.Context) { + var jobID = ctx.Params(":jobid") + + parentDir := ctx.Query("parentDir") + fileName := ctx.Query("fileName") + versionName := ctx.Query("version_name") + + task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) + if err != nil { + log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) + return + } + VersionOutputPath := "V" + strconv.Itoa(task.TotalVersionCount) + + url, err := storage.GetObsCreateVersionSignedUrl(task.JobName, parentDir, fileName, VersionOutputPath) + if err != nil { + log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"]) + ctx.ServerError("GetObsCreateSignedUrl", err) + return + } + http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) +} diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 994cbf36c..2c16c1eb5 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -997,23 +997,14 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("", reqRepoCloudBrainReader, repo.TrainJobShow) m.Post("/stop", reqRepoCloudBrainWriter, repo.TrainJobStop) m.Post("/del", reqRepoCloudBrainWriter, repo.TrainJobDel) - m.Get("/log", reqRepoCloudBrainReader, repo.TrainJobGetLog) - m.Get("/models", reqRepoCloudBrainReader, repo.TrainJobShowModels) - m.Get("/download_model", reqRepoCloudBrainReader, repo.TrainJobDownloadModel) - m.Get("/version_models", reqRepoCloudBrainReader, repo.TrainJobVersionShowModels) - // m.Group("/:version-name", func() { + m.Get("/models", reqRepoCloudBrainReader, repo.TrainJobVersionShowModels) + m.Get("/download_model", reqRepoCloudBrainReader, repo.TrainJobVersionDownloadModel) m.Get("/create_version", reqRepoCloudBrainReader, repo.TrainJobNewVersion) m.Post("/create_version", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) - // }) - m.Post("/stop_version", reqRepoCloudBrainWriter, repo.TrainJobVersionStop) - m.Post("/del_version", reqRepoCloudBrainWriter, repo.TrainJobVersionDel) }) m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNew) m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) - // m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNewVersion) - // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) - m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) }) }, context.RepoRef()) diff --git a/templates/repo/cloudbrain/show.tmpl b/templates/repo/cloudbrain/show.tmpl index 842f629c9..8cec8f5d2 100755 --- a/templates/repo/cloudbrain/show.tmpl +++ b/templates/repo/cloudbrain/show.tmpl @@ -6,7 +6,19 @@ {{template "base/alert" .}}

- 返回 +

diff --git a/templates/repo/modelarts/notebook/show.tmpl b/templates/repo/modelarts/notebook/show.tmpl index 3f914b56d..cac87df79 100755 --- a/templates/repo/modelarts/notebook/show.tmpl +++ b/templates/repo/modelarts/notebook/show.tmpl @@ -6,7 +6,19 @@ {{template "base/alert" .}}

- 返回 +

diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 4a8664082..6c989adef 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -333,7 +333,7 @@
- + {{.Status}}
@@ -381,12 +381,12 @@ {{end}}
-
- + +
{{$.CsrfTokenHtml}} @@ -442,6 +442,8 @@ {{template "base/footer" .}} \ No newline at end of file diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index 46fc3f678..2a335857e 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -156,12 +156,20 @@ {{.CsrfTokenHtml}} + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+
+ + + +
@@ -198,7 +206,7 @@
-
+
{{if .flavor_name}} @@ -331,9 +339,12 @@