diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go
index 59f72696e..a34c87870 100755
--- a/modules/auth/modelarts.go
+++ b/modules/auth/modelarts.go
@@ -45,6 +45,28 @@ type CreateModelArtsTrainJobForm struct {
 	EngineName            string `form:"engine_names" binding:"Required"`
 }
 
+// CreateModelArtsInferenceJobForm carries the fields posted when creating a ModelArts inference job.
+type CreateModelArtsInferenceJobForm struct {
+	JobName               string `form:"job_name" binding:"Required"`
+	Attachment            string `form:"attachment" binding:"Required"`
+	BootFile              string `form:"boot_file" binding:"Required"`
+	WorkServerNumber      int    `form:"work_server_number" binding:"Required"`
+	EngineID              int    `form:"engine_id" binding:"Required"`
+	PoolID                string `form:"pool_id" binding:"Required"`
+	Flavor                string `form:"flavor" binding:"Required"`
+	Params                string `form:"run_para_list" binding:"Required"`
+	Description           string `form:"description"`
+	IsSaveParam           string `form:"is_save_para"`
+	ParameterTemplateName string `form:"parameter_template_name"`
+	PrameterDescription   string `form:"parameter_description"`
+	BranchName            string `form:"branch_name" binding:"Required"`
+	VersionName           string `form:"version_name" binding:"Required"`
+	FlavorName            string `form:"flaver_names" binding:"Required"`
+	EngineName            string `form:"engine_names" binding:"Required"`
+	TrainUrl              string `form:"train_url" binding:"Required"`
+	CkptName              string `form:"ckpt_name" binding:"Required"`
+}
+
 func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
 	return validate(errs, ctx.Data, f, ctx.Locale)
 }
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index 273d59012..c0a003450 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -38,6 +38,7 @@ const (
 	// "]}"
 	CodePath   = "/code/"
 	OutputPath = "/output/"
+	ResultPath = "/result/"
 	LogPath    = "/log/"
 	JobPath    = "/job/"
 	OrderDesc  = "desc" //向下查询
@@ -117,6 +118,34 @@ type GenerateTrainJobVersionReq struct {
 	TotalVersionCount int
 }
 
+// GenerateInferenceJobReq describes the job to submit to ModelArts and the values recorded with it.
+type GenerateInferenceJobReq struct {
+	JobName           string
+	Uuid              string
+	Description       string
+	CodeObsPath       string
+	BootFile          string
+	BootFileUrl       string
+	DataUrl           string
+	TrainUrl          string
+	FlavorCode        string
+	LogUrl            string
+	PoolID            string
+	WorkServerNumber  int
+	EngineID          int64
+	Parameters        []models.Parameter
+	CommitID          string
+	IsLatestVersion   string
+	Params            string
+	BranchName        string
+	PreVersionId      int64
+	PreVersionName    string
+	FlavorName        string
+	VersionCount      int
+	EngineName        string
+	TotalVersionCount int
+}
+
 type VersionInfo struct {
 	Version []struct {
 		ID int `json:"id"`
@@ -449,3 +478,77 @@ func GetVersionOutputPathByTotalVersionCount(TotalVersionCount int) (VersionOutp
 	VersionOutputPath = "V" + talVersionCountToString
 	return VersionOutputPath
 }
+
+// GenerateInferenceJob looks up the dataset attachment named by req.Uuid, submits the job
+// described by req through createTrainJob and records the created job as a Cloudbrain entry
+// for the current user and repository.
+func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) {
+	// Resolve the attachment before anything is created remotely, so an unknown UUID
+	// cannot leave a submitted job without a local record.
+	attach, err := models.GetAttachmentByUUID(req.Uuid)
+	if err != nil {
+		log.Error("GetAttachmentByUUID(%s) failed:%v", req.Uuid, err.Error())
+		return err
+	}
+
+	jobResult, err := createTrainJob(models.CreateTrainJobParams{
+		JobName:     req.JobName,
+		Description: req.Description,
+		Config: models.Config{
+			WorkServerNum: req.WorkServerNumber,
+			AppUrl:        req.CodeObsPath,
+			BootFileUrl:   req.BootFileUrl,
+			DataUrl:       req.DataUrl,
+			EngineID:      req.EngineID,
+			TrainUrl:      req.TrainUrl,
+			LogUrl:        req.LogUrl,
+			PoolID:        req.PoolID,
+			CreateVersion: true,
+			Flavor: models.Flavor{
+				Code: req.FlavorCode,
+			},
+			Parameter: req.Parameters,
+		},
+	})
+	if err != nil {
+		log.Error("CreateJob failed: %v", err.Error())
+		return err
+	}
+
+	err = models.CreateCloudbrain(&models.Cloudbrain{
+		Status:            TransTrainJobStatus(jobResult.Status),
+		UserID:            ctx.User.ID,
+		RepoID:            ctx.Repo.Repository.ID,
+		JobID:             strconv.FormatInt(jobResult.JobID, 10),
+		JobName:           req.JobName,
+		JobType:           string(models.JobTypeTrain),
+		Type:              models.TypeCloudBrainTwo,
+		VersionID:         jobResult.VersionID,
+		VersionName:       jobResult.VersionName,
+		Uuid:              req.Uuid,
+		DatasetName:       attach.Name,
+		CommitID:          req.CommitID,
+		IsLatestVersion:   req.IsLatestVersion,
+		ComputeResource:   NPUResource,
+		EngineID:          req.EngineID,
+		TrainUrl:          req.TrainUrl,
+		BranchName:        req.BranchName,
+		Parameters:        req.Params,
+		BootFile:          req.BootFile,
+		DataUrl:           req.DataUrl,
+		LogUrl:            req.LogUrl,
+		FlavorCode:        req.FlavorCode,
+		Description:       req.Description,
+		WorkServerNumber:  req.WorkServerNumber,
+		FlavorName:        req.FlavorName,
+		EngineName:        req.EngineName,
+		VersionCount:      req.VersionCount,
+		TotalVersionCount: req.TotalVersionCount,
+	})
+	if err != nil {
+		log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
+		return err
+	}
+
+	return nil
+}
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index 920205334..c19eacb7a 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -38,6 +38,10 @@ const (
 	tplModelArtsTrainJobNew        base.TplName = "repo/modelarts/trainjob/new"
 	tplModelArtsTrainJobShow       base.TplName = "repo/modelarts/trainjob/show"
 	tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new"
+
+	tplModelArtsInferenceJobIndex base.TplName = "repo/modelarts/inferencejob/index"
+	tplModelArtsInferenceJobNew   base.TplName = "repo/modelarts/inferencejob/new"
+	tplModelArtsInferenceJobShow  base.TplName = "repo/modelarts/inferencejob/show"
 )
 
 func DebugJobIndex(ctx *context.Context) {
@@ -737,11 +741,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
 	// }
 
 	//todo: del the codeLocalPath
-	// _, err := ioutil.ReadDir(codeLocalPath)
-	// if err == nil {
-	// 	os.RemoveAll(codeLocalPath)
-	// }
-	os.RemoveAll(codeLocalPath)
+	// os.RemoveAll returns nil when the path does not exist, so only a genuine cleanup failure aborts.
+	if err = os.RemoveAll(codeLocalPath); err != nil {
+		log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err)
+		// NOTE(review): this reuses the version-form helper and template inside TrainJobCreate — confirm.
+		versionErrorDataPrepare(ctx, form)
+		ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form)
+		return
+	}
 
 	gitRepo, _ := git.OpenRepository(repo.RepoPath())
 	commitID, _ := gitRepo.GetBranchCommitID(branch_name)
@@ -954,11 +961,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
 	// }
 
 	//todo: del the codeLocalPath
-	// _, err = ioutil.ReadDir(codeLocalPath)
-	// if err == nil {
-	// 	os.RemoveAll(codeLocalPath)
-	// }
-	os.RemoveAll(codeLocalPath)
+	// os.RemoveAll returns nil when the path does not exist, so only a genuine cleanup failure aborts.
+	if err = os.RemoveAll(codeLocalPath); err != nil {
+		log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err)
+		versionErrorDataPrepare(ctx, form)
+		ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form)
+		return
+	}
 
 	gitRepo, _ := git.OpenRepository(repo.RepoPath())
 	commitID, _ := gitRepo.GetBranchCommitID(branch_name)
@@ -1196,6 +1205,22 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
 	return nil
 }
 
+// paramCheckCreateInferenceJob validates the submitted form: the boot file must end in ".py"
+// and the number of work servers must be between 1 and 25 inclusive.
+func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) error {
+	if !strings.HasSuffix(form.BootFile, ".py") {
+		log.Error("the boot file(%s) must be a python file", form.BootFile)
+		return errors.New("启动文件必须是python文件")
+	}
+
+	if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
+		log.Error("the WorkServerNumber(%d) must be in [1,25]", form.WorkServerNumber)
+		return errors.New("计算节点数必须在1-25之间")
+	}
+
+	return nil
+}
+
 func TrainJobShow(ctx *context.Context) {
 	ctx.Data["PageIsCloudBrain"] = true
 	var jobID = ctx.Params(":jobid")
@@ -1474,3 +1499,473 @@ func ModelDownload(ctx *context.Context) {
 	}
 	http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
 }
+
+// InferenceJobCreate handles the inference-job creation form: it validates the input,
+// uploads the selected branch to OBS and submits the job to ModelArts.
+func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) {
+	ctx.Data["PageIsTrainJob"] = true
+	jobName := form.JobName
+	uuid := form.Attachment
+	bootFile := form.BootFile
+	params := form.Params
+	branchName := form.BranchName
+	trainUrl := form.TrainUrl
+	repo := ctx.Repo.Repository
+	codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
+	codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
+	logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
+	// NOTE(review): form.IsSaveParam and form.CkptName are bound but not used here yet — confirm intent.
+
+	// renderErr re-populates the creation form and shows msg on it.
+	renderErr := func(msg string) {
+		inferenceJobErrorNewDataPrepare(ctx, form)
+		ctx.RenderWithErr(msg, tplModelArtsInferenceJobNew, &form)
+	}
+
+	count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
+	if err != nil {
+		log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
+		renderErr("system error")
+		return
+	}
+	if count >= 1 {
+		log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
+		renderErr("you have already a running or waiting task, can not create more")
+		return
+	}
+
+	if err := paramCheckCreateInferenceJob(form); err != nil {
+		log.Error("paramCheckCreateInferenceJob failed:(%v)", err)
+		renderErr(err.Error())
+		return
+	}
+
+	// The dataset path below slices the first two characters of the attachment UUID.
+	if len(uuid) < 2 {
+		log.Error("invalid attachment uuid: %s", uuid)
+		renderErr("dataset is invalid")
+		return
+	}
+	dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
+
+	//todo: del the codeLocalPath
+	// os.RemoveAll returns nil when the path does not exist, so only a genuine cleanup failure aborts.
+	if err = os.RemoveAll(codeLocalPath); err != nil {
+		log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err)
+		renderErr("创建任务失败,原代码还未删除,请重试!")
+		return
+	}
+
+	gitRepo, err := git.OpenRepository(repo.RepoPath())
+	if err != nil {
+		log.Error("Failed to open repository: %s (%v)", repo.FullName(), err)
+		renderErr("Failed to open repository")
+		return
+	}
+	commitID, err := gitRepo.GetBranchCommitID(branchName)
+	if err != nil {
+		log.Error("Failed to get commit ID of branch %s: %s (%v)", branchName, repo.FullName(), err)
+		renderErr("Failed to get the branch commit")
+		return
+	}
+
+	if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{
+		Branch: branchName,
+	}); err != nil {
+		log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err)
+		renderErr("创建任务失败,服务器超时!")
+		return
+	}
+
+	//todo: upload code (send to file_server todo this work?)
+	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath); err != nil {
+		log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err)
+		renderErr("Failed to obsMkdir_result")
+		return
+	}
+
+	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
+		log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
+		renderErr("Failed to obsMkdir_log")
+		return
+	}
+
+	if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
+		log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
+		renderErr("Failed to uploadCodeToObs")
+		return
+	}
+
+	//todo: del local code?
+
+	// The TrainUrl and DataUrl labels are fixed here; user-supplied entries with those labels are skipped below.
+	param := []models.Parameter{
+		{Label: modelarts.TrainUrl, Value: trainUrl},
+		{Label: modelarts.DataUrl, Value: dataPath},
+	}
+	if len(params) != 0 {
+		var parameters models.Parameters
+		if err := json.Unmarshal([]byte(params), &parameters); err != nil {
+			log.Error("Failed to Unmarshal params: %s (%v)", params, err)
+			renderErr("运行参数错误")
+			return
+		}
+
+		for _, parameter := range parameters.Parameter {
+			if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
+				param = append(param, models.Parameter{
+					Label: parameter.Label,
+					Value: parameter.Value,
+				})
+			}
+		}
+	}
+
+	// Parse params into modelarts.Parameters as well (this also rejects an empty value); a failure is reported to the front end.
+	var versionParameters modelarts.Parameters
+	if err := json.Unmarshal([]byte(params), &versionParameters); err != nil {
+		ctx.ServerError("json.Unmarshal failed:", err)
+		return
+	}
+
+	req := &modelarts.GenerateInferenceJobReq{
+		JobName:           jobName,
+		DataUrl:           dataPath,
+		Description:       form.Description,
+		CodeObsPath:       codeObsPath,
+		BootFileUrl:       codeObsPath + bootFile,
+		BootFile:          bootFile,
+		TrainUrl:          trainUrl,
+		FlavorCode:        form.Flavor,
+		WorkServerNumber:  form.WorkServerNumber,
+		EngineID:          int64(form.EngineID),
+		LogUrl:            logObsPath,
+		PoolID:            form.PoolID,
+		Uuid:              uuid,
+		Parameters:        param,
+		CommitID:          commitID,
+		IsLatestVersion:   modelarts.IsLatestVersion,
+		BranchName:        branchName,
+		Params:            form.Params,
+		FlavorName:        form.FlavorName,
+		EngineName:        form.EngineName,
+		VersionCount:      modelarts.VersionCount,
+		TotalVersionCount: modelarts.TotalVersionCount,
+	}
+
+	if err = modelarts.GenerateInferenceJob(ctx, req); err != nil {
+		log.Error("GenerateInferenceJob failed:%v", err.Error())
+		renderErr(err.Error())
+		return
+	}
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job")
+}
+
+// InferenceJobIndex renders one page of the repository's job list.
+func InferenceJobIndex(ctx *context.Context) {
+	MustEnableModelArts(ctx)
+	// Stop here if the check above has already written a response.
+	if ctx.Written() {
+		return
+	}
+
+	repo := ctx.Repo.Repository
+	page := ctx.QueryInt("page")
+	if page <= 0 {
+		page = 1
+	}
+
+	// NOTE(review): jobs are listed (and stored) as JobTypeTrain — confirm whether inference jobs need their own type.
+	tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
+		ListOptions: models.ListOptions{
+			Page:     page,
+			PageSize: setting.UI.IssuePagingNum,
+		},
+		RepoID:          repo.ID,
+		Type:            models.TypeCloudBrainTwo,
+		JobType:         string(models.JobTypeTrain),
+		IsLatestVersion: modelarts.IsLatestVersion,
+	})
+	if err != nil {
+		ctx.ServerError("Cloudbrain", err)
+		return
+	}
+
+	for i, task := range tasks {
+		tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain)
+		tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain)
+	}
+
+	pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
+	pager.SetDefaultParams(ctx)
+	ctx.Data["Page"] = pager
+
+	ctx.Data["PageIsCloudBrain"] = true
+	ctx.Data["Tasks"] = tasks
+	ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx)
+	ctx.Data["RepoIsEmpty"] = repo.IsEmpty
+	ctx.HTML(200, tplModelArtsInferenceJobIndex)
+}
+
+// InferenceJobNew renders the empty inference-job creation form.
+func InferenceJobNew(ctx *context.Context) {
+	if err := inferenceJobNewDataPrepare(ctx); err != nil {
+		ctx.ServerError("get new inference-job info failed", err)
+		return
+	}
+	ctx.HTML(200, tplModelArtsInferenceJobNew)
+}
+
+// inferenceJobCommonDataPrepare fills ctx.Data with what both variants of the creation form
+// need: the user's attachments, the configured resource pools, engines, engine versions and
+// flavors, and the custom parameter configs. Failures are logged and returned; writing the
+// response is left to the caller so that it is never written twice.
+func inferenceJobCommonDataPrepare(ctx *context.Context) error {
+	attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
+	if err != nil {
+		log.Error("GetAllUserAttachments failed:%v", err)
+		return err
+	}
+	ctx.Data["attachments"] = attachs
+
+	var resourcePools modelarts.ResourcePool
+	if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
+		log.Error("json.Unmarshal ResourcePools failed:%v", err)
+		return err
+	}
+	ctx.Data["resource_pools"] = resourcePools.Info
+
+	var engines modelarts.Engine
+	if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
+		log.Error("json.Unmarshal Engines failed:%v", err)
+		return err
+	}
+	ctx.Data["engines"] = engines.Info
+
+	var versionInfos modelarts.VersionInfo
+	if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
+		log.Error("json.Unmarshal EngineVersions failed:%v", err)
+		return err
+	}
+	ctx.Data["engine_versions"] = versionInfos.Version
+
+	var flavorInfos modelarts.Flavor
+	if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
+		log.Error("json.Unmarshal TrainJobFLAVORINFOS failed:%v", err)
+		return err
+	}
+	ctx.Data["flavor_infos"] = flavorInfos.Info
+
+	configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
+	if err != nil {
+		log.Error("getConfigList failed:%v", err)
+		return err
+	}
+	ctx.Data["config_list"] = configList.ParaConfigs
+	return nil
+}
+
+// inferenceJobNewDataPrepare fills ctx.Data for a fresh creation form, including a generated job name.
+func inferenceJobNewDataPrepare(ctx *context.Context) error {
+	ctx.Data["PageIsCloudBrain"] = true
+
+	t := time.Now()
+	jobName := "inference" + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
+	ctx.Data["job_name"] = jobName
+	ctx.Data["result_url"] = "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath
+	ctx.Data["params"] = ""
+	ctx.Data["branchName"] = ctx.Repo.BranchName
+
+	return inferenceJobCommonDataPrepare(ctx)
+}
+
+// inferenceJobErrorNewDataPrepare fills ctx.Data for re-rendering the creation form after a failed
+// submission, echoing back the boot file, attachment, branch and run parameters from form.
+// It never writes a response itself; the caller renders the form with its own error message.
+func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error {
+	ctx.Data["PageIsCloudBrain"] = true
+
+	// NOTE(review): the fresh form uses an "inference" job-name prefix and the keys "result_url"/"branchName",
+	// while this one uses the user name and "train_url"/"branch_name" — confirm what the template expects.
+	t := time.Now()
+	jobName := cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
+	ctx.Data["job_name"] = jobName
+	ctx.Data["train_url"] = "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
+	ctx.Data["bootFile"] = form.BootFile
+	ctx.Data["uuid"] = form.Attachment
+	ctx.Data["branch_name"] = form.BranchName
+
+	if err := inferenceJobCommonDataPrepare(ctx); err != nil {
+		return err
+	}
+
+	// Invalid run parameters are one of the reasons this form is re-rendered, so a parse
+	// failure is only logged and returned.
+	var parameters modelarts.Parameters
+	if err := json.Unmarshal([]byte(form.Params), &parameters); err != nil {
+		log.Error("json.Unmarshal failed:%v", err)
+		return err
+	}
+	ctx.Data["params"] = parameters.Parameter
+	return nil
+}
+
+// InferenceJobShow renders the version list of the job named by the :jobid route parameter.
+func InferenceJobShow(ctx *context.Context) {
+	ctx.Data["PageIsCloudBrain"] = true
+	jobID := ctx.Params(":jobid")
+
+	repo := ctx.Repo.Repository
+	page := ctx.QueryInt("page")
+	if page <= 0 {
+		page = 1
+	}
+	versionListTasks, versionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
+		ListOptions: models.ListOptions{
+			Page:     page,
+			PageSize: setting.UI.IssuePagingNum,
+		},
+		RepoID:  repo.ID,
+		Type:    models.TypeCloudBrainTwo,
+		JobType: string(models.JobTypeTrain),
+		JobID:   jobID,
+	})
+	if err != nil {
+		log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobShow, nil)
+		return
+	}
+	// The first task is read below, so an empty result has to be rejected first.
+	if len(versionListTasks) == 0 {
+		ctx.NotFound("InferenceJobShow", nil)
+		return
+	}
+
+	// Set permissions.
+	canNewJob, err := canUserCreateTrainJobVersion(ctx, versionListTasks[0].UserID)
+	if err != nil {
+		ctx.ServerError("canNewJob failed", err)
+		return
+	}
+	ctx.Data["canNewJob"] = canNewJob
+
+	// Convert the run parameters to the form "epoch_size = 3; device_target = Ascend".
+	for i := range versionListTasks {
+		var parameters models.Parameters
+		if err := json.Unmarshal([]byte(versionListTasks[i].Parameters), &parameters); err != nil {
+			log.Error("Failed to Unmarshal Parameters: %s (%v)", versionListTasks[i].Parameters, err)
+			ctx.ServerError("json.Unmarshal failed:", err)
+			return
+		}
+
+		pairs := make([]string, 0, len(parameters.Parameter))
+		for _, parameter := range parameters.Parameter {
+			pairs = append(pairs, parameter.Label+" = "+parameter.Value)
+		}
+		versionListTasks[i].Parameters = strings.Join(pairs, "; ")
+	}
+
+	pager := context.NewPagination(versionListCount, setting.UI.IssuePagingNum, page, 5)
+	pager.SetDefaultParams(ctx)
+	ctx.Data["Page"] = pager
+	ctx.Data["jobID"] = jobID
+	ctx.Data["jobName"] = versionListTasks[0].JobName
+	ctx.Data["version_list_task"] = versionListTasks
+	ctx.Data["version_list_count"] = versionListCount
+	ctx.HTML(http.StatusOK, tplModelArtsInferenceJobShow)
+}
+
+// InferenceJobStop stops the job named by the :jobid route parameter and returns to the job list.
+func InferenceJobStop(ctx *context.Context) {
+	jobID := ctx.Params(":jobid")
+	task, err := models.GetCloudbrainByJobID(jobID)
+	if err != nil {
+		// task may be nil here, so the job ID is logged instead of task.JobName.
+		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil)
+		return
+	}
+
+	_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
+	if err != nil {
+		log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil)
+		return
+	}
+
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job")
+}
+
+// InferenceJobDel deletes the job named by the :jobid route parameter on ModelArts and then
+// removes all of its version records from the Cloudbrain table.
+func InferenceJobDel(ctx *context.Context) {
+	jobID := ctx.Params(":jobid")
+	repo := ctx.Repo.Repository
+
+	versionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
+		RepoID:  repo.ID,
+		Type:    models.TypeCloudBrainTwo,
+		JobType: string(models.JobTypeTrain),
+		JobID:   jobID,
+	})
+	if err != nil {
+		ctx.ServerError("get VersionListTasks failed", err)
+		return
+	}
+
+	// Delete the job record on ModelArts.
+	_, err = modelarts.DelTrainJob(jobID)
+	if err != nil {
+		log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error())
+		ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobIndex, nil)
+		return
+	}
+
+	// Delete the records in the Cloudbrain table.
+	for _, task := range versionListTasks {
+		err = models.DeleteJob(&task.Cloudbrain)
+		if err != nil {
+			ctx.ServerError("DeleteJob failed", err)
+			return
+		}
+	}
+
+	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job")
+}
+
+// ResultDownload redirects to a signed OBS URL for one file of a job version; the file is
+// selected by the version_name, parent_dir and file_name query parameters.
+func ResultDownload(ctx *context.Context) {
+	jobID := ctx.Params(":jobid")
+	versionName := ctx.Query("version_name")
+	parentDir := ctx.Query("parent_dir")
+	fileName := ctx.Query("file_name")
+	log.Info("DownloadSingleModelFile start.")
+	task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
+	if err != nil {
+		// task may be nil here, so the lookup keys are logged instead of task.JobName.
+		log.Error("GetCloudbrainByJobIDAndVersionName(%s, %s) failed:%v", jobID, versionName, err.Error())
+		ctx.ServerError("GetCloudbrainByJobIDAndVersionName", err)
+		return
+	}
+
+	// parent_dir and file_name come from the query string; refuse anything that path.Join
+	// resolves to a location outside this version's output directory (e.g. via "..").
+	baseDir := path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName)
+	filePath := path.Join(baseDir, parentDir, fileName)
+	if !strings.HasPrefix(filePath, baseDir+"/") {
+		log.Error("invalid download path: %s", filePath)
+		ctx.NotFound("ResultDownload", nil)
+		return
+	}
+	objectKey := strings.TrimPrefix(filePath, "/")
+	log.Info("Download path is:%s", objectKey)
+
+	url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, objectKey)
+	if err != nil {
+		log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
+		ctx.ServerError("GetObsCreateSignedUrl", err)
+		return
+	}
+	http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
+}
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index 30e486b98..05ae5fbef 100755
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -1022,6 +1022,18 @@ func RegisterRoutes(m *macaron.Macaron) {
 
 				m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList)
 			})
+
+			m.Group("/inference-job", func() {
+				m.Get("", reqRepoCloudBrainReader, repo.InferenceJobIndex)
+				m.Group("/:jobid", func() {
+					m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow)
+					m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.InferenceJobStop)
+					m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.InferenceJobDel)
+					m.Get("/model_download", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.ResultDownload)
+				})
+				m.Get("/create", reqRepoCloudBrainWriter, repo.InferenceJobNew)
+				m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate)
+			})
 		}, context.RepoRef())
 
 		m.Group("/blockchain", func() {
diff --git a/templates/repo/modelarts/inferencejob/index.tmpl b/templates/repo/modelarts/inferencejob/index.tmpl
new file mode 100644
index 000000000..30d74d258
--- /dev/null
+++ b/templates/repo/modelarts/inferencejob/index.tmpl
@@ -0,0 +1 @@
+test
\ No newline at end of file
diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl
new file mode 100644
index 000000000..30d74d258
--- /dev/null
+++ b/templates/repo/modelarts/inferencejob/new.tmpl
@@ -0,0 +1 @@
+test
\ No newline at end of file
diff --git a/templates/repo/modelarts/inferencejob/show.tmpl b/templates/repo/modelarts/inferencejob/show.tmpl
new file mode 100644
index 000000000..30d74d258
--- /dev/null
+++ b/templates/repo/modelarts/inferencejob/show.tmpl
@@ -0,0 +1 @@
+test
\ No newline at end of file