| @@ -30,6 +30,7 @@ const ( | |||
| JobTypeSnn4imagenet JobType = "SNN4IMAGENET" | |||
| JobTypeBrainScore JobType = "BRAINSCORE" | |||
| JobTypeTrain JobType = "TRAIN" | |||
| JobVersionName JobType = "V0001" | |||
| ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 | |||
| ModelArtsCreating ModelArtsJobStatus = "CREATING" //创建中 | |||
| @@ -68,10 +69,35 @@ type Cloudbrain struct { | |||
| CanDel bool `xorm:"-"` | |||
| Type int `xorm:"INDEX DEFAULT 0"` | |||
| VersionID int64 `xorm:"INDEX DEFAULT 0"` | |||
| VersionName string | |||
| Uuid string | |||
| DatasetName string | |||
| VersionID int64 `xorm:"INDEX DEFAULT 0"` | |||
| VersionName string | |||
| Uuid string | |||
| DatasetName string | |||
| VersionCount int64 `xorm:"INDEX DEFAULT 1"` | |||
| IsLatestVersion string | |||
| CommitID string | |||
| FatherVersionName string | |||
| ComputeResource string | |||
| EngineID int64 | |||
| User *User `xorm:"-"` | |||
| Repo *Repository `xorm:"-"` | |||
| } | |||
| type TrainjobConfigDetail struct { | |||
| ID int64 `xorm:"pk autoincr"` | |||
| JobID string `xorm:"INDEX"` | |||
| JobName string `xorm:"INDEX"` | |||
| ResourcePools string `xorm:"INDEX"` | |||
| EngineVersions int `xorm:"INDEX"` | |||
| FlavorInfos string `xorm:"INDEX"` | |||
| TrainUrl string `xorm:"INDEX"` | |||
| BootFile string `xorm:"INDEX"` | |||
| Uuid string `xorm:"INDEX"` | |||
| DatasetName string `xorm:"INDEX"` | |||
| Params string `xorm:"INDEX"` | |||
| BranchName string `xorm:"INDEX"` | |||
| VersionName string `xorm:"INDEX"` | |||
| User *User `xorm:"-"` | |||
| Repo *Repository `xorm:"-"` | |||
| @@ -150,13 +176,16 @@ type CloudbrainsOptions struct { | |||
| ListOptions | |||
| RepoID int64 // include all repos if empty | |||
| UserID int64 | |||
| JobID int64 | |||
| JobID string | |||
| SortType string | |||
| CloudbrainIDs []int64 | |||
| // JobStatus CloudbrainStatus | |||
| Type int | |||
| JobType string | |||
| Type int | |||
| JobType string | |||
| VersionName string | |||
| IsLatestVersion string | |||
| } | |||
| type TaskPod struct { | |||
| TaskRoleStatus struct { | |||
| Name string `json:"name"` | |||
| @@ -594,6 +623,33 @@ type Config struct { | |||
| PoolID string `json:"pool_id"` | |||
| } | |||
| type CreateTrainJobVersionParams struct { | |||
| Description string `json:"job_desc"` | |||
| Config TrainJobVersionConfig `json:"config"` | |||
| } | |||
| type TrainJobVersionConfig struct { | |||
| WorkServerNum int `json:"worker_server_num"` | |||
| AppUrl string `json:"app_url"` //训练作业的代码目录 | |||
| BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 | |||
| Parameter []Parameter `json:"parameter"` | |||
| DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL | |||
| //DatasetID string `json:"dataset_id"` | |||
| //DataVersionID string `json:"dataset_version_id"` | |||
| //DataSource []DataSource `json:"data_source"` | |||
| //SpecID int64 `json:"spec_id"` | |||
| EngineID int64 `json:"engine_id"` | |||
| //ModelID int64 `json:"model_id"` | |||
| TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL | |||
| LogUrl string `json:"log_url"` | |||
| //UserImageUrl string `json:"user_image_url"` | |||
| //UserCommand string `json:"user_command"` | |||
| //Volumes []Volumes `json:"volumes"` | |||
| Flavor Flavor `json:"flavor"` | |||
| PoolID string `json:"pool_id"` | |||
| PreVersionId int64 `json:"pre_version_id"` | |||
| } | |||
| type CreateConfigParams struct { | |||
| ConfigName string `json:"config_name"` | |||
| Description string `json:"config_desc"` | |||
| @@ -784,12 +840,13 @@ type GetTrainJobResult struct { | |||
| //UserImageUrl string `json:"user_image_url"` | |||
| //UserCommand string `json:"user_command"` | |||
| //Volumes []Volumes `json:"volumes"` | |||
| Flavor Flavor `json:"flavor"` | |||
| PoolID string `json:"pool_id"` | |||
| PoolName string `json:"pool_name"` | |||
| NasMountPath string `json:"nas_mount_path"` | |||
| NasShareAddr string `json:"nas_share_addr"` | |||
| DatasetName string | |||
| Flavor Flavor `json:"flavor"` | |||
| PoolID string `json:"pool_id"` | |||
| PoolName string `json:"pool_name"` | |||
| NasMountPath string `json:"nas_mount_path"` | |||
| NasShareAddr string `json:"nas_share_addr"` | |||
| DatasetName string | |||
| ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话 | |||
| } | |||
| type GetTrainJobLogResult struct { | |||
| @@ -836,7 +893,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
| ) | |||
| } | |||
| if (opts.JobID) > 0 { | |||
| if (opts.JobID) != "" { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.job_id": opts.JobID}, | |||
| ) | |||
| @@ -854,6 +911,12 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
| ) | |||
| } | |||
| if (opts.IsLatestVersion) != "" { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.is_latest_version": opts.IsLatestVersion}, | |||
| ) | |||
| } | |||
| // switch opts.JobStatus { | |||
| // case JobWaiting: | |||
| // cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)}) | |||
| @@ -896,11 +959,83 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
| return cloudbrains, count, nil | |||
| } | |||
| func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
| sess := x.NewSession() | |||
| defer sess.Close() | |||
| var cond = builder.NewCond() | |||
| if opts.RepoID > 0 { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.repo_id": opts.RepoID}, | |||
| ) | |||
| } | |||
| if opts.UserID > 0 { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.user_id": opts.UserID}, | |||
| ) | |||
| } | |||
| if (opts.Type) >= 0 { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.type": opts.Type}, | |||
| ) | |||
| } | |||
| if (opts.JobID) != "" { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.job_id": opts.JobID}, | |||
| ) | |||
| } | |||
| if (opts.JobType) != "" { | |||
| cond = cond.And( | |||
| builder.Eq{"cloudbrain.job_type": opts.JobType}, | |||
| ) | |||
| } | |||
| if len(opts.CloudbrainIDs) > 0 { | |||
| cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs)) | |||
| } | |||
| count, err := sess.Where(cond).Count(new(Cloudbrain)) | |||
| if err != nil { | |||
| return nil, 0, fmt.Errorf("Count: %v", err) | |||
| } | |||
| if opts.Page >= 0 && opts.PageSize > 0 { | |||
| var start int | |||
| if opts.Page == 0 { | |||
| start = 0 | |||
| } else { | |||
| start = (opts.Page - 1) * opts.PageSize | |||
| } | |||
| sess.Limit(opts.PageSize, start) | |||
| } | |||
| sess.OrderBy("cloudbrain.created_unix DESC") | |||
| cloudbrains := make([]*CloudbrainInfo, 0, setting.UI.IssuePagingNum) | |||
| if err := sess.Table(&Cloudbrain{}).Where(cond). | |||
| Join("left", "`user`", "cloudbrain.user_id = `user`.id"). | |||
| Find(&cloudbrains); err != nil { | |||
| return nil, 0, fmt.Errorf("Find: %v", err) | |||
| } | |||
| sess.Close() | |||
| return cloudbrains, count, nil | |||
| } | |||
| func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { | |||
| if _, err = x.Insert(cloudbrain); err != nil { | |||
| return err | |||
| } | |||
| return nil | |||
| } | |||
| func CreateTrainjobConfigDetail(trainjobConfigDetail *TrainjobConfigDetail) (err error) { | |||
| if _, err = x.Insert(trainjobConfigDetail); err != nil { | |||
| return err | |||
| } | |||
| return nil | |||
| } | |||
| @@ -924,6 +1059,16 @@ func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) { | |||
| return getRepoCloudBrain(cb) | |||
| } | |||
| func GetCloudbrainByJobIDAndVersionName(jobID string, versionName string) (*Cloudbrain, error) { | |||
| cb := &Cloudbrain{JobID: jobID, VersionName: versionName} | |||
| return getRepoCloudBrain(cb) | |||
| } | |||
| func GetCloudbrainByJobIDAndIsLatestVersion(jobID string, isLatestVersion string) (*Cloudbrain, error) { | |||
| cb := &Cloudbrain{JobID: jobID, IsLatestVersion: isLatestVersion} | |||
| return getRepoCloudBrain(cb) | |||
| } | |||
| func GetCloudbrainsNeededStopByUserID(userID int64) ([]*Cloudbrain, error) { | |||
| cloudBrains := make([]*Cloudbrain, 0) | |||
| err := x.Cols("job_id", "status", "type").Where("user_id=? AND status !=?", userID, string(JobStopped)).Find(&cloudBrains) | |||
| @@ -948,6 +1093,12 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train | |||
| return | |||
| } | |||
| func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int64, isLatestVersion string) (err error) { | |||
| cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion} | |||
| _, err = x.Cols("version_Count", "is_latest_version").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) | |||
| return | |||
| } | |||
| func UpdateJob(job *Cloudbrain) error { | |||
| return updateJob(x, job) | |||
| } | |||
| @@ -134,6 +134,7 @@ func init() { | |||
| new(BlockChain), | |||
| new(RecommendOrg), | |||
| new(AiModelManage), | |||
| new(TrainjobConfigDetail), | |||
| ) | |||
| tablesStatistic = append(tablesStatistic, | |||
| @@ -38,6 +38,8 @@ type CreateModelArtsTrainJobForm struct { | |||
| IsSaveParam string `form:"is_save_para"` | |||
| ParameterTemplateName string `form:"parameter_template_name"` | |||
| PrameterDescription string `form:"parameter_description"` | |||
| BranchName string `form:"branch_name" binding:"Required"` | |||
| VersionName string `form:"version_name" binding:"Required"` | |||
| } | |||
| func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { | |||
| @@ -35,16 +35,19 @@ const ( | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + | |||
| // "]}" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| IsLatestVersion = "1" | |||
| NotLatestVersion = "0" | |||
| ComputeResource = "NPU" | |||
| SortByCreateTime = "create_time" | |||
| ConfigTypeCustom = "custom" | |||
| @@ -69,6 +72,26 @@ type GenerateTrainJobReq struct { | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| CommitID string | |||
| IsLatestVersion string | |||
| } | |||
| type GenerateTrainJobVersionReq struct { | |||
| JobName string | |||
| Uuid string | |||
| Description string | |||
| CodeObsPath string | |||
| BootFile string | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| PreVersionId int64 | |||
| CommitID string | |||
| } | |||
| type VersionInfo struct { | |||
| @@ -170,7 +193,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description string) error | |||
| return nil | |||
| } | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult *models.CreateTrainJobResult, err error) { | |||
| jobResult, err := createTrainJob(models.CreateTrainJobParams{ | |||
| JobName: req.JobName, | |||
| Description: req.Description, | |||
| @@ -192,35 +215,142 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed: %v", err.Error()) | |||
| return err | |||
| return nil, err | |||
| } | |||
| attach, err := models.GetAttachmentByUUID(req.Uuid) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) | |||
| return nil | |||
| return nil, err | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||
| return err | |||
| return nil, err | |||
| } | |||
| return nil | |||
| return jobResult, nil | |||
| } | |||
| func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (jobresult *models.CreateTrainJobResult, err error) { | |||
| jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{ | |||
| Description: req.Description, | |||
| Config: models.TrainJobVersionConfig{ | |||
| WorkServerNum: req.WorkServerNumber, | |||
| AppUrl: req.CodeObsPath, | |||
| BootFileUrl: req.BootFile, | |||
| DataUrl: req.DataUrl, | |||
| EngineID: req.EngineID, | |||
| TrainUrl: req.TrainUrl, | |||
| LogUrl: req.LogUrl, | |||
| PoolID: req.PoolID, | |||
| Flavor: models.Flavor{ | |||
| Code: req.FlavorCode, | |||
| }, | |||
| Parameter: req.Parameters, | |||
| PreVersionId: req.PreVersionId, | |||
| }, | |||
| }, jobId) | |||
| if err != nil { | |||
| log.Error("CreateJob failed: %v", err.Error()) | |||
| return nil, err | |||
| } | |||
| attach, err := models.GetAttachmentByUUID(req.Uuid) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) | |||
| return nil, err | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| FatherVersionName: fatherVersionName, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||
| return nil, err | |||
| } | |||
| repo := ctx.Repo.Repository | |||
| page := ctx.QueryInt("page") | |||
| if page <= 0 { | |||
| page = 1 | |||
| } | |||
| _, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||
| ListOptions: models.ListOptions{ | |||
| Page: page, | |||
| PageSize: setting.UI.IssuePagingNum, | |||
| }, | |||
| RepoID: repo.ID, | |||
| Type: models.TypeCloudBrainTwo, | |||
| JobType: string(models.JobTypeTrain), | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| }) | |||
| if err != nil { | |||
| ctx.ServerError("Cloudbrain", err) | |||
| return nil, err | |||
| } | |||
| //将训练任务的上一版本的isLatestVersion设置为"0" | |||
| latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(strconv.FormatInt(jobResult.JobID, 10), IsLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) | |||
| return nil, err | |||
| } | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return nil, err | |||
| } | |||
| // lastVersionNum := jobResult.VersionName[1:] | |||
| // lastVersionNumToInt64, err := strconv.ParseInt(lastVersionNum, 10, 64) | |||
| // if err != nil { | |||
| // ctx.ServerError("lastVersionNumToInt64 faild:", err) | |||
| // return nil | |||
| // } | |||
| // lastVersionName := "V" + strconv.FormatInt(lastVersionNumToInt64-1, 10) | |||
| //将训练任务的本版本的isLatestVersion设置为"0" | |||
| //将当前版本的isLatestVersion和任务数量更新 | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return nil, err | |||
| } | |||
| return jobResult, err | |||
| } | |||
| func TransTrainJobStatus(status int) string { | |||
| @@ -377,6 +377,52 @@ sendjob: | |||
| return &result, nil | |||
| } | |||
| func createTrainJobVersion(createJobVersionParams models.CreateTrainJobVersionParams, jobID string) (*models.CreateTrainJobResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.CreateTrainJobResult | |||
| retry := 0 | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetAuthToken(TOKEN). | |||
| SetBody(createJobVersionParams). | |||
| SetResult(&result). | |||
| Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||
| if err != nil { | |||
| return nil, fmt.Errorf("resty create train-job version: %s", err) | |||
| } | |||
| req, _ := json.Marshal(createJobVersionParams) | |||
| log.Info("%s", req) | |||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
| retry++ | |||
| _ = getToken() | |||
| goto sendjob | |||
| } | |||
| if res.StatusCode() != http.StatusOK { | |||
| var temp models.ErrorResult | |||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
| } | |||
| log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
| return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
| } | |||
| if !result.IsSuccess { | |||
| log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
| return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
| } | |||
| return &result, nil | |||
| } | |||
| func GetResourceSpecs() (*models.GetResourceSpecsResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| @@ -40,6 +40,7 @@ const ( | |||
| tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" | |||
| tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" | |||
| tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index" | |||
| tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new" | |||
| ) | |||
| // MustEnableDataset check if repository enable internal cb | |||
| @@ -493,14 +494,6 @@ func NotebookDel(ctx *context.Context) { | |||
| func TrainJobIndex(ctx *context.Context) { | |||
| MustEnableModelArts(ctx) | |||
| //can, err := canUserCreateTrainJob(ctx.User.ID) | |||
| //if err != nil { | |||
| // ctx.ServerError("canUserCreateTrainJob", err) | |||
| // return | |||
| //} | |||
| // | |||
| //ctx.Data["CanCreate"] = can | |||
| repo := ctx.Repo.Repository | |||
| page := ctx.QueryInt("page") | |||
| if page <= 0 { | |||
| @@ -512,9 +505,10 @@ func TrainJobIndex(ctx *context.Context) { | |||
| Page: page, | |||
| PageSize: setting.UI.IssuePagingNum, | |||
| }, | |||
| RepoID: repo.ID, | |||
| Type: models.TypeCloudBrainTwo, | |||
| JobType: string(models.JobTypeTrain), | |||
| RepoID: repo.ID, | |||
| Type: models.TypeCloudBrainTwo, | |||
| JobType: string(models.JobTypeTrain), | |||
| IsLatestVersion: modelarts.IsLatestVersion, | |||
| }) | |||
| if err != nil { | |||
| ctx.ServerError("Cloudbrain", err) | |||
| @@ -596,12 +590,96 @@ func trainJobNewDataPrepare(ctx *context.Context) error { | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| ctx.Data["train_url"] = outputObsPath | |||
| Branches, err := ctx.Repo.GitRepo.GetBranches() | |||
| if err != nil { | |||
| ctx.ServerError("GetBranches error:", err) | |||
| return err | |||
| } | |||
| ctx.Data["Branches"] = Branches | |||
| ctx.Data["BranchesCount"] = len(Branches) | |||
| configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) | |||
| if err != nil { | |||
| ctx.ServerError("getConfigList failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["config_list"] = configList.ParaConfigs | |||
| return nil | |||
| } | |||
| func TrainJobNewVersion(ctx *context.Context) { | |||
| err := trainJobNewVersionDataPrepare(ctx) | |||
| if err != nil { | |||
| ctx.ServerError("get new train-job info failed", err) | |||
| return | |||
| } | |||
| ctx.HTML(200, tplModelArtsTrainJobVersionNew) | |||
| } | |||
| func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["PageIsCloudBrain"] = true | |||
| var jobID = ctx.Params(":jobid") | |||
| var versionName = ctx.Query("versionName") | |||
| jobID = "19373" | |||
| t := time.Now() | |||
| var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] | |||
| ctx.Data["job_name"] = jobName | |||
| attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) | |||
| if err != nil { | |||
| ctx.ServerError("GetAllUserAttachments failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["attachments"] = attachs | |||
| var resourcePools modelarts.ResourcePool | |||
| if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { | |||
| ctx.ServerError("json.Unmarshal failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["resource_pools"] = resourcePools.Info | |||
| var engines modelarts.Engine | |||
| if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { | |||
| ctx.ServerError("json.Unmarshal failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["engines"] = engines.Info | |||
| var versionInfos modelarts.VersionInfo | |||
| if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { | |||
| ctx.ServerError("json.Unmarshal failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["engine_versions"] = versionInfos.Version | |||
| var flavorInfos modelarts.Flavor | |||
| if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { | |||
| ctx.ServerError("json.Unmarshal failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["flavor_infos"] = flavorInfos.Info | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| ctx.Data["train_url"] = outputObsPath | |||
| Branches, err := ctx.Repo.GitRepo.GetBranches() | |||
| if err != nil { | |||
| ctx.ServerError("GetBranches error:", err) | |||
| return err | |||
| } | |||
| ctx.Data["Branches"] = Branches | |||
| ctx.Data["BranchesCount"] = len(Branches) | |||
| ctx.Data["jobID"] = jobID | |||
| ctx.Data["versionName"] = versionName | |||
| configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) | |||
| if err != nil { | |||
| ctx.ServerError("getConfigList failed:", err) | |||
| return err | |||
| } | |||
| ctx.Data["config_list"] = configList.ParaConfigs | |||
| return nil | |||
| @@ -625,20 +703,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath | |||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | |||
| branch_name := form.BranchName | |||
| isLatestVersion := modelarts.IsLatestVersion | |||
| //can, err := canUserCreateTrainJob(ctx.User.ID) | |||
| //if err != nil { | |||
| // ctx.ServerError("canUserCreateTrainJob", err) | |||
| // return | |||
| //} | |||
| // | |||
| //if !can { | |||
| // log.Error("the user can not create train-job") | |||
| // ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form) | |||
| // return | |||
| //} | |||
| //param check | |||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||
| trainJobNewDataPrepare(ctx) | |||
| @@ -657,7 +724,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| if err == nil { | |||
| os.RemoveAll(codeLocalPath) | |||
| } | |||
| if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { | |||
| gitRepo, _ := git.OpenRepository(repo.RepoPath()) | |||
| commitID, _ := gitRepo.GetBranchCommitID(branch_name) | |||
| if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ | |||
| Branch: branch_name, | |||
| }); err != nil { | |||
| log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) | |||
| trainJobNewDataPrepare(ctx) | |||
| @@ -665,6 +738,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| trainJobNewDataPrepare(ctx) | |||
| // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) | |||
| ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form) | |||
| @@ -771,10 +845,12 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: param, | |||
| Parameters: parameters.Parameter, | |||
| CommitID: commitID, | |||
| IsLatestVersion: isLatestVersion, | |||
| } | |||
| err = modelarts.GenerateTrainJob(ctx, req) | |||
| jobResult, err := modelarts.GenerateTrainJob(ctx, req) | |||
| if err != nil { | |||
| log.Error("GenerateTrainJob failed:%v", err.Error()) | |||
| trainJobNewDataPrepare(ctx) | |||
| @@ -782,12 +858,258 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| // 保存openi创建训练任务界面的参数 | |||
| err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| JobName: req.JobName, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| VersionName: jobResult.VersionName, | |||
| ResourcePools: form.PoolID, | |||
| EngineVersions: form.EngineID, | |||
| FlavorInfos: form.Flavor, | |||
| TrainUrl: outputObsPath, | |||
| BootFile: form.BootFile, | |||
| Uuid: form.Attachment, | |||
| DatasetName: attach.Name, | |||
| Params: form.Params, | |||
| BranchName: branch_name, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
| func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | |||
| ctx.Data["PageIsTrainJob"] = true | |||
| var jobID = ctx.Params(":jobid") | |||
| // var fatherVersionName = ctx.Query("versionName") | |||
| // jobID = "19373" | |||
| // versionName = "V0009" | |||
| jobName := form.JobName | |||
| uuid := form.Attachment | |||
| description := form.Description | |||
| workServerNumber := form.WorkServerNumber | |||
| engineID := form.EngineID | |||
| bootFile := form.BootFile | |||
| flavorCode := form.Flavor | |||
| params := form.Params | |||
| poolID := form.PoolID | |||
| isSaveParam := form.IsSaveParam | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + modelarts.CodePath | |||
| codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath | |||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | |||
| branch_name := form.BranchName | |||
| fatherVersionName := form.VersionName | |||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| attach, err := models.GetAttachmentByUUID(uuid) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) | |||
| return | |||
| } | |||
| //todo: del the codeLocalPath | |||
| _, err = ioutil.ReadDir(codeLocalPath) | |||
| if err == nil { | |||
| os.RemoveAll(codeLocalPath) | |||
| } | |||
| gitRepo, _ := git.OpenRepository(repo.RepoPath()) | |||
| commitID, _ := gitRepo.GetBranchCommitID(branch_name) | |||
| if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ | |||
| Branch: branch_name, | |||
| }); err != nil { | |||
| log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) | |||
| ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) | |||
| // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| //todo: upload code (send to file_server todo this work?) | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
| log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { | |||
| log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { | |||
| log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| //todo: del local code? | |||
| var parameters models.Parameters | |||
| param := make([]models.Parameter, 0) | |||
| param = append(param, models.Parameter{ | |||
| Label: modelarts.TrainUrl, | |||
| Value: outputObsPath, | |||
| }, models.Parameter{ | |||
| Label: modelarts.DataUrl, | |||
| Value: dataPath, | |||
| }) | |||
| if len(params) != 0 { | |||
| err := json.Unmarshal([]byte(params), ¶meters) | |||
| if err != nil { | |||
| log.Error("Failed to Unmarshal params: %s (%v)", params, err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| for _, parameter := range parameters.Parameter { | |||
| if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { | |||
| param = append(param, models.Parameter{ | |||
| Label: parameter.Label, | |||
| Value: parameter.Value, | |||
| }) | |||
| } | |||
| } | |||
| } | |||
| //save param config | |||
| if isSaveParam == "on" { | |||
| if form.ParameterTemplateName == "" { | |||
| log.Error("ParameterTemplateName is empty") | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ | |||
| ConfigName: form.ParameterTemplateName, | |||
| Description: form.PrameterDescription, | |||
| DataUrl: dataPath, | |||
| AppUrl: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| Flavor: models.Flavor{ | |||
| Code: flavorCode, | |||
| }, | |||
| WorkServerNum: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Parameter: parameters.Parameter, | |||
| }) | |||
| if err != nil { | |||
| log.Error("Failed to CreateTrainJobConfig: %v", err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| } | |||
| // JobVersionName := "V0001" | |||
| // PreVersionId := int64(67646) | |||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, fatherVersionName) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| req := &modelarts.GenerateTrainJobVersionReq{ | |||
| JobName: task.JobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| PreVersionId: task.VersionID, | |||
| CommitID: commitID, | |||
| } | |||
| jobResult, err := modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) | |||
| if err != nil { | |||
| log.Error("GenerateTrainJob failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| // 保存openi创建训练任务界面的参数 | |||
| err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| JobName: req.JobName, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| VersionName: jobResult.VersionName, | |||
| ResourcePools: form.PoolID, | |||
| EngineVersions: form.EngineID, | |||
| FlavorInfos: form.Flavor, | |||
| TrainUrl: outputObsPath, | |||
| BootFile: form.BootFile, | |||
| Uuid: form.Attachment, | |||
| DatasetName: attach.Name, | |||
| Params: form.Params, | |||
| BranchName: branch_name, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| // readDir reads the directory named by dirname and returns | |||
| // a list of directory entries sorted by filename. | |||
| func readDir(dirname string) ([]os.FileInfo, error) { | |||
| @@ -880,6 +1202,27 @@ func TrainJobShow(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| repo := ctx.Repo.Repository | |||
| page := ctx.QueryInt("page") | |||
| if page <= 0 { | |||
| page = 1 | |||
| } | |||
| VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||
| ListOptions: models.ListOptions{ | |||
| Page: page, | |||
| PageSize: setting.UI.IssuePagingNum, | |||
| }, | |||
| RepoID: repo.ID, | |||
| Type: models.TypeCloudBrainTwo, | |||
| JobType: string(models.JobTypeTrain), | |||
| JobID: jobID, | |||
| }) | |||
| if err != nil { | |||
| ctx.ServerError("Cloudbrain", err) | |||
| return | |||
| } | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||
| @@ -930,6 +1273,8 @@ func TrainJobShow(ctx *context.Context) { | |||
| ctx.Data["task"] = task | |||
| ctx.Data["jobID"] = jobID | |||
| ctx.Data["result"] = result | |||
| ctx.Data["VersionListTasks"] = VersionListTasks | |||
| ctx.Data["VersionLisCount"] = VersionListCount | |||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| @@ -1040,6 +1385,52 @@ func TrainJobStop(ctx *context.Context) { | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
| func TrainJobVersionDel(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| var versionName = ctx.Params(":versionName") | |||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| return | |||
| } | |||
| _, err = modelarts.DelTrainJob(jobID) | |||
| if err != nil { | |||
| log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| return | |||
| } | |||
| err = models.DeleteJob(task) | |||
| if err != nil { | |||
| ctx.ServerError("DeleteJob failed", err) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
| func TrainJobVersionStop(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| var versionName = ctx.Params(":versionName") | |||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| return | |||
| } | |||
| _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||
| if err != nil { | |||
| log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
| func canUserCreateTrainJob(uid int64) (bool, error) { | |||
| org, err := models.GetOrgByName(setting.AllowedOrg) | |||
| if err != nil { | |||
| @@ -975,16 +975,6 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| }, context.RepoRef()) | |||
| m.Group("/modelarts", func() { | |||
| // m.Get("", reqRepoCloudBrainReader, repo.ModelArtsIndex) | |||
| // m.Group("/:jobid", func() { | |||
| // m.Get("", reqRepoCloudBrainReader, repo.ModelArtsShow) | |||
| // m.Get("/debug", reqRepoCloudBrainReader, repo.ModelArtsDebug) | |||
| // m.Post("/stop", reqRepoCloudBrainWriter, repo.ModelArtsStop) | |||
| // m.Post("/del", reqRepoCloudBrainWriter, repo.ModelArtsDel) | |||
| // }) | |||
| // m.Get("/create", reqRepoCloudBrainWriter, repo.ModelArtsNew) | |||
| // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsForm{}), repo.ModelArtsCreate) | |||
| m.Group("/notebook", func() { | |||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookIndex) | |||
| m.Group("/:jobid", func() { | |||
| @@ -1006,9 +996,17 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Get("/log", reqRepoCloudBrainReader, repo.TrainJobGetLog) | |||
| m.Get("/models", reqRepoCloudBrainReader, repo.TrainJobShowModels) | |||
| m.Get("/download_model", reqRepoCloudBrainReader, repo.TrainJobDownloadModel) | |||
| m.Get("/create_version", reqRepoCloudBrainReader, repo.TrainJobNewVersion) | |||
| m.Post("/create_version", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) | |||
| m.Post("/stop_version", reqRepoCloudBrainWriter, repo.TrainJobVersionStop) | |||
| m.Post("/del_version", reqRepoCloudBrainWriter, repo.TrainJobVersionDel) | |||
| }) | |||
| m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNew) | |||
| m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) | |||
| // m.Get("/create", reqRepoCloudBrainReader, repo.TrainJobNewVersion) | |||
| // m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) | |||
| m.Get("/para-config-list", reqRepoCloudBrainReader, repo.TrainJobGetConfigList) | |||
| }) | |||
| }, context.RepoRef()) | |||