| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
6af7c8b767 | update | 4 years ago |
|
|
a96284c138 | update | 4 years ago |
|
|
4f262e6e0f | merge | 4 years ago |
|
|
6bd1bd27b7 | update | 4 years ago |
|
|
de8aed830e | Merge remote-tracking branch 'origin/V20220125' into inference-job | 4 years ago |
|
|
9af248b967 | fix-1404 | 4 years ago |
|
|
6355766298 | 去除云脑1调试任务名限制 | 4 years ago |
|
|
cf3fd3306e | 修改任务查询类型 | 4 years ago |
|
|
187b19ff40 | Merge remote-tracking branch 'origin/V20220125' into inference-job | 4 years ago |
|
|
5145766631 | update | 4 years ago |
|
|
5edc9c3bdb | 项目内不同任务可重名 | 4 years ago |
| @@ -133,6 +133,7 @@ type Cloudbrain struct { | |||||
| ModelVersion string //模型版本 | ModelVersion string //模型版本 | ||||
| CkptName string //权重文件名称 | CkptName string //权重文件名称 | ||||
| ResultUrl string //推理结果的obs路径 | ResultUrl string //推理结果的obs路径 | ||||
| ApiJobName string //对接后台的任务名称,如在modelarts上任务名称 | |||||
| User *User `xorm:"-"` | User *User `xorm:"-"` | ||||
| Repo *Repository `xorm:"-"` | Repo *Repository `xorm:"-"` | ||||
| @@ -1,11 +1,12 @@ | |||||
| package cloudbrain | package cloudbrain | ||||
| import ( | import ( | ||||
| "code.gitea.io/gitea/modules/storage" | |||||
| "encoding/json" | "encoding/json" | ||||
| "errors" | "errors" | ||||
| "strconv" | "strconv" | ||||
| "code.gitea.io/gitea/modules/storage" | |||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "code.gitea.io/gitea/models" | "code.gitea.io/gitea/models" | ||||
| @@ -107,7 +108,7 @@ func AdminOrJobCreaterRight(ctx *context.Context) { | |||||
| } | } | ||||
| func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { | |||||
| func GenerateTask(ctx *context.Context, openiJobName, apiJobName, image, command, uuid, codePath, modelPath, benchmarkPath, snn4imagenetPath, brainScorePath, jobType, gpuQueue, description string, benchmarkTypeID, benchmarkChildTypeID, resourceSpecId int) error { | |||||
| dataActualPath := setting.Attachment.Minio.RealPath + | dataActualPath := setting.Attachment.Minio.RealPath + | ||||
| setting.Attachment.Minio.Bucket + "/" + | setting.Attachment.Minio.Bucket + "/" + | ||||
| setting.Attachment.Minio.BasePath + | setting.Attachment.Minio.BasePath + | ||||
| @@ -128,9 +129,8 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, | |||||
| log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) | log.Error("no such resourceSpecId(%d)", resourceSpecId, ctx.Data["MsgID"]) | ||||
| return errors.New("no such resourceSpec") | return errors.New("no such resourceSpec") | ||||
| } | } | ||||
| jobResult, err := CreateJob(jobName, models.CreateJobParams{ | |||||
| JobName: jobName, | |||||
| jobResult, err := CreateJob(apiJobName, models.CreateJobParams{ | |||||
| JobName: apiJobName, | |||||
| RetryCount: 1, | RetryCount: 1, | ||||
| GpuType: gpuQueue, | GpuType: gpuQueue, | ||||
| Image: image, | Image: image, | ||||
| @@ -200,7 +200,7 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, | |||||
| return err | return err | ||||
| } | } | ||||
| if jobResult.Code != Success { | if jobResult.Code != Success { | ||||
| log.Error("CreateJob(%s) failed:%s", jobName, jobResult.Msg, ctx.Data["MsgID"]) | |||||
| log.Error("CreateJob(%s) failed:%s", apiJobName, jobResult.Msg, ctx.Data["MsgID"]) | |||||
| return errors.New(jobResult.Msg) | return errors.New(jobResult.Msg) | ||||
| } | } | ||||
| @@ -210,7 +210,6 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: jobID, | JobID: jobID, | ||||
| JobName: jobName, | |||||
| SubTaskName: SubTaskName, | SubTaskName: SubTaskName, | ||||
| JobType: jobType, | JobType: jobType, | ||||
| Type: models.TypeCloudBrainOne, | Type: models.TypeCloudBrainOne, | ||||
| @@ -221,7 +220,8 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, | |||||
| ComputeResource: models.GPUResource, | ComputeResource: models.GPUResource, | ||||
| BenchmarkTypeID: benchmarkTypeID, | BenchmarkTypeID: benchmarkTypeID, | ||||
| BenchmarkChildTypeID: benchmarkChildTypeID, | BenchmarkChildTypeID: benchmarkChildTypeID, | ||||
| Description: description, | |||||
| JobName: openiJobName, | |||||
| ApiJobName: apiJobName, | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -65,7 +65,8 @@ var ( | |||||
| ) | ) | ||||
| type GenerateTrainJobReq struct { | type GenerateTrainJobReq struct { | ||||
| JobName string | |||||
| OpeniJobName string | |||||
| ApiJobName string | |||||
| Uuid string | Uuid string | ||||
| Description string | Description string | ||||
| CodeObsPath string | CodeObsPath string | ||||
| @@ -117,7 +118,8 @@ type GenerateTrainJobVersionReq struct { | |||||
| } | } | ||||
| type GenerateInferenceJobReq struct { | type GenerateInferenceJobReq struct { | ||||
| JobName string | |||||
| OpeniJobName string | |||||
| ApiJobName string | |||||
| Uuid string | Uuid string | ||||
| Description string | Description string | ||||
| CodeObsPath string | CodeObsPath string | ||||
| @@ -190,7 +192,7 @@ type Parameters struct { | |||||
| } `json:"parameter"` | } `json:"parameter"` | ||||
| } | } | ||||
| func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error { | |||||
| func GenerateTask(ctx *context.Context, openiJobName, apiJobName, uuid, description, flavor string) error { | |||||
| var dataActualPath string | var dataActualPath string | ||||
| if uuid != "" { | if uuid != "" { | ||||
| dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" | dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" | ||||
| @@ -216,7 +218,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin | |||||
| json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) | json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) | ||||
| } | } | ||||
| jobResult, err := CreateJob(models.CreateNotebookParams{ | jobResult, err := CreateJob(models.CreateNotebookParams{ | ||||
| JobName: jobName, | |||||
| JobName: apiJobName, | |||||
| Description: description, | Description: description, | ||||
| ProfileID: setting.ProfileID, | ProfileID: setting.ProfileID, | ||||
| Flavor: flavor, | Flavor: flavor, | ||||
| @@ -248,11 +250,12 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: jobResult.ID, | JobID: jobResult.ID, | ||||
| JobName: jobName, | |||||
| JobName: openiJobName, | |||||
| JobType: string(models.JobTypeDebug), | JobType: string(models.JobTypeDebug), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| Uuid: uuid, | Uuid: uuid, | ||||
| ComputeResource: models.NPUResource, | ComputeResource: models.NPUResource, | ||||
| ApiJobName: jobResult.Name, | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -264,7 +267,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin | |||||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | ||||
| jobResult, err := createTrainJob(models.CreateTrainJobParams{ | jobResult, err := createTrainJob(models.CreateTrainJobParams{ | ||||
| JobName: req.JobName, | |||||
| JobName: req.ApiJobName, | |||||
| Description: req.Description, | Description: req.Description, | ||||
| Config: models.Config{ | Config: models.Config{ | ||||
| WorkServerNum: req.WorkServerNumber, | WorkServerNum: req.WorkServerNumber, | ||||
| @@ -298,7 +301,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | JobID: strconv.FormatInt(jobResult.JobID, 10), | ||||
| JobName: req.JobName, | |||||
| JobName: req.OpeniJobName, | |||||
| ApiJobName: req.ApiJobName, | |||||
| JobType: string(models.JobTypeTrain), | JobType: string(models.JobTypeTrain), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | VersionID: jobResult.VersionID, | ||||
| @@ -325,7 +329,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.OpeniJobName, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -382,7 +386,8 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | JobID: strconv.FormatInt(jobResult.JobID, 10), | ||||
| JobName: req.JobName, | |||||
| JobName: req.OpeniJobName, | |||||
| ApiJobName: req.ApiJobName, | |||||
| JobType: string(models.JobTypeTrain), | JobType: string(models.JobTypeTrain), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | VersionID: jobResult.VersionID, | ||||
| @@ -410,7 +415,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| VersionCount: VersionListCount + 1, | VersionCount: VersionListCount + 1, | ||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.OpeniJobName, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -484,7 +489,7 @@ func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) { | |||||
| func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { | func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { | ||||
| jobResult, err := createInferenceJob(models.CreateInferenceJobParams{ | jobResult, err := createInferenceJob(models.CreateInferenceJobParams{ | ||||
| JobName: req.JobName, | |||||
| JobName: req.ApiJobName, | |||||
| Description: req.Description, | Description: req.Description, | ||||
| InfConfig: models.InfConfig{ | InfConfig: models.InfConfig{ | ||||
| WorkServerNum: req.WorkServerNumber, | WorkServerNum: req.WorkServerNumber, | ||||
| @@ -518,7 +523,8 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | JobID: strconv.FormatInt(jobResult.JobID, 10), | ||||
| JobName: req.JobName, | |||||
| JobName: req.OpeniJobName, | |||||
| ApiJobName: jobResult.JobName, | |||||
| JobType: string(models.JobTypeInference), | JobType: string(models.JobTypeInference), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | VersionID: jobResult.VersionID, | ||||
| @@ -549,7 +555,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.OpeniJobName, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -8,6 +8,7 @@ import ( | |||||
| "bytes" | "bytes" | ||||
| "strconv" | "strconv" | ||||
| "strings" | "strings" | ||||
| "time" | |||||
| ) | ) | ||||
| // OptionalBool a boolean that can be "null" | // OptionalBool a boolean that can be "null" | ||||
| @@ -110,3 +111,16 @@ func AddZero(t int64) (m string) { | |||||
| return strconv.FormatInt(t, 10) | return strconv.FormatInt(t, 10) | ||||
| } | } | ||||
| } | } | ||||
// ConvertToApiJobName derives the job name that is sent to the backend
// service from the user-facing OpenI job name.
//
// The result is built as:
//
//	"openi" + lower-cased name (truncated to at most 15 bytes) + "t" +
//	current time formatted as yyyyMMddHH + decimal Unix seconds without
//	their first five digits
//
// NOTE(review): two calls with the same name within the same second produce
// the same result; confirm callers do not rely on strict uniqueness.
func ConvertToApiJobName(OpeniJobName string) (ApiJobName string) {
	t := time.Now()
	// FormatInt keeps the full 64-bit timestamp; converting through int
	// would truncate it on 32-bit platforms.
	unixDigits := strconv.FormatInt(t.Unix(), 10)
	ApiJobName = "openi" + strings.ToLower(cutNameString(OpeniJobName, 15)) + "t" + t.Format("2006010215") + unixDigits[5:]
	return ApiJobName
}

// cutNameString returns str truncated to at most lens bytes.
//
// The cut never lands inside a multi-byte UTF-8 sequence: when byte lens is
// a continuation byte, the cut point moves back to the start of that rune,
// so the result may be shorter than lens bytes. ASCII input is truncated to
// exactly lens bytes. A non-positive lens yields the empty string.
func cutNameString(str string, lens int) string {
	if lens <= 0 {
		return ""
	}
	if len(str) <= lens {
		return str
	}
	cut := lens
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
	// until the cut sits on the first byte of a rune.
	for cut > 0 && str[cut]&0xC0 == 0x80 {
		cut--
	}
	return str[:cut]
}
| @@ -176,7 +176,8 @@ func CloudBrainNew(ctx *context.Context) { | |||||
| func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| jobName := form.JobName | |||||
| openiJobName := form.JobName | |||||
| jobName := util.ConvertToApiJobName(openiJobName) | |||||
| image := form.Image | image := form.Image | ||||
| uuid := form.Attachment | uuid := form.Attachment | ||||
| jobType := form.JobType | jobType := form.JobType | ||||
| @@ -185,11 +186,41 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath | codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath | ||||
| resourceSpecId := form.ResourceSpecId | resourceSpecId := form.ResourceSpecId | ||||
| if !jobNamePattern.MatchString(jobName) { | |||||
| ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainNew, &form) | |||||
| return | |||||
| //检查任务名称是否重复 | |||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeDebug)) | |||||
| tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | |||||
| PageSize: setting.UI.IssuePagingNum, | |||||
| }, | |||||
| RepoID: ctx.Repo.Repository.ID, | |||||
| Type: -1, | |||||
| JobTypeNot: false, | |||||
| JobTypes: jobTypes, | |||||
| }) | |||||
| if err == nil { | |||||
| for _, task := range tasks { | |||||
| if strings.EqualFold(task.JobName, openiJobName) { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("任务名称已经被使用!", tplCloudBrainNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("system error", tplCloudBrainNew, &form) | |||||
| return | |||||
| } | |||||
| } | } | ||||
| // if !jobNamePattern.MatchString(jobName) { | |||||
| // ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainNew, &form) | |||||
| // return | |||||
| // } | |||||
| if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { | if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeSnn4imagenet) && jobType != string(models.JobTypeBrainScore) { | ||||
| log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) | log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) | ||||
| cloudBrainNewDataPrepare(ctx) | cloudBrainNewDataPrepare(ctx) | ||||
| @@ -212,20 +243,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| } | } | ||||
| } | } | ||||
| _, err = models.GetCloudbrainByName(jobName) | |||||
| if err == nil { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("the job name did already exist", tplCloudBrainNew, &form) | |||||
| return | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("system error", tplCloudBrainNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| repo := ctx.Repo.Repository | repo := ctx.Repo.Repository | ||||
| downloadCode(repo, codePath) | downloadCode(repo, codePath) | ||||
| uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") | uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") | ||||
| @@ -258,7 +275,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| uploadCodeToMinio(brainScorePath+"/", jobName, cloudbrain.BrainScoreMountPath+"/") | uploadCodeToMinio(brainScorePath+"/", jobName, cloudbrain.BrainScoreMountPath+"/") | ||||
| } | } | ||||
| err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
| err = cloudbrain.GenerateTask(ctx, openiJobName, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
| storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | ||||
| storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | ||||
| storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, | storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), jobType, gpuQueue, form.Description, | ||||
| @@ -1190,7 +1207,8 @@ func getBenchmarkResourceSpec(resourceSpecID int) (int, error) { | |||||
| func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| jobName := form.JobName | |||||
| openiJobName := form.JobName | |||||
| jobName := util.ConvertToApiJobName(openiJobName) | |||||
| image := form.Image | image := form.Image | ||||
| gpuQueue := form.GpuType | gpuQueue := form.GpuType | ||||
| command := cloudbrain.CommandBenchmark | command := cloudbrain.CommandBenchmark | ||||
| @@ -1322,7 +1340,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||||
| //return | //return | ||||
| } | } | ||||
| err = cloudbrain.GenerateTask(ctx, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
| err = cloudbrain.GenerateTask(ctx, openiJobName, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||||
| storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | ||||
| storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | ||||
| storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, | storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, | ||||
| @@ -25,6 +25,7 @@ import ( | |||||
| "code.gitea.io/gitea/modules/obs" | "code.gitea.io/gitea/modules/obs" | ||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "code.gitea.io/gitea/modules/storage" | "code.gitea.io/gitea/modules/storage" | ||||
| "code.gitea.io/gitea/modules/util" | |||||
| "github.com/unknwon/com" | "github.com/unknwon/com" | ||||
| ) | ) | ||||
| @@ -105,6 +106,15 @@ func MustEnableModelArts(ctx *context.Context) { | |||||
| } | } | ||||
| func NotebookNew(ctx *context.Context) { | func NotebookNew(ctx *context.Context) { | ||||
| err := modelartsNewDataPrepare(ctx) | |||||
| if err != nil { | |||||
| ctx.ServerError("get new cloudbrain info failed", err) | |||||
| return | |||||
| } | |||||
| ctx.HTML(200, tplModelArtsNotebookNew) | |||||
| } | |||||
| func modelartsNewDataPrepare(ctx *context.Context) error { | |||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| t := time.Now() | t := time.Now() | ||||
| @@ -113,8 +123,9 @@ func NotebookNew(ctx *context.Context) { | |||||
| attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) | attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetAllUserAttachments failed:", err) | |||||
| ctx.ServerError("GetAllUserAttachments failed:", err) | ctx.ServerError("GetAllUserAttachments failed:", err) | ||||
| return | |||||
| return err | |||||
| } | } | ||||
| ctx.Data["attachments"] = attachs | ctx.Data["attachments"] = attachs | ||||
| @@ -125,47 +136,62 @@ func NotebookNew(ctx *context.Context) { | |||||
| json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos) | json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos) | ||||
| } | } | ||||
| ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo | ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo | ||||
| ctx.HTML(200, tplModelArtsNotebookNew) | |||||
| return nil | |||||
| } | } | ||||
| func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { | func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { | ||||
| ctx.Data["PageIsNotebook"] = true | ctx.Data["PageIsNotebook"] = true | ||||
| jobName := form.JobName | |||||
| openiJobName := form.JobName | |||||
| jobName := util.ConvertToApiJobName(openiJobName) | |||||
| uuid := form.Attachment | uuid := form.Attachment | ||||
| description := form.Description | description := form.Description | ||||
| flavor := form.Flavor | flavor := form.Flavor | ||||
| //判断任务名是否重名 | |||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeDebug)) | |||||
| tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | |||||
| PageSize: setting.UI.IssuePagingNum, | |||||
| }, | |||||
| RepoID: ctx.Repo.Repository.ID, | |||||
| Type: -1, | |||||
| JobTypeNot: false, | |||||
| JobTypes: jobTypes, | |||||
| }) | |||||
| if err == nil { | |||||
| for _, task := range tasks { | |||||
| if strings.EqualFold(task.JobName, openiJobName) { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| modelartsNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsNotebookNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| modelartsNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
| cloudBrainNewDataPrepare(ctx) | |||||
| modelartsNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | ||||
| return | return | ||||
| } else { | } else { | ||||
| if count >= 1 { | if count >= 1 { | ||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | ||||
| cloudBrainNewDataPrepare(ctx) | |||||
| modelartsNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) | ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) | ||||
| return | return | ||||
| } | } | ||||
| } | } | ||||
| _, err = models.GetCloudbrainByName(jobName) | |||||
| if err == nil { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form) | |||||
| return | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| cloudBrainNewDataPrepare(ctx) | |||||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| err = modelarts.GenerateTask(ctx, jobName, uuid, description, flavor) | |||||
| err = modelarts.GenerateTask(ctx, openiJobName, jobName, uuid, description, flavor) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) | ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) | ||||
| return | return | ||||
| @@ -762,7 +788,9 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai | |||||
| func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | ||||
| ctx.Data["PageIsTrainJob"] = true | ctx.Data["PageIsTrainJob"] = true | ||||
| VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) | VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) | ||||
| jobName := form.JobName | |||||
| openiJobName := form.JobName | |||||
| apiJobName := util.ConvertToApiJobName(openiJobName) | |||||
| jobName := apiJobName | |||||
| uuid := form.Attachment | uuid := form.Attachment | ||||
| description := form.Description | description := form.Description | ||||
| workServerNumber := form.WorkServerNumber | workServerNumber := form.WorkServerNumber | ||||
| @@ -784,6 +812,44 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
| VersionCount := modelarts.VersionCount | VersionCount := modelarts.VersionCount | ||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||||
| trainJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) | |||||
| return | |||||
| } | |||||
| //判断项目内任务名称是否重复 | |||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||||
| tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | |||||
| PageSize: setting.UI.IssuePagingNum, | |||||
| }, | |||||
| RepoID: repo.ID, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobTypeNot: false, | |||||
| JobTypes: jobTypes, | |||||
| IsLatestVersion: modelarts.IsLatestVersion, | |||||
| }) | |||||
| if err == nil { | |||||
| for _, task := range tasks { | |||||
| if strings.EqualFold(task.JobName, openiJobName) { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| trainJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsInferenceJobNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| trainJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
| @@ -799,13 +865,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
| } | } | ||||
| } | } | ||||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||||
| trainJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) | |||||
| return | |||||
| } | |||||
| //todo: del the codeLocalPath | //todo: del the codeLocalPath | ||||
| _, err = ioutil.ReadDir(codeLocalPath) | _, err = ioutil.ReadDir(codeLocalPath) | ||||
| if err == nil { | if err == nil { | ||||
| @@ -913,7 +972,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
| } | } | ||||
| req := &modelarts.GenerateTrainJobReq{ | req := &modelarts.GenerateTrainJobReq{ | ||||
| JobName: jobName, | |||||
| OpeniJobName: openiJobName, | |||||
| ApiJobName: jobName, | |||||
| DataUrl: dataPath, | DataUrl: dataPath, | ||||
| Description: description, | Description: description, | ||||
| CodeObsPath: codeObsPath, | CodeObsPath: codeObsPath, | ||||
| @@ -980,7 +1040,30 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||||
| } | } | ||||
| VersionOutputPath := modelarts.GetOutputPathByCount(latestTask.TotalVersionCount + 1) | VersionOutputPath := modelarts.GetOutputPathByCount(latestTask.TotalVersionCount + 1) | ||||
| jobName := form.JobName | |||||
| //判断权限 | |||||
| canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID) | |||||
| if !canNewJob { | |||||
| ctx.RenderWithErr("user cann't new trainjob", tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||||
| versionErrorDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| PreVersionName := form.VersionName | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, PreVersionName) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| apiJobName := task.ApiJobName | |||||
| jobName := apiJobName | |||||
| uuid := form.Attachment | uuid := form.Attachment | ||||
| description := form.Description | description := form.Description | ||||
| workServerNumber := form.WorkServerNumber | workServerNumber := form.WorkServerNumber | ||||
| @@ -997,34 +1080,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" | logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" | ||||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | ||||
| branch_name := form.BranchName | branch_name := form.BranchName | ||||
| PreVersionName := form.VersionName | |||||
| FlavorName := form.FlavorName | FlavorName := form.FlavorName | ||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| //判断权限 | |||||
| canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID) | |||||
| if !canNewJob { | |||||
| ctx.RenderWithErr("user cann't new trainjob", tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| if err := paramCheckCreateTrainJob(form); err != nil { | |||||
| log.Error("paramCheckCreateTrainJob failed:(%v)", err) | |||||
| versionErrorDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| //todo: del the codeLocalPath | //todo: del the codeLocalPath | ||||
| _, err = ioutil.ReadDir(codeLocalPath) | _, err = ioutil.ReadDir(codeLocalPath) | ||||
| if err == nil { | if err == nil { | ||||
| os.RemoveAll(codeLocalPath) | os.RemoveAll(codeLocalPath) | ||||
| } else { | |||||
| log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) | |||||
| versionErrorDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | } | ||||
| // os.RemoveAll(codeLocalPath) | // os.RemoveAll(codeLocalPath) | ||||
| @@ -1134,14 +1197,9 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||||
| return | return | ||||
| } | } | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, PreVersionName) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||||
| return | |||||
| } | |||||
| req := &modelarts.GenerateTrainJobReq{ | req := &modelarts.GenerateTrainJobReq{ | ||||
| JobName: task.JobName, | |||||
| OpeniJobName: task.JobName, | |||||
| ApiJobName: task.ApiJobName, | |||||
| DataUrl: dataPath, | DataUrl: dataPath, | ||||
| Description: description, | Description: description, | ||||
| CodeObsPath: codeObsPath, | CodeObsPath: codeObsPath, | ||||
| @@ -1566,7 +1624,9 @@ func getConfigList(perPage, page int, sortBy, order, searchContent, configType s | |||||
| func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) { | func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) { | ||||
| ctx.Data["PageIsTrainJob"] = true | ctx.Data["PageIsTrainJob"] = true | ||||
| VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) | VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) | ||||
| jobName := form.JobName | |||||
| openiJobName := form.JobName | |||||
| apiJobName := util.ConvertToApiJobName(openiJobName) | |||||
| jobName := apiJobName | |||||
| uuid := form.Attachment | uuid := form.Attachment | ||||
| description := form.Description | description := form.Description | ||||
| workServerNumber := form.WorkServerNumber | workServerNumber := form.WorkServerNumber | ||||
| @@ -1591,7 +1651,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
| modelName := form.ModelName | modelName := form.ModelName | ||||
| modelVersion := form.ModelVersion | modelVersion := form.ModelVersion | ||||
| ckptName := form.CkptName | ckptName := form.CkptName | ||||
| ckptUrl := form.TrainUrl + form.CkptName | ckptUrl := form.TrainUrl + form.CkptName | ||||
| if err := paramCheckCreateInferenceJob(form); err != nil { | if err := paramCheckCreateInferenceJob(form); err != nil { | ||||
| @@ -1601,6 +1660,35 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
| return | return | ||||
| } | } | ||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeInference)) | |||||
| tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | |||||
| PageSize: setting.UI.IssuePagingNum, | |||||
| }, | |||||
| RepoID: repo.ID, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobTypes: jobTypes, | |||||
| }) | |||||
| if err == nil { | |||||
| for _, task := range tasks { | |||||
| if strings.EqualFold(task.JobName, openiJobName) { | |||||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||||
| inferenceJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsInferenceJobNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { | |||||
| if !models.IsErrJobNotExist(err) { | |||||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||||
| inferenceJobErrorNewDataPrepare(ctx, form) | |||||
| ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form) | |||||
| return | |||||
| } | |||||
| } | |||||
| count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) | ||||
| @@ -1686,7 +1774,8 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
| } | } | ||||
| req := &modelarts.GenerateInferenceJobReq{ | req := &modelarts.GenerateInferenceJobReq{ | ||||
| JobName: jobName, | |||||
| OpeniJobName: openiJobName, | |||||
| ApiJobName: apiJobName, | |||||
| DataUrl: dataPath, | DataUrl: dataPath, | ||||
| Description: description, | Description: description, | ||||
| CodeObsPath: codeObsPath, | CodeObsPath: codeObsPath, | ||||
| @@ -2000,7 +2089,7 @@ func ModelDownload(ctx *context.Context) { | |||||
| return | return | ||||
| } | } | ||||
| path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/") | |||||
| path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.ApiJobName, setting.OutPutPath, versionName, parentDir, fileName), "/") | |||||
| log.Info("Download path is:%s", path) | log.Info("Download path is:%s", path) | ||||
| url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) | url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) | ||||
| @@ -2026,7 +2115,7 @@ func ResultDownload(ctx *context.Context) { | |||||
| if err != nil { | if err != nil { | ||||
| ctx.Data["error"] = err.Error() | ctx.Data["error"] = err.Error() | ||||
| } | } | ||||
| path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, "result/", versionName, parentDir, fileName), "/") | |||||
| path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.ApiJobName, "result/", versionName, parentDir, fileName), "/") | |||||
| log.Info("Download path is:%s", path) | log.Info("Download path is:%s", path) | ||||
| url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) | url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) | ||||