diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index dd502dfd0..65984d95f 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -1,13 +1,19 @@ package modelarts import ( + "encoding/base64" "encoding/json" "errors" "fmt" + "io/ioutil" + "net/http" + "os" "path" "strconv" "strings" + "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/modelarts_cd" "code.gitea.io/gitea/models" @@ -276,7 +282,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin return nil } -func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error { +func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification, bootFile string) (string, error) { if poolInfos == nil { json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) } @@ -284,7 +290,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc imageName, err := GetNotebookImageName(imageId) if err != nil { log.Error("GetNotebookImageName failed: %v", err.Error()) - return err + return "", err } createTime := timeutil.TimeStampNow() jobResult, err := createNotebook2(models.CreateNotebook2Params{ @@ -316,10 +322,10 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc }) if errTemp != nil { log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) - return errTemp + return "", errTemp } } - return err + return "", err } task := &models.Cloudbrain{ Status: jobResult.Status, @@ -334,6 +340,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc Uuid: uuid, ComputeResource: models.NPUResource, Image: imageName, + BootFile: bootFile, Description: description, CreatedUnix: createTime, UpdatedUnix: createTime, @@ -342,12 +349,12 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc err = models.CreateCloudbrain(task) if err != nil { - return err + return "", err } stringId := strconv.FormatInt(task.ID, 10) notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) - return nil + return jobResult.ID, nil } func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) { @@ -907,6 +914,11 @@ func HandleNotebookInfo(task *models.Cloudbrain) error { if task.FlavorCode == "" { task.FlavorCode = result.Flavor } + + if oldStatus != task.Status && task.Status == string(models.ModelArtsRunning) && task.BootFile != "" { + uploadNoteBookFile(task, result) + + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) @@ -917,6 +929,51 @@ func HandleNotebookInfo(task *models.Cloudbrain) error { return nil } +func uploadNoteBookFile(task *models.Cloudbrain, result *models.GetNotebook2Result) { + jupyterUrl := result.Url + "?token=" + result.Token + client := getRestyClient() + res, err := client.R().Get(jupyterUrl) + codePath := setting.JobPath + task.JobName + cloudbrain.CodeMountPath + if err != nil { + log.Error("browser jupyterUrl failed:%v", task.DisplayJobName, err) + + } else { + cookies := res.Cookies() + xsrf := "" + for _, cookie := range cookies { + if cookie.Name == "_xsrf" { + xsrf = cookie.Value + } + } + + fileContents, err := ioutil.ReadFile(codePath + "/" + task.BootFile) + if err != nil { + log.Error("read jupyter file failed:%v", task.DisplayJobName, err) + } + + base64Content := base64.StdEncoding.EncodeToString(fileContents) + + uploadUrl := result.Url + "/api/contents/" + path.Base(task.BootFile) + res, err = client.R(). + SetCookies(cookies). + SetHeader("X-XSRFToken", xsrf). + SetBody(map[string]interface{}{ + "type": "file", + "format": "base64", + "name": path.Base(task.BootFile), + "path": path.Base(task.BootFile), + "content": base64Content}). + Put(uploadUrl) + if err != nil { + log.Error("upload jupyter file failed:%v", task.DisplayJobName, err) + } else if res.StatusCode() != http.StatusCreated { + log.Error("upload jupyter file failed:%v", task.DisplayJobName, err) + } + + } + go os.RemoveAll(codePath) +} + func SyncTempStatusJob() { jobs, err := models.GetCloudBrainTempJobs() if err != nil { diff --git a/modules/modelarts_cd/modelarts.go b/modules/modelarts_cd/modelarts.go index 330b048ca..712cdc89b 100755 --- a/modules/modelarts_cd/modelarts.go +++ b/modules/modelarts_cd/modelarts.go @@ -88,11 +88,11 @@ type Parameters struct { } `json:"parameter"` } -func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error { +func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification, bootFile string) (string, error) { imageName, err := GetNotebookImageName(imageId) if err != nil { log.Error("GetNotebookImageName failed: %v", err.Error()) - return err + return "", err } createTime := timeutil.TimeStampNow() jobResult, err := createNotebook(models.CreateNotebookWithoutPoolParams{ @@ -123,10 +123,10 @@ func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, descr }) if errTemp != nil { log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) - return errTemp + return "", errTemp } } - return err + return "", err } task := &models.Cloudbrain{ Status: jobResult.Status, @@ -145,16 +145,17 @@ func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, descr CreatedUnix: createTime, UpdatedUnix: createTime, Spec: spec, + BootFile: bootFile, } err = models.CreateCloudbrain(task) if err != nil { - return err + return "", err } stringId := strconv.FormatInt(task.ID, 10) notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) - return nil + return jobResult.ID, nil } func GetNotebookImageName(imageId string) (string, error) { @@ -175,41 +176,3 @@ func GetNotebookImageName(imageId string) (string, error) { return imageName, nil } - -/* -func HandleNotebookInfo(task *models.Cloudbrain) error { - - result, err := GetNotebook(task.JobID) - if err != nil { - log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) - return err - } - - if result != nil { - oldStatus := task.Status - task.Status = result.Status - if task.StartTime == 0 && result.Lease.UpdateTime > 0 { - task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) - } - if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { - task.EndTime = timeutil.TimeStampNow() - } - task.CorrectCreateUnix() - task.ComputeAndSetDuration() - if oldStatus != task.Status { - notification.NotifyChangeCloudbrainStatus(task, oldStatus) - } - if task.FlavorCode == "" { - task.FlavorCode = result.Flavor - } - err = models.UpdateJob(task) - if err != nil { - log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) - return err - } - } - - return nil -} - -*/ diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 88557ee60..a358a85d4 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -609,9 +609,9 @@ var ( AiCenterInfo string }{} - C2NetInfos *C2NetSqInfos - CenterInfos *AiCenterInfos - C2NetMapInfo map[string]*C2NetSequenceInfo + C2NetInfos *C2NetSqInfos + CenterInfos *AiCenterInfos + C2NetMapInfo map[string]*C2NetSequenceInfo //elk config ElkUrl string @@ -712,6 +712,16 @@ var ( TeamName string }{} + FileNoteBook = struct { + ProjectName string + ImageGPU string + SpecIdGPU int64 + SpecIdCPU int64 + ImageIdNPU string + ImageNPU string + SpecIdNPU int64 + }{} + ModelConvert = struct { GPU_PYTORCH_IMAGE string GpuQueue string @@ -1655,9 +1665,9 @@ func getGrampusConfig() { if err := json.Unmarshal([]byte(Grampus.C2NetSequence), &C2NetInfos); err != nil { log.Error("Unmarshal(C2NetSequence) failed:%v", err) } - C2NetMapInfo=make(map[string]*C2NetSequenceInfo) - for _,value :=range C2NetInfos.C2NetSqInfo{ - C2NetMapInfo[value.Name]=value + C2NetMapInfo = make(map[string]*C2NetSequenceInfo) + for _, value := range C2NetInfos.C2NetSqInfo { + C2NetMapInfo[value.Name] = value } } Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus") diff --git a/modules/structs/cloudbrain.go b/modules/structs/cloudbrain.go index 866c85dad..9ea5601c9 100644 --- a/modules/structs/cloudbrain.go +++ b/modules/structs/cloudbrain.go @@ -41,6 +41,14 @@ type CreateTrainJobOption struct { SpecId int64 `json:"spec_id" binding:"Required"` } +type CreateFileNotebookJobOption struct { + Type int `json:"type"` //0 CPU 1 GPU 2 NPU + File string `json:"file" binding:"Required"` + BranchName string `json:"branch_name" binding:"Required"` + OwnerName string `json:"owner_name" binding:"Required"` + ProjectName string `json:"project_name" binding:"Required"` +} + type Cloudbrain struct { ID int64 `json:"id"` JobID string `json:"job_id"` diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index e8e8722e2..3f0c0a010 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1007,6 +1007,7 @@ readme = README readme_helper = Select a README file template. auto_init = Initialize Repository (Adds .gitignore, License and README) create_repo = Create Repository +failed_to_create_repo=Failed to create repository, please try again later. create_course = Publish Course failed_to_create_course=Failed to publish course, please try again later. default_branch = Default Branch @@ -1041,6 +1042,9 @@ model_experience = Model Experience model_noright=You have no right to do the operation. model_rename=Duplicate model name, please modify model name. +notebook_file_not_exist=Notebook file does not exist. +notebook_select_wrong=Please select a Notebook(.ipynb) file first. + date=Date repo_add=Project Increment repo_total=Project Total diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index d13c99443..8b635112b 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1013,6 +1013,7 @@ readme=自述 readme_helper=选择自述文件模板。 auto_init=初始化存储库 (添加. gitignore、许可证和自述文件) create_repo=创建项目 +failed_to_create_repo=创建项目失败,请稍后再试。 create_course=发布课程 failed_to_create_course=发布课程失败,请稍后再试。 default_branch=默认分支 @@ -1041,6 +1042,8 @@ model_experience = 模型体验 model_noright=您没有操作权限。 model_rename=模型名称重复,请修改模型名称 +notebook_file_not_exist=Notebook文件不存在。 +notebook_select_wrong=请先选择Notebook(.ipynb)文件。 date=日期 repo_add=新增项目 diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 813b77c8c..b7e3c6415 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -736,6 +736,12 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/my_favorite", repo.MyFavoriteDatasetMultiple) }, reqToken(), repoAssignment()) + m.Group("/file_notebook", func() { + + m.Post("/create", reqToken(), reqWeChat(), bind(api.CreateFileNotebookJobOption{}), repo.CreateFileNoteBook) + + }) + m.Group("/repos", func() { m.Get("/search", repo.Search) diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index 2e25fdefe..5ea797e97 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -79,6 +79,9 @@ func CloudBrainShow(ctx *context.APIContext) { ctx.JSON(http.StatusOK, models.BaseMessageWithDataApi{Code: 0, Message: "", Data: convert.ToCloudBrain(task)}) } +func CreateFileNoteBook(ctx *context.APIContext, option api.CreateFileNotebookJobOption) { + cloudbrainTask.FileNotebookCreate(ctx.Context, option) +} func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) { if option.Type == cloudbrainTask.TaskTypeCloudbrainOne { diff --git a/routers/repo/aisafety.go b/routers/repo/aisafety.go index b638a486b..6176fcda5 100644 --- a/routers/repo/aisafety.go +++ b/routers/repo/aisafety.go @@ -11,7 +11,8 @@ import ( "os" "strconv" "strings" - "time" + + cloudbrainService "code.gitea.io/gitea/services/cloudbrain" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/aisafety" @@ -483,7 +484,6 @@ func isTaskNotFinished(status string) bool { } func AiSafetyCreateForGetGPU(ctx *context.Context) { - t := time.Now() ctx.Data["PageIsCloudBrain"] = true ctx.Data["IsCreate"] = true ctx.Data["type"] = models.TypeCloudBrainOne @@ -497,7 +497,7 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) { log.Info("GPUBaseDataSetUUID=" + setting.ModelSafetyTest.GPUBaseDataSetUUID) log.Info("GPUCombatDataSetName=" + setting.ModelSafetyTest.GPUCombatDataSetName) log.Info("GPUCombatDataSetUUID=" + setting.ModelSafetyTest.GPUCombatDataSetUUID) - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName prepareCloudbrainOneSpecs(ctx) queuesDetail, _ := cloudbrain.GetQueuesDetail() @@ -514,12 +514,11 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) { } func AiSafetyCreateForGetNPU(ctx *context.Context) { - t := time.Now() ctx.Data["PageIsCloudBrain"] = true ctx.Data["IsCreate"] = true ctx.Data["type"] = models.TypeCloudBrainTwo ctx.Data["compute_resource"] = models.NPUResource - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName ctx.Data["datasetType"] = models.TypeCloudBrainTwo ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.NPUBaseDataSetName diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 61c2925b8..607d05d3c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,7 +2,6 @@ package repo import ( "bufio" - "code.gitea.io/gitea/modules/urfs_client/urchin" "encoding/json" "errors" "fmt" @@ -16,6 +15,10 @@ import ( "time" "unicode/utf8" + cloudbrainService "code.gitea.io/gitea/services/cloudbrain" + + "code.gitea.io/gitea/modules/urfs_client/urchin" + "code.gitea.io/gitea/modules/dataset" "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" @@ -91,28 +94,9 @@ func MustEnableCloudbrain(ctx *context.Context) { } } -func cutString(str string, lens int) string { - if len(str) < lens { - return str - } - return str[:lens] -} - -func jobNamePrefixValid(s string) string { - lowStr := strings.ToLower(s) - re := regexp.MustCompile(`[^a-z0-9_\\-]+`) - - removeSpecial := re.ReplaceAllString(lowStr, "") - - re = regexp.MustCompile(`^[_\\-]+`) - return re.ReplaceAllString(removeSpecial, "") - -} - func cloudBrainNewDataPrepare(ctx *context.Context, jobType string) error { ctx.Data["PageIsCloudBrain"] = true - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand() diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 0620350f6..c94bc2a5b 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -11,7 +11,6 @@ import ( "path" "strconv" "strings" - "time" "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" @@ -75,8 +74,7 @@ func GrampusTrainJobNPUNew(ctx *context.Context) { func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error { ctx.Data["PageIsCloudBrain"] = true - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName //get valid images diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index fabf7e555..214d4b0a1 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -15,6 +15,8 @@ import ( "time" "unicode/utf8" + cloudbrainService "code.gitea.io/gitea/services/cloudbrain" + "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" "code.gitea.io/gitea/modules/dataset" @@ -128,8 +130,7 @@ func NotebookNew(ctx *context.Context) { func notebookNewDataPrepare(ctx *context.Context) error { ctx.Data["PageIsCloudBrain"] = true - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) @@ -239,9 +240,9 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } if setting.ModelartsCD.Enabled { - err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, imageId, spec) + _, err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, imageId, spec, "") } else { - err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec) + _, err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec, "") } if err != nil { @@ -714,8 +715,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error { // return //} - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) @@ -2351,8 +2351,7 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { ctx.Data["PageIsCloudBrain"] = true ctx.Data["newInference"] = true - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + var displayJobName = cloudbrainService.GetDisplayJobName(ctx.User.Name) ctx.Data["display_job_name"] = displayJobName attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) diff --git a/services/cloudbrain/cloudbrainTask/notebook.go b/services/cloudbrain/cloudbrainTask/notebook.go new file mode 100644 index 000000000..ea1f8b081 --- /dev/null +++ b/services/cloudbrain/cloudbrainTask/notebook.go @@ -0,0 +1,330 @@ +package cloudbrainTask + +import ( + "fmt" + "net/http" + "os" + + "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/modelarts_cd" + + "code.gitea.io/gitea/modules/git" + + "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/redis/redis_key" + "code.gitea.io/gitea/modules/redis/redis_lock" + "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/services/cloudbrain/resource" + "code.gitea.io/gitea/services/reward/point/account" + + "code.gitea.io/gitea/modules/setting" + cloudbrainService "code.gitea.io/gitea/services/cloudbrain" + repo_service "code.gitea.io/gitea/services/repository" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/context" + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" +) + +func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) { + + if ctx.Written() { + return + } + //create repo if not exist + repo, err := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName) + if repo == nil { + repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{ + Name: setting.FileNoteBook.ProjectName, + Alias: "", + Description: "", + IssueLabels: "", + Gitignores: "", + License: "", + Readme: "Default", + IsPrivate: false, + AutoInit: true, + DefaultBranch: "master", + }) + } + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.failed_to_create_repo")) + } + +} + +func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository) { + + displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name) + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + jobType := string(models.JobTypeDebug) + + codePath := getCodePath(jobName) + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName)) + defer lock.UnLock() + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error.")) + return + } + } + + count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType) + if err != nil { + log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error.")) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob"))) + return + } + } + repoPath := models.RepoPath(repo.OwnerName, repo.Name) + gitRepo, err := git.OpenRepository(repoPath) + if err != nil { + ctx.Error(500, "RepoRef Invalid repo "+repoPath, err.Error()) + return + } + // We opened it, we should close it + defer func() { + // If it's been set to nil then assume someone else has closed it. + if ctx.Repo.GitRepo != nil { + ctx.Repo.GitRepo.Close() + } + }() + + ctx.Repo = &context.Repository{ + Repository: repo, + GitRepo: gitRepo, + } + + fileExist, err := ctx.Repo.FileExists(option.File, option.BranchName) + if err != nil || !fileExist { + log.Error("Get file error:", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) + return + } + + command := cloudbrain.GetCloudbrainDebugCommand() + + errStr := uploadCodeFile(repo, codePath, option.BranchName, option.File, jobName) + if errStr != "" { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) + return + } + + commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(option.BranchName) + specId := setting.FileNoteBook.SpecIdGPU + if option.Type == 0 { + specId = setting.FileNoteBook.SpecIdCPU + } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{ + JobType: models.JobType(jobType), + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification"))) + return + } + + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance"))) + return + } + + req := cloudbrain.GenerateCloudBrainTaskReq{ + Ctx: ctx, + DisplayJobName: displayJobName, + JobName: jobName, + Image: setting.FileNoteBook.ImageGPU, + Command: command, + Uuids: "", + DatasetNames: "", + DatasetInfos: nil, + CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), + ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), + Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), + BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), + JobType: jobType, + Description: getDescription(option), + BranchName: option.BranchName, + BootFile: option.File, + Params: "{\"parameter\":[]}", + CommitID: commitID, + BenchmarkTypeID: 0, + BenchmarkChildTypeID: 0, + ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, + } + + jobId, err := cloudbrain.GenerateTask(req) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + return + } + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 0, + Message: jobId, + }) + +} + +func getCodePath(jobName string) string { + return setting.JobPath + jobName + cloudbrain.CodeMountPath +} + +func getDescription(option api.CreateFileNotebookJobOption) string { + return option.OwnerName + "/" + option.ProjectName + "/" + option.File +} + +func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository) { + displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name) + jobName := util.ConvertDisplayJobNameToJobName(displayJobName) + + lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) + isOk, err := lock.Lock(models.CloudbrainKeyDuration) + if !isOk { + log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + defer lock.UnLock() + + count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug)) + + if err != nil { + log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error.")) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob"))) + return + } + } + + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName) + if err == nil { + if len(tasks) != 0 { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err"))) + return + } + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error.")) + return + } + } + repoPath := models.RepoPath(repo.OwnerName, repo.Name) + gitRepo, err := git.OpenRepository(repoPath) + if err != nil { + ctx.Error(500, "RepoRef Invalid repo "+repoPath, err.Error()) + return + } + // We opened it, we should close it + defer func() { + // If it's been set to nil then assume someone else has closed it. + if ctx.Repo.GitRepo != nil { + ctx.Repo.GitRepo.Close() + } + }() + + ctx.Repo = &context.Repository{ + Repository: repo, + GitRepo: gitRepo, + } + + fileExist, err := ctx.Repo.FileExists(option.File, option.BranchName) + if err != nil || !fileExist { + log.Error("Get file error:", err, ctx.Data["MsgID"]) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) + return + } + + err = downloadCode(repo, getCodePath(jobName), option.BranchName) + if err != nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed"))) + return + } + + var aiCenterCode = models.AICenterOfCloudBrainTwo + if setting.ModelartsCD.Enabled { + aiCenterCode = models.AICenterOfChengdu + } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, setting.FileNoteBook.SpecIdNPU, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: aiCenterCode}) + if err != nil || spec == nil { + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification"))) + return + } + if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { + log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID) + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance"))) + return + } + + var jobId string + if setting.ModelartsCD.Enabled { + jobId, err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPU, spec, option.File) + } else { + jobId, err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPU, spec, option.File) + } + + if err != nil { + log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"]) + + ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) + + return + } + + ctx.JSON(http.StatusOK, models.BaseMessageApi{ + Code: 0, + Message: jobId, + }) + +} + +func uploadCodeFile(repo *models.Repository, codePath string, branchName string, filePath string, jobName string) string { + err := downloadCode(repo, codePath, branchName) + if err != nil { + return "cloudbrain.load_code_failed" + } + + err = uploadOneFileToMinio(codePath, filePath, jobName, cloudbrain.CodeMountPath+"/") + if err != nil { + return "cloudbrain.load_code_failed" + } + go os.RemoveAll(codePath) + return "" +} diff --git a/services/cloudbrain/cloudbrainTask/train.go b/services/cloudbrain/cloudbrainTask/train.go index 8e4673d66..5de69335a 100644 --- a/services/cloudbrain/cloudbrainTask/train.go +++ b/services/cloudbrain/cloudbrainTask/train.go @@ -810,6 +810,18 @@ func uploadCodeToMinio(codePath, jobName, parentDir string) error { return nil } +func uploadOneFileToMinio(codePath, filePath, jobName, parentDir string) error { + destObject := setting.CBCodePathPrefix + jobName + parentDir + path.Base(filePath) + sourceFile := codePath + filePath + err := storage.Attachments.UploadObject(destObject, sourceFile) + if err != nil { + log.Error("UploadObject(%s) failed: %s", filePath, err.Error()) + return err + } + return nil + +} + func readDir(dirname string) ([]os.FileInfo, error) { f, err := os.Open(dirname) if err != nil { diff --git a/services/cloudbrain/util.go b/services/cloudbrain/util.go index ab738927e..d9f0510be 100644 --- a/services/cloudbrain/util.go +++ b/services/cloudbrain/util.go @@ -1,27 +1,31 @@ package cloudbrain import ( + "regexp" + "strconv" + "strings" + "time" + "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/setting" - "strings" ) -func GetAiCenterShow(aiCenter string,ctx *context.Context) string{ +func GetAiCenterShow(aiCenter string, ctx *context.Context) string { aiCenterInfo := strings.Split(aiCenter, "+") - if len(aiCenterInfo) == 2{ - if setting.C2NetMapInfo!=nil { - if info,ok:=setting.C2NetMapInfo[aiCenterInfo[0]];ok { + if len(aiCenterInfo) == 2 { + if setting.C2NetMapInfo != nil { + if info, ok := setting.C2NetMapInfo[aiCenterInfo[0]]; ok { if ctx.Language() == "zh-CN" { return info.Content } else { return info.ContentEN } - }else{ + } else { return aiCenterInfo[1] } - }else{ + } else { return aiCenterInfo[1] } @@ -29,5 +33,26 @@ func GetAiCenterShow(aiCenter string,ctx *context.Context) string{ return "" +} +func GetDisplayJobName(username string) string { + t := time.Now() + return jobNamePrefixValid(cutString(username, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] +} + +func cutString(str string, lens int) string { + if len(str) < lens { + return str + } + return str[:lens] +} + +func jobNamePrefixValid(s string) string { + lowStr := strings.ToLower(s) + re := regexp.MustCompile(`[^a-z0-9_\\-]+`) + + removeSpecial := re.ReplaceAllString(lowStr, "") + + re = regexp.MustCompile(`^[_\\-]+`) + return re.ReplaceAllString(removeSpecial, "") }