From 6400af1d7bb3d8ea539ff8dda6822a43d3752353 Mon Sep 17 00:00:00 2001 From: zouap Date: Mon, 26 Sep 2022 15:32:52 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zouap --- models/cloudbrain.go | 5 + routers/repo/aisafety.go | 197 ++++++++++++++++++++++++++++--------- routers/repo/cloudbrain.go | 2 + 3 files changed, 157 insertions(+), 47 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 5a3d9e584..48ecc3998 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1953,6 +1953,11 @@ func GetBenchmarkCountByUserID(userID int64) (int, error) { return int(count), err } +func GetModelSafetyCountByUserID(userID int64) (int, error) { + count, err := x.In("status", JobWaiting, JobRunning).And("job_type = ? and user_id = ?", string(JobTypeModelSafety), userID).Count(new(Cloudbrain)) + return int(count), err +} + func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTypes ...JobType) (int64, error) { sess := x.Where("status=? 
and type=?", JobWaiting, cloudbrainType) if len(jobTypes) > 0 { diff --git a/routers/repo/aisafety.go b/routers/repo/aisafety.go index adbffc89a..436ae41da 100644 --- a/routers/repo/aisafety.go +++ b/routers/repo/aisafety.go @@ -1,9 +1,12 @@ package repo import ( + "encoding/json" + "errors" "io/ioutil" "net/http" "os" + "strconv" "strings" "time" @@ -56,104 +59,134 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true displayJobName := ctx.Query("DisplayJobName") jobName := util.ConvertDisplayJobNameToJobName(displayJobName) - image := strings.TrimSpace(ctx.Query("Image")) - command := "python /code/inferench.py > " + cloudbrain.ModelMountPath + "/" + displayJobName + "-" + cloudbrain.LogFile - codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath + + taskType := ctx.QueryInt("type") description := ctx.Query("Description") - specId := ctx.QueryInt64("SpecId") ctx.Data["description"] = description repo := ctx.Repo.Repository - tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeBenchmark), displayJobName) + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeModelSafety), displayJobName) if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("the job name did already exist", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("the job name did already exist", tplCloudBrainModelSafetyNew, nil) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) return } } if !jobNamePattern.MatchString(jobName) { - cloudBrainNewDataPrepare(ctx) - 
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainModelSafetyNew, nil) return } - spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{ - JobType: models.JobTypeBenchmark, - ComputeResource: models.GPU, - Cluster: models.OpenICluster, - AiCenterCode: models.AICenterOfCloudBrainOne}) - if err != nil || spec == nil { - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, nil) - return - } - - count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) + count, err := models.GetModelSafetyCountByUserID(ctx.User.ID) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainModelSafetyNew, nil) return } } + if taskType == models.TypeCloudBrainTwo { + createForNPU(ctx) + } else if taskType == models.TypeCloudBrainOne { + createForGPU(ctx, jobName) + } + + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") +} + +func createForGPU(ctx *context.Context, jobName string) { + BootFile := ctx.Query("BootFile") + displayJobName := ctx.Query("DisplayJobName") + description := ctx.Query("Description") + image := strings.TrimSpace(ctx.Query("Image")) + srcDataset := ctx.Query("srcDataset") //uuid + combatDataset := ctx.Query("combatDataset") //uuid + 
evaluationIndex := ctx.Query("evaluationIndex") + Params := ctx.Query("RunParaList") + specId := ctx.QueryInt64("SpecId") + TrainUrl := ctx.Query("TrainUrl") + CkptName := ctx.Query("CkptName") + ckptUrl := setting.Attachment.Minio.RealPath + TrainUrl + CkptName + log.Info("ckpt url:" + ckptUrl) + spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("Resource specification not available", tplCloudBrainModelSafetyNew, nil) + return + } + + repo := ctx.Repo.Repository + codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath os.RemoveAll(codePath) if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil { log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) return } - if _, err := os.Stat(codePath + "/inference.py"); err != nil { + if _, err := os.Stat(codePath + "/" + BootFile); err != nil { if os.IsNotExist(err) { // file does not exist - log.Error("inference.py does not exist, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("inference.py does not exist", tplCloudBrainBenchmarkNew, nil) + log.Error(BootFile+" does not exist, %v", err, ctx.Data["MsgID"]) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr(BootFile+" does not exist", tplCloudBrainModelSafetyNew, nil) } else { log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) } 
return } - - if err := uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { + err = uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") + if err != nil { log.Error("uploadCodeToMinio failed, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) + modelSafetyNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) return } - uuid := "dee79f68-19f1-42dd-b004-bc9ce08415ca" + uuid := srcDataset + ";" + combatDataset datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + modelSafetyNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, nil) return } + command, err := getGpuModelSafetyCommand(BootFile, Params, CkptName, displayJobName) + if err != nil { + log.Error("Get Command failed: %v", err, ctx.Data["MsgID"]) + modelSafetyNewDataPrepare(ctx) + //ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, nil) TODO + return + } log.Info("Command=" + command) - log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/")) + req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -164,15 +197,15 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { DatasetNames: datasetNames, DatasetInfos: datasetInfos, CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), - ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), + ModelPath: setting.Attachment.Minio.RealPath + TrainUrl, BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), 
JobType: string(models.JobTypeModelSafety), Description: description, BranchName: cloudbrain.DefaultBranchName, - BootFile: "", - Params: "", + BootFile: BootFile, + Params: Params, CommitID: "", ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), Spec: spec, @@ -180,13 +213,83 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + modelSafetyNewDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, nil) return } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/modelsafety_test") +} - ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") +func getGpuModelSafetyCommand(BootFile string, params string, CkptName string, DisplayJobName string) (string, error) { + var command string + bootFile := strings.TrimSpace(BootFile) + + if !strings.HasSuffix(bootFile, ".py") { + log.Error("bootFile(%s) format error", bootFile) + return command, errors.New("bootFile format error") + } + + var parameters models.Parameters + var param string + if len(params) != 0 { + err := json.Unmarshal([]byte(params), &parameters) + if err != nil { + log.Error("Failed to Unmarshal params: %s (%v)", params, err) + return command, err + } + + for _, parameter := range parameters.Parameter { + param += " --" + parameter.Label + "=" + parameter.Value + } + } + + param += " --modelname" + "=" + CkptName + + command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + DisplayJobName + "-" + cloudbrain.LogFile + + return command, nil +} + +func modelSafetyNewDataPrepare(ctx *context.Context) error { + ctx.Data["PageIsCloudBrain"] = true + t := time.Now() + var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["display_job_name"] = displayJobName + + ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand() + ctx.Data["code_path"] = 
cloudbrain.CodeMountPath + ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath + ctx.Data["model_path"] = cloudbrain.ModelMountPath + ctx.Data["benchmark_path"] = cloudbrain.BenchMarkMountPath + ctx.Data["is_benchmark_enabled"] = setting.IsBenchmarkEnabled + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + prepareCloudbrainOneSpecs(ctx) + + ctx.Data["params"] = "" + ctx.Data["branchName"] = ctx.Repo.BranchName + + ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath + ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled + + ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath + ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled + + ctx.Data["datasetType"] = models.TypeCloudBrainOne + + ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") + return nil } func getJsonContent(url string) (string, error) { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 24a5ac685..ae29abf46 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -46,6 +46,8 @@ const ( tplCloudBrainBenchmarkNew base.TplName = "repo/cloudbrain/benchmark/new" tplCloudBrainBenchmarkShow base.TplName = "repo/cloudbrain/benchmark/show" + tplCloudBrainModelSafetyNew base.TplName = "repo/cloudbrain/modelsafety/new" + tplCloudBrainImageSubmit base.TplName = "repo/cloudbrain/image/submit" tplCloudBrainImageEdit base.TplName = "repo/cloudbrain/image/edit"