|
|
|
@@ -1,9 +1,12 @@ |
|
|
|
package repo |
|
|
|
|
|
|
|
import ( |
|
|
|
"encoding/json" |
|
|
|
"errors" |
|
|
|
"io/ioutil" |
|
|
|
"net/http" |
|
|
|
"os" |
|
|
|
"strconv" |
|
|
|
"strings" |
|
|
|
"time" |
|
|
|
|
|
|
|
@@ -56,104 +59,134 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
displayJobName := ctx.Query("DisplayJobName") |
|
|
|
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) |
|
|
|
image := strings.TrimSpace(ctx.Query("Image")) |
|
|
|
command := "python /code/inferench.py > " + cloudbrain.ModelMountPath + "/" + displayJobName + "-" + cloudbrain.LogFile |
|
|
|
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath |
|
|
|
|
|
|
|
taskType := ctx.QueryInt("type") |
|
|
|
description := ctx.Query("Description") |
|
|
|
specId := ctx.QueryInt64("SpecId") |
|
|
|
ctx.Data["description"] = description |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
|
|
|
|
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeBenchmark), displayJobName) |
|
|
|
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeModelSafety), displayJobName) |
|
|
|
if err == nil { |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(jobName) { |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{ |
|
|
|
JobType: models.JobTypeBenchmark, |
|
|
|
ComputeResource: models.GPU, |
|
|
|
Cluster: models.OpenICluster, |
|
|
|
AiCenterCode: models.AICenterOfCloudBrainOne}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
count, err := models.GetBenchmarkCountByUserID(ctx.User.ID) |
|
|
|
count, err := models.GetModelSafetyCountByUserID(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
if taskType == models.TypeCloudBrainTwo { |
|
|
|
createForNPU(ctx) |
|
|
|
} else if taskType == models.TypeCloudBrainOne { |
|
|
|
createForGPU(ctx, jobName) |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
func createForGPU(ctx *context.Context, jobName string) { |
|
|
|
BootFile := ctx.Query("BootFile") |
|
|
|
displayJobName := ctx.Query("DisplayJobName") |
|
|
|
description := ctx.Query("Description") |
|
|
|
image := strings.TrimSpace(ctx.Query("Image")) |
|
|
|
srcDataset := ctx.Query("srcDataset") //uuid |
|
|
|
combatDataset := ctx.Query("combatDataset") //uuid |
|
|
|
evaluationIndex := ctx.Query("evaluationIndex") |
|
|
|
Params := ctx.Query("RunParaList") |
|
|
|
specId := ctx.QueryInt64("SpecId") |
|
|
|
TrainUrl := ctx.Query("TrainUrl") |
|
|
|
CkptName := ctx.Query("CkptName") |
|
|
|
ckptUrl := setting.Attachment.Minio.RealPath + TrainUrl + CkptName |
|
|
|
log.Info("ckpt url:" + ckptUrl) |
|
|
|
spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{ |
|
|
|
JobType: models.JobTypeBenchmark, |
|
|
|
ComputeResource: models.GPU, |
|
|
|
Cluster: models.OpenICluster, |
|
|
|
AiCenterCode: models.AICenterOfCloudBrainOne}) |
|
|
|
if err != nil || spec == nil { |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Resource specification not available", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath |
|
|
|
os.RemoveAll(codePath) |
|
|
|
|
|
|
|
if err := downloadCode(repo, codePath, cloudbrain.DefaultBranchName); err != nil { |
|
|
|
log.Error("downloadCode failed, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if _, err := os.Stat(codePath + "/inference.py"); err != nil { |
|
|
|
if _, err := os.Stat(codePath + "/" + BootFile); err != nil { |
|
|
|
if os.IsNotExist(err) { |
|
|
|
// file does not exist |
|
|
|
log.Error("inference.py does not exist, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("inference.py does not exist", tplCloudBrainBenchmarkNew, nil) |
|
|
|
log.Error(BootFile+" does not exist, %v", err, ctx.Data["MsgID"]) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(BootFile+" does not exist", tplCloudBrainModelSafetyNew, nil) |
|
|
|
} else { |
|
|
|
log.Error("Stat failed, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) |
|
|
|
} |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { |
|
|
|
err = uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") |
|
|
|
if err != nil { |
|
|
|
log.Error("uploadCodeToMinio failed, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, nil) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tplCloudBrainModelSafetyNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
uuid := "dee79f68-19f1-42dd-b004-bc9ce08415ca" |
|
|
|
uuid := srcDataset + ";" + combatDataset |
|
|
|
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
command, err := getGpuModelSafetyCommand(BootFile, Params, CkptName, displayJobName) |
|
|
|
if err != nil { |
|
|
|
log.Error("Get Command failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
//ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, nil) TODO |
|
|
|
return |
|
|
|
} |
|
|
|
log.Info("Command=" + command) |
|
|
|
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/")) |
|
|
|
|
|
|
|
req := cloudbrain.GenerateCloudBrainTaskReq{ |
|
|
|
Ctx: ctx, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
@@ -164,15 +197,15 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { |
|
|
|
DatasetNames: datasetNames, |
|
|
|
DatasetInfos: datasetInfos, |
|
|
|
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), |
|
|
|
ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), |
|
|
|
ModelPath: setting.Attachment.Minio.RealPath + TrainUrl, |
|
|
|
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), |
|
|
|
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), |
|
|
|
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), |
|
|
|
JobType: string(models.JobTypeModelSafety), |
|
|
|
Description: description, |
|
|
|
BranchName: cloudbrain.DefaultBranchName, |
|
|
|
BootFile: "", |
|
|
|
Params: "", |
|
|
|
BootFile: BootFile, |
|
|
|
Params: Params, |
|
|
|
CommitID: "", |
|
|
|
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), |
|
|
|
Spec: spec, |
|
|
|
@@ -180,13 +213,83 @@ func CloudBrainAiSafetyCreate(ctx *context.Context) { |
|
|
|
|
|
|
|
err = cloudbrain.GenerateTask(req) |
|
|
|
if err != nil { |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
modelSafetyNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplCloudBrainBenchmarkNew, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/modelsafety_test") |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") |
|
|
|
func getGpuModelSafetyCommand(BootFile string, params string, CkptName string, DisplayJobName string) (string, error) { |
|
|
|
var command string |
|
|
|
bootFile := strings.TrimSpace(BootFile) |
|
|
|
|
|
|
|
if !strings.HasSuffix(bootFile, ".py") { |
|
|
|
log.Error("bootFile(%s) format error", bootFile) |
|
|
|
return command, errors.New("bootFile format error") |
|
|
|
} |
|
|
|
|
|
|
|
var parameters models.Parameters |
|
|
|
var param string |
|
|
|
if len(params) != 0 { |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
return command, err |
|
|
|
} |
|
|
|
|
|
|
|
for _, parameter := range parameters.Parameter { |
|
|
|
param += " --" + parameter.Label + "=" + parameter.Value |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
param += " --modelname" + "=" + CkptName |
|
|
|
|
|
|
|
command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + DisplayJobName + "-" + cloudbrain.LogFile |
|
|
|
|
|
|
|
return command, nil |
|
|
|
} |
|
|
|
|
|
|
|
func modelSafetyNewDataPrepare(ctx *context.Context) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
t := time.Now() |
|
|
|
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["display_job_name"] = displayJobName |
|
|
|
|
|
|
|
ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand() |
|
|
|
ctx.Data["code_path"] = cloudbrain.CodeMountPath |
|
|
|
ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath |
|
|
|
ctx.Data["model_path"] = cloudbrain.ModelMountPath |
|
|
|
ctx.Data["benchmark_path"] = cloudbrain.BenchMarkMountPath |
|
|
|
ctx.Data["is_benchmark_enabled"] = setting.IsBenchmarkEnabled |
|
|
|
|
|
|
|
if categories == nil { |
|
|
|
json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) |
|
|
|
} |
|
|
|
ctx.Data["benchmark_categories"] = categories.Category |
|
|
|
|
|
|
|
ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType |
|
|
|
queuesDetail, _ := cloudbrain.GetQueuesDetail() |
|
|
|
if queuesDetail != nil { |
|
|
|
ctx.Data["QueuesDetail"] = queuesDetail |
|
|
|
} |
|
|
|
|
|
|
|
prepareCloudbrainOneSpecs(ctx) |
|
|
|
|
|
|
|
ctx.Data["params"] = "" |
|
|
|
ctx.Data["branchName"] = ctx.Repo.BranchName |
|
|
|
|
|
|
|
ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath |
|
|
|
ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled |
|
|
|
|
|
|
|
ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath |
|
|
|
ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled |
|
|
|
|
|
|
|
ctx.Data["datasetType"] = models.TypeCloudBrainOne |
|
|
|
|
|
|
|
ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func getJsonContent(url string) (string, error) { |
|
|
|
|