|
|
|
@@ -46,6 +46,9 @@ const ( |
|
|
|
|
|
|
|
tplCloudBrainTrainJobNew base.TplName = "repo/cloudbrain/trainjob/new" |
|
|
|
tplCloudBrainTrainJobShow base.TplName = "repo/cloudbrain/trainjob/show" |
|
|
|
|
|
|
|
tplCloudBrainInferenceJobNew base.TplName = "repo/cloudbrain/inference/new" |
|
|
|
tplCloudBrainInferenceJobShow base.TplName = "repo/cloudbrain/inference/show" |
|
|
|
) |
|
|
|
|
|
|
|
var ( |
|
|
|
@@ -200,6 +203,8 @@ func CloudBrainNew(ctx *context.Context) { |
|
|
|
ctx.ServerError("get new cloudbrain info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, "", models.JobTypeDebug) |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
ctx.HTML(200, tplCloudBrainNew) |
|
|
|
} |
|
|
|
|
|
|
|
@@ -318,6 +323,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { |
|
|
|
BenchmarkTypeID: 0, |
|
|
|
BenchmarkChildTypeID: 0, |
|
|
|
ResourceSpecId: resourceSpecId, |
|
|
|
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), |
|
|
|
} |
|
|
|
|
|
|
|
err = cloudbrain.GenerateTask(req) |
|
|
|
@@ -334,6 +340,138 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBrainInferencForm) { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
displayJobName := form.DisplayJobName |
|
|
|
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) |
|
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
uuid := form.Attachment |
|
|
|
jobType := form.JobType |
|
|
|
gpuQueue := form.GpuType |
|
|
|
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath |
|
|
|
resourceSpecId := form.ResourceSpecId |
|
|
|
branchName := form.BranchName |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
|
|
|
|
ckptUrl := form.TrainUrl + form.CkptName |
|
|
|
|
|
|
|
tpl := tplCloudBrainInferenceJobNew |
|
|
|
command := cloudbrain.Command |
|
|
|
if jobType == string(models.JobTypeTrain) { |
|
|
|
tpl = tplCloudBrainTrainJobNew |
|
|
|
commandTrain, err := getInferenceJobCommand(form) |
|
|
|
if err != nil { |
|
|
|
log.Error("getTrainJobCommand failed: %v", err) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
command = commandTrain |
|
|
|
} |
|
|
|
|
|
|
|
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) |
|
|
|
if err == nil { |
|
|
|
if len(tasks) != 0 { |
|
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} else { |
|
|
|
if !models.IsErrJobNotExist(err) { |
|
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeTrain) { |
|
|
|
log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("jobtype error", tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if branchName == "" { |
|
|
|
branchName = cloudbrain.DefaultBranchName |
|
|
|
} |
|
|
|
downloadCode(repo, codePath, branchName) |
|
|
|
uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/") |
|
|
|
resultPath := setting.JobPath + jobName + cloudbrain.ResultPath + "/" |
|
|
|
mkResultPath(resultPath) |
|
|
|
uploadCodeToMinio(resultPath, jobName, cloudbrain.ResultPath+"/") |
|
|
|
|
|
|
|
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) |
|
|
|
|
|
|
|
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
req := cloudbrain.GenerateCloudBrainTaskReq{ |
|
|
|
Ctx: ctx, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
JobName: jobName, |
|
|
|
Image: image, |
|
|
|
Command: command, |
|
|
|
Uuids: uuid, |
|
|
|
DatasetNames: datasetNames, |
|
|
|
DatasetInfos: datasetInfos, |
|
|
|
CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), |
|
|
|
ModelPath: ckptUrl, |
|
|
|
BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), |
|
|
|
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), |
|
|
|
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), |
|
|
|
JobType: jobType, |
|
|
|
GpuQueue: gpuQueue, |
|
|
|
Description: form.Description, |
|
|
|
BranchName: branchName, |
|
|
|
BootFile: form.BootFile, |
|
|
|
Params: form.Params, |
|
|
|
CommitID: commitID, |
|
|
|
ResourceSpecId: resourceSpecId, |
|
|
|
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), |
|
|
|
ModelName: form.ModelName, |
|
|
|
ModelVersion: form.ModelVersion, |
|
|
|
CkptName: form.CkptName, |
|
|
|
TrainUrl: form.TrainUrl, |
|
|
|
} |
|
|
|
|
|
|
|
err = cloudbrain.GenerateTask(req) |
|
|
|
if err != nil { |
|
|
|
cloudBrainNewDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
func CloudBrainRestart(ctx *context.Context) { |
|
|
|
var ID = ctx.Params(":id") |
|
|
|
var resultCode = "0" |
|
|
|
@@ -1181,6 +1319,20 @@ func CloudBrainDownloadModel(ctx *context.Context) { |
|
|
|
ctx.Resp.Header().Set("Cache-Control", "max-age=0") |
|
|
|
http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) |
|
|
|
} |
|
|
|
func CloudBrainDownloadInferenceResult(ctx *context.Context) { |
|
|
|
parentDir := ctx.Query("parentDir") |
|
|
|
fileName := ctx.Query("fileName") |
|
|
|
jobName := ctx.Query("jobName") |
|
|
|
filePath := "jobs/" + jobName + "/result/" + parentDir |
|
|
|
url, err := storage.Attachments.PresignedGetURL(filePath, fileName) |
|
|
|
if err != nil { |
|
|
|
log.Error("PresignedGetURL failed: %v", err.Error(), ctx.Data["msgID"]) |
|
|
|
ctx.ServerError("PresignedGetURL", err) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Resp.Header().Set("Cache-Control", "max-age=0") |
|
|
|
http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) |
|
|
|
} |
|
|
|
|
|
|
|
func GetRate(ctx *context.Context) { |
|
|
|
isObjectDetcionAll := ctx.QueryBool("isObjectDetcionAll") |
|
|
|
@@ -1334,13 +1486,21 @@ func uploadCodeToMinio(codePath, jobName, parentDir string) error { |
|
|
|
} |
|
|
|
|
|
|
|
func mkModelPath(modelPath string) error { |
|
|
|
err := os.MkdirAll(modelPath, os.ModePerm) |
|
|
|
return mkPathAndReadMeFile(modelPath, "You can put the model file into this directory and download it by the web page.") |
|
|
|
} |
|
|
|
|
|
|
|
func mkResultPath(resultPath string) error { |
|
|
|
return mkPathAndReadMeFile(resultPath, "You can put the result file into this directory and download it by the web page.") |
|
|
|
} |
|
|
|
|
|
|
|
func mkPathAndReadMeFile(path string, text string) error { |
|
|
|
err := os.MkdirAll(path, os.ModePerm) |
|
|
|
if err != nil { |
|
|
|
log.Error("MkdirAll(%s) failed:%v", modelPath, err) |
|
|
|
log.Error("MkdirAll(%s) failed:%v", path, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
fileName := modelPath + "README" |
|
|
|
fileName := path + "README" |
|
|
|
f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) |
|
|
|
if err != nil { |
|
|
|
log.Error("OpenFile failed", err.Error()) |
|
|
|
@@ -1349,7 +1509,7 @@ func mkModelPath(modelPath string) error { |
|
|
|
|
|
|
|
defer f.Close() |
|
|
|
|
|
|
|
_, err = f.WriteString("You can put the model file into this directory and download it by the web page.") |
|
|
|
_, err = f.WriteString(text) |
|
|
|
if err != nil { |
|
|
|
log.Error("WriteString failed", err.Error()) |
|
|
|
return err |
|
|
|
@@ -1802,6 +1962,8 @@ func CloudBrainBenchmarkNew(ctx *context.Context) { |
|
|
|
ctx.ServerError("get new cloudbrain info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, "", models.JobTypeBrainScore, models.JobTypeSnn4imagenet, models.JobTypeBenchmark) |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
ctx.HTML(200, tplCloudBrainBenchmarkNew) |
|
|
|
} |
|
|
|
|
|
|
|
@@ -2068,6 +2230,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo |
|
|
|
BenchmarkTypeID: benchmarkTypeID, |
|
|
|
BenchmarkChildTypeID: benchmarkChildTypeID, |
|
|
|
ResourceSpecId: resourceSpecId, |
|
|
|
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), |
|
|
|
} |
|
|
|
|
|
|
|
err = cloudbrain.GenerateTask(req) |
|
|
|
@@ -2196,6 +2359,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) |
|
|
|
BenchmarkTypeID: 0, |
|
|
|
BenchmarkChildTypeID: benchmarkChildTypeID, |
|
|
|
ResourceSpecId: resourceSpecId, |
|
|
|
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), |
|
|
|
} |
|
|
|
|
|
|
|
err = cloudbrain.GenerateTask(req) |
|
|
|
@@ -2243,9 +2407,121 @@ func CloudBrainTrainJobNew(ctx *context.Context) { |
|
|
|
ctx.ServerError("get new train-job info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, "", models.JobTypeTrain) |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) |
|
|
|
} |
|
|
|
|
|
|
|
func InferenceCloudBrainJobNew(ctx *context.Context) { |
|
|
|
err := cloudBrainNewDataPrepare(ctx) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new train-job info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, "", models.JobTypeInference) |
|
|
|
ctx.Data["WaitCount"] = waitCount |
|
|
|
ctx.HTML(http.StatusOK, tplCloudBrainInferenceJobNew) |
|
|
|
} |
|
|
|
|
|
|
|
func InferenceCloudBrainJobShow(ctx *context.Context) { |
|
|
|
err := cloudBrainNewDataPrepare(ctx) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new train-job info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
|
|
|
|
task, err := models.GetCloudbrainByJobID(jobID) |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplCloudBrainInferenceJobShow, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式 |
|
|
|
var parameters models.Parameters |
|
|
|
err = json.Unmarshal([]byte(task.Parameters), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) |
|
|
|
ctx.RenderWithErr(err.Error(), tplCloudBrainInferenceJobShow, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if len(parameters.Parameter) > 0 { |
|
|
|
paramTemp := "" |
|
|
|
for _, Parameter := range parameters.Parameter { |
|
|
|
param := Parameter.Label + " = " + Parameter.Value + "; " |
|
|
|
paramTemp = paramTemp + param |
|
|
|
} |
|
|
|
task.Parameters = paramTemp[:len(paramTemp)-2] |
|
|
|
} else { |
|
|
|
task.Parameters = "" |
|
|
|
} |
|
|
|
|
|
|
|
LabelName := strings.Fields(task.LabelName) |
|
|
|
ctx.Data["labelName"] = LabelName |
|
|
|
ctx.Data["jobID"] = jobID |
|
|
|
ctx.Data["jobName"] = task.JobName |
|
|
|
ctx.Data["displayJobName"] = task.DisplayJobName |
|
|
|
ctx.Data["task"] = task |
|
|
|
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) |
|
|
|
|
|
|
|
tempUids := []int64{} |
|
|
|
tempUids = append(tempUids, task.UserID) |
|
|
|
JobCreater, err := models.GetUserNamesByIDs(tempUids) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetUserNamesByIDs (WhitelistUserIDs): %v", err) |
|
|
|
} |
|
|
|
ctx.Data["userName"] = JobCreater[0] |
|
|
|
|
|
|
|
ctx.HTML(http.StatusOK, tplCloudBrainInferenceJobShow) |
|
|
|
} |
|
|
|
|
|
|
|
func DownloadInferenceResultFile(ctx *context.Context) { |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
var versionName = ctx.Query("version_name") |
|
|
|
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
allFile, err := storage.GetAllObjectByBucketAndPrefixMinio(setting.Attachment.Minio.Bucket, task.ResultUrl) |
|
|
|
returnFileName := task.DisplayJobName + ".zip" |
|
|
|
MinioDownloadManyFile(task.ResultUrl, ctx, returnFileName, allFile) |
|
|
|
} |
|
|
|
|
|
|
|
func getInferenceJobCommand(form auth.CreateCloudBrainInferencForm) (string, error) { |
|
|
|
var command string |
|
|
|
bootFile := strings.TrimSpace(form.BootFile) |
|
|
|
params := form.Params |
|
|
|
|
|
|
|
if !strings.HasSuffix(bootFile, ".py") { |
|
|
|
log.Error("bootFile(%s) format error", bootFile) |
|
|
|
return command, errors.New("bootFile format error") |
|
|
|
} |
|
|
|
|
|
|
|
var parameters models.Parameters |
|
|
|
var param string |
|
|
|
if len(params) != 0 { |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
return command, err |
|
|
|
} |
|
|
|
|
|
|
|
for _, parameter := range parameters.Parameter { |
|
|
|
param += " --" + parameter.Label + "=" + parameter.Value |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile |
|
|
|
|
|
|
|
return command, nil |
|
|
|
} |
|
|
|
|
|
|
|
func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { |
|
|
|
var command string |
|
|
|
bootFile := strings.TrimSpace(form.BootFile) |
|
|
|
|