Browse Source

增加两个路由,支持智算网络

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.10.1^2
zouap 3 years ago
parent
commit
833902f8ec
3 changed files with 98 additions and 44 deletions
  1. +58
    -6
      routers/repo/aisafety.go
  2. +38
    -38
      routers/repo/grampus.go
  3. +2
    -0
      routers/routes/routes.go

+ 58
- 6
routers/repo/aisafety.go View File

@@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@@ -29,9 +30,11 @@ import (
) )


const ( const (
tplModelSafetyTestCreateGpu = "repo/modelsafety/newgpu"
tplModelSafetyTestCreateNpu = "repo/modelsafety/newnpu"
tplModelSafetyTestShow = "repo/modelsafety/show"
tplModelSafetyTestCreateGrampusGpu = "repo/modelsafety/newgrampusgpu"
tplModelSafetyTestCreateGrampusNpu = "repo/modelsafety/newgrampusnpu"
tplModelSafetyTestCreateGpu = "repo/modelsafety/newgpu"
tplModelSafetyTestCreateNpu = "repo/modelsafety/newnpu"
tplModelSafetyTestShow = "repo/modelsafety/show"
) )


func CloudBrainAiSafetyCreateTest(ctx *context.Context) { func CloudBrainAiSafetyCreateTest(ctx *context.Context) {
@@ -471,6 +474,40 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) {
} }
ctx.HTML(200, tplModelSafetyTestCreateGpu) ctx.HTML(200, tplModelSafetyTestCreateGpu)
} }
// AiSafetyCreateForGetGrampusGPU serves the model-safety test creation page
// for Grampus GPU tasks. It seeds the template data with the configured base
// and combat dataset identifiers, lets GrampusTrainJobNewDataPrepare fill in
// the remaining data for the GPU processor type, and then renders
// tplModelSafetyTestCreateGrampusGpu. If the preparation step fails, a server
// error is returned instead of the page.
func AiSafetyCreateForGetGrampusGPU(ctx *context.Context) {
	data := ctx.Data
	safetyCfg := setting.ModelSafetyTest

	// Page flags and dataset type are set before the shared preparation runs.
	data["PageIsCloudBrain"] = true
	data["IsCreate"] = true
	data["datasetType"] = models.TypeCloudBrainOne

	// Datasets used by the safety test, taken from the application settings.
	data["BaseDataSetName"] = safetyCfg.BaseDataSetName
	data["BaseDataSetUUID"] = safetyCfg.BaseDataSetUUID
	data["CombatDataSetName"] = safetyCfg.CombatDataSetName
	data["CombatDataSetUUID"] = safetyCfg.CombatDataSetUUID

	if err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU); err != nil {
		ctx.ServerError("get new train-job info failed", err)
		return
	}
	ctx.HTML(200, tplModelSafetyTestCreateGrampusGpu)
}

// AiSafetyCreateForGetGrampusNPU serves the model-safety test creation page
// for Grampus NPU tasks. It seeds the template data with the configured base
// and combat dataset identifiers, lets GrampusTrainJobNewDataPrepare fill in
// the remaining data for the NPU processor type, and then renders
// tplModelSafetyTestCreateGrampusNpu. If the preparation step fails, a server
// error is returned instead of the page.
func AiSafetyCreateForGetGrampusNPU(ctx *context.Context) {
	data := ctx.Data
	safetyCfg := setting.ModelSafetyTest

	// Page flags and dataset type are set before the shared preparation runs.
	data["PageIsCloudBrain"] = true
	data["IsCreate"] = true
	data["datasetType"] = models.TypeCloudBrainTwo

	// Datasets used by the safety test, taken from the application settings.
	data["BaseDataSetName"] = safetyCfg.BaseDataSetName
	data["BaseDataSetUUID"] = safetyCfg.BaseDataSetUUID
	data["CombatDataSetName"] = safetyCfg.CombatDataSetName
	data["CombatDataSetUUID"] = safetyCfg.CombatDataSetUUID

	if err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU); err != nil {
		ctx.ServerError("get new train-job info failed", err)
		return
	}
	ctx.HTML(200, tplModelSafetyTestCreateGrampusNpu)
}


func AiSafetyCreateForGetNPU(ctx *context.Context) { func AiSafetyCreateForGetNPU(ctx *context.Context) {
t := time.Now() t := time.Now()
@@ -510,11 +547,11 @@ func AiSafetyCreateForGetNPU(ctx *context.Context) {


func AiSafetyCreateForPost(ctx *context.Context) { func AiSafetyCreateForPost(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true ctx.Data["PageIsCloudBrain"] = true
displayJobName := ctx.Query("DisplayJobName")
displayJobName := ctx.Query("display_job_name")
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) jobName := util.ConvertDisplayJobNameToJobName(displayJobName)


taskType := ctx.QueryInt("type") taskType := ctx.QueryInt("type")
description := ctx.Query("Description")
description := ctx.Query("description")
ctx.Data["description"] = description ctx.Data["description"] = description


repo := ctx.Repo.Repository repo := ctx.Repo.Repository
@@ -561,7 +598,7 @@ func AiSafetyCreateForPost(ctx *context.Context) {
return return
} }
} }
BootFile := ctx.Query("BootFile")
BootFile := ctx.Query("boot_file")
bootFileExist, err := ctx.Repo.FileExists(BootFile, cloudbrain.DefaultBranchName) bootFileExist, err := ctx.Repo.FileExists(BootFile, cloudbrain.DefaultBranchName)
if err != nil || !bootFileExist { if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err) log.Error("Get bootfile error:", err)
@@ -574,10 +611,25 @@ func AiSafetyCreateForPost(ctx *context.Context) {
createForNPU(ctx, jobName) createForNPU(ctx, jobName)
} else if taskType == models.TypeCloudBrainOne { } else if taskType == models.TypeCloudBrainOne {
createForGPU(ctx, jobName) createForGPU(ctx, jobName)
} else if taskType == models.TypeC2Net {
ComputeResource := ctx.Query("compute_resource")
if ComputeResource == models.NPUResource {
createForGrampusNPU(ctx, jobName)
} else if ComputeResource == models.GPUResource {
createForGrampusGPU(ctx, jobName)
}
} }
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark") ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark")
} }


// createForGrampusGPU is the branch AiSafetyCreateForPost takes for C2Net
// tasks whose "compute_resource" query value equals models.GPUResource.
// The body is currently empty: no task is created here yet, and the caller
// still redirects to the benchmark page afterwards.
func createForGrampusGPU(ctx *context.Context, jobName string) {

}

// createForGrampusNPU is the branch AiSafetyCreateForPost takes for C2Net
// tasks whose "compute_resource" query value equals models.NPUResource.
// The body is currently empty: no task is created here yet, and the caller
// still redirects to the benchmark page afterwards.
func createForGrampusNPU(ctx *context.Context, jobName string) {

}

func createForNPU(ctx *context.Context, jobName string) { func createForNPU(ctx *context.Context, jobName string) {
VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount)
BootFile := ctx.Query("boot_file") BootFile := ctx.Query("boot_file")


+ 38
- 38
routers/repo/grampus.go View File

@@ -49,7 +49,7 @@ const (


func GrampusTrainJobGPUNew(ctx *context.Context) { func GrampusTrainJobGPUNew(ctx *context.Context) {
ctx.Data["IsCreate"] = true ctx.Data["IsCreate"] = true
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
if err != nil { if err != nil {
ctx.ServerError("get new train-job info failed", err) ctx.ServerError("get new train-job info failed", err)
return return
@@ -60,7 +60,7 @@ func GrampusTrainJobGPUNew(ctx *context.Context) {


func GrampusTrainJobNPUNew(ctx *context.Context) { func GrampusTrainJobNPUNew(ctx *context.Context) {
ctx.Data["IsCreate"] = true ctx.Data["IsCreate"] = true
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
if err != nil { if err != nil {
ctx.ServerError("get new train-job info failed", err) ctx.ServerError("get new train-job info failed", err)
return return
@@ -68,7 +68,7 @@ func GrampusTrainJobNPUNew(ctx *context.Context) {
ctx.HTML(200, tplGrampusTrainJobNPUNew) ctx.HTML(200, tplGrampusTrainJobNPUNew)
} }


func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
func GrampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
ctx.Data["PageIsCloudBrain"] = true ctx.Data["PageIsCloudBrain"] = true


t := time.Now() t := time.Now()
@@ -176,14 +176,14 @@ func GrampusTrainJobVersionNew(ctx *context.Context) {
task := ctx.Cloudbrain task := ctx.Cloudbrain
ctx.Data["IsCreate"] = false ctx.Data["IsCreate"] = false
if task.ComputeResource == models.GPUResource { if task.ComputeResource == models.GPUResource {
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
if err != nil { if err != nil {
ctx.ServerError("get new train-job version info failed", err) ctx.ServerError("get new train-job version info failed", err)
return return
} }
ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUNew) ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUNew)
} else if task.ComputeResource == models.NPUResource { } else if task.ComputeResource == models.NPUResource {
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
err := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
if err != nil { if err != nil {
ctx.ServerError("get new train-job version info failed", err) ctx.ServerError("get new train-job version info failed", err)
return return
@@ -279,14 +279,14 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
isOk, err := lock.Lock(models.CloudbrainKeyDuration) isOk, err := lock.Lock(models.CloudbrainKeyDuration)
if !isOk { if !isOk {
log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobGPUNew, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobGPUNew, &form)
return return
} }
defer lock.UnLock() defer lock.UnLock()


if !jobNamePattern.MatchString(displayJobName) { if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return return
} }
@@ -294,7 +294,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
if err != nil || !bootFileExist { if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
return return
} }
@@ -303,13 +303,13 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource) count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource)
if err != nil { if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("system error", tpl, &form) ctx.RenderWithErr("system error", tpl, &form)
return return
} else { } else {
if count >= 1 { if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return return
} }
@@ -318,7 +318,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//check param //check param
if err := grampusParamCheckCreateTrainJob(form); err != nil { if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
return return
} }
@@ -328,14 +328,14 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err == nil { if err == nil {
if len(tasks) != 0 { if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"]) log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("the job name did already exist", tpl, &form) ctx.RenderWithErr("the job name did already exist", tpl, &form)
return return
} }
} else { } else {
if !models.IsErrJobNotExist(err) { if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"]) log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("system error", tpl, &form) ctx.RenderWithErr("system error", tpl, &form)
return return
} }
@@ -348,14 +348,14 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
Cluster: models.C2NetCluster, Cluster: models.C2NetCluster,
}) })
if err != nil || spec == nil { if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("Resource specification not available", tpl, &form) ctx.RenderWithErr("Resource specification not available", tpl, &form)
return return
} }


if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplGrampusTrainJobGPUNew, &form) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplGrampusTrainJobGPUNew, &form)
return return
} }
@@ -365,7 +365,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU) datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU)
if err != nil { if err != nil {
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return return
} }
@@ -378,7 +378,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain


if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -387,7 +387,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//upload code //upload code
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -395,7 +395,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
if err := mkModelPath(modelPath); err != nil { if err := mkModelPath(modelPath); err != nil {
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -403,7 +403,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//init model readme //init model readme
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -426,7 +426,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName) command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName)
if err != nil { if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) ctx.RenderWithErr("Create task failed, internal error", tpl, &form)
return return
} }
@@ -468,7 +468,7 @@ func grampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
err = grampus.GenerateTrainJob(ctx, req) err = grampus.GenerateTrainJob(ctx, req)
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
return return
} }
@@ -549,14 +549,14 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
isOk, err := lock.Lock(models.CloudbrainKeyDuration) isOk, err := lock.Lock(models.CloudbrainKeyDuration)
if !isOk { if !isOk {
log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobNPUNew, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tplGrampusTrainJobNPUNew, &form)
return return
} }
defer lock.UnLock() defer lock.UnLock()


if !jobNamePattern.MatchString(displayJobName) { if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return return
} }
@@ -564,7 +564,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
if err != nil || !bootFileExist { if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form)
return return
} }
@@ -573,13 +573,13 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource)
if err != nil { if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("system error", tpl, &form) ctx.RenderWithErr("system error", tpl, &form)
return return
} else { } else {
if count >= 1 { if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return return
} }
@@ -588,7 +588,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//check param //check param
if err := grampusParamCheckCreateTrainJob(form); err != nil { if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err) log.Error("paramCheckCreateTrainJob failed:(%v)", err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
return return
} }
@@ -598,14 +598,14 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err == nil { if err == nil {
if len(tasks) != 0 { if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"]) log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("the job name did already exist", tpl, &form) ctx.RenderWithErr("the job name did already exist", tpl, &form)
return return
} }
} else { } else {
if !models.IsErrJobNotExist(err) { if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"]) log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("system error", tpl, &form) ctx.RenderWithErr("system error", tpl, &form)
return return
} }
@@ -618,13 +618,13 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
Cluster: models.C2NetCluster, Cluster: models.C2NetCluster,
}) })
if err != nil || spec == nil { if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Resource specification not available", tpl, &form) ctx.RenderWithErr("Resource specification not available", tpl, &form)
return return
} }
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplGrampusTrainJobNPUNew, &form) ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tplGrampusTrainJobNPUNew, &form)
return return
} }
@@ -633,7 +633,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU) datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU)
if err != nil { if err != nil {
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return return
} }
@@ -646,7 +646,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain


if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -654,14 +654,14 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//todo: upload code (send to file_server todo this work?) //todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }


if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form)
return return
} }
@@ -683,7 +683,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName) command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName)
if err != nil { if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Create task failed, internal error", tpl, &form) ctx.RenderWithErr("Create task failed, internal error", tpl, &form)
return return
} }
@@ -726,7 +726,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
err = grampus.GenerateTrainJob(ctx, req) err = grampus.GenerateTrainJob(ctx, req)
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error()) log.Error("GenerateTrainJob failed:%v", err.Error())
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(err.Error(), tpl, &form) ctx.RenderWithErr(err.Error(), tpl, &form)
return return
} }


+ 2
- 0
routers/routes/routes.go View File

@@ -1236,6 +1236,8 @@ func RegisterRoutes(m *macaron.Macaron) {
}) })
m.Get("/create_gpu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetGPU) m.Get("/create_gpu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetGPU)
m.Get("/create_npu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetNPU) m.Get("/create_npu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetNPU)
m.Get("/create_grampus_gpu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetGrampusGPU)
m.Get("/create_grampus_npu", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForGetGrampusNPU)
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForPost) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.AiSafetyCreateForPost)
}, context.RepoRef()) }, context.RepoRef())




Loading…
Cancel
Save