Browse Source

去掉智算网络创建分支。

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.10.1^2
zouap 3 years ago
parent
commit
f1eafddffe
2 changed files with 37 additions and 266 deletions
  1. +17
    -8
      modules/setting/setting.go
  2. +20
    -258
      routers/repo/aisafety.go

+ 17
- 8
modules/setting/setting.go View File

@@ -709,10 +709,15 @@ var (
}{}

// ModelSafetyTest holds the dataset names and UUIDs offered on the model
// safety test pages. The fields are filled by getModelSafetyConfig from the
// "model_safety_test" configuration section; the built-in defaults there are
// semicolon-separated lists, so each field may carry several entries.
ModelSafetyTest = struct {
// Unprefixed base/combat dataset names and UUIDs.
BaseDataSetName string
BaseDataSetUUID string
CombatDataSetName string
CombatDataSetUUID string
// NPU-specific base/combat dataset names and UUIDs.
NPUBaseDataSetName string
NPUBaseDataSetUUID string
NPUCombatDataSetName string
NPUCombatDataSetUUID string

// GPU-specific base/combat dataset names and UUIDs.
GPUBaseDataSetName string
GPUBaseDataSetUUID string
GPUCombatDataSetName string
GPUCombatDataSetUUID string
}{}
)

@@ -1539,10 +1544,14 @@ func NewContext() {

// getModelSafetyConfig populates ModelSafetyTest from the "model_safety_test"
// configuration section. Every key is read through MustString with one of the
// four built-in default lists below; the unprefixed, GPU and NPU variants all
// share the same defaults.
func getModelSafetyConfig() {
	const (
		defaultBaseNames   = "ImageNet1000_100基础数据集;CIFAR10_1000基础数据集"
		defaultBaseUUIDs   = "20e5e8c8-58fd-4c15-95ff-3811da3ec200;18fc091a-cc00-4862-b04a-dfa798a14909"
		defaultCombatNames = "ImageNet1000_100_FGSM;CIFAR10_1000_FGSM.zip"
		defaultCombatUUIDs = "3e76f133-d9c6-4819-976d-4487d41f4d6d;787b561d-57bd-4ecf-b833-ef26492cdd87"
	)

	section := Cfg.Section("model_safety_test")
	// read looks up one key of the section, handing fallback to MustString.
	read := func(key, fallback string) string {
		return section.Key(key).MustString(fallback)
	}

	// Unprefixed settings.
	ModelSafetyTest.BaseDataSetName = read("BaseDataSetName", defaultBaseNames)
	ModelSafetyTest.BaseDataSetUUID = read("BaseDataSetUUID", defaultBaseUUIDs)
	ModelSafetyTest.CombatDataSetName = read("CombatDataSetName", defaultCombatNames)
	ModelSafetyTest.CombatDataSetUUID = read("CombatDataSetUUID", defaultCombatUUIDs)

	// GPU settings.
	ModelSafetyTest.GPUBaseDataSetName = read("GPUBaseDataSetName", defaultBaseNames)
	ModelSafetyTest.GPUBaseDataSetUUID = read("GPUBaseDataSetUUID", defaultBaseUUIDs)
	ModelSafetyTest.GPUCombatDataSetName = read("GPUCombatDataSetName", defaultCombatNames)
	ModelSafetyTest.GPUCombatDataSetUUID = read("GPUCombatDataSetUUID", defaultCombatUUIDs)

	// NPU settings.
	ModelSafetyTest.NPUBaseDataSetName = read("NPUBaseDataSetName", defaultBaseNames)
	ModelSafetyTest.NPUBaseDataSetUUID = read("NPUBaseDataSetUUID", defaultBaseUUIDs)
	ModelSafetyTest.NPUCombatDataSetName = read("NPUCombatDataSetName", defaultCombatNames)
	ModelSafetyTest.NPUCombatDataSetUUID = read("NPUCombatDataSetUUID", defaultCombatUUIDs)
}

func getModelConvertConfig() {


+ 20
- 258
routers/repo/aisafety.go View File

@@ -18,7 +18,6 @@ import (
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
@@ -232,8 +231,6 @@ func syncAiSafetyTaskStatus(job *models.Cloudbrain) {
queryTaskStatusFromCloudbrainTwo(job)
} else if job.Type == models.TypeCloudBrainOne {
queryTaskStatusFromCloudbrain(job)
} else if job.Type == models.TypeC2Net {
queryTaskStatusFromGrampus(job)
}
} else {
if job.Status == string(models.ModelSafetyTesting) {
@@ -260,55 +257,6 @@ func TimerHandleModelSafetyTestTask() {
}
}

// queryTaskStatusFromGrampus refreshes task from the job state reported by
// grampus.GetJob and persists the change with models.UpdateJob.
//
// Records whose DeletedAt is set are skipped. When the translated job status
// is GrampusStatusSucceeded, the task is switched to ModelSafetyTesting, saved
// and handed to sendGPUInferenceResultToTest. Otherwise the duration, start
// and end times are updated when the translated status differs from the raw
// one or the job is still running. Failures are logged; nothing is returned.
func queryTaskStatusFromGrampus(task *models.Cloudbrain) {
	log.Info("The task not finished,name=" + task.DisplayJobName)
	if !task.DeletedAt.IsZero() {
		// Only normal (non-deleted) records are synchronized.
		return
	}

	result, err := grampus.GetJob(task.JobID)
	// Best-effort debug dump: a marshal failure only leaves the logged payload empty.
	resultJson, _ := json.Marshal(result)
	log.Info("resultJson=" + string(resultJson))
	if err != nil {
		log.Error("GetJob failed:" + err.Error())
		return
	}
	if result == nil {
		return
	}

	// A job may be reported without any task entry; indexing Tasks[0]
	// unconditionally would panic in that case.
	if tasks := result.JobInfo.Tasks; len(tasks) > 0 {
		if len(tasks[0].CenterID) == 1 && len(tasks[0].CenterName) == 1 {
			task.AiCenter = tasks[0].CenterID[0] + "+" + tasks[0].CenterName[0]
		}
	}

	task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
	if task.Status == models.GrampusStatusSucceeded {
		task.Status = string(models.ModelSafetyTesting)
		if err = models.UpdateJob(task); err != nil {
			// log.Error takes a format string, so the error is appended
			// rather than passed as a verb-less extra argument.
			log.Error("UpdateJob failed:" + err.Error())
		}
		//send msg to beihang
		sendGPUInferenceResultToTest(task)
		return
	}

	if task.Status != result.JobInfo.Status || result.JobInfo.Status == models.GrampusStatusRunning {
		task.Duration = result.JobInfo.RunSec
		if task.Duration < 0 {
			task.Duration = 0
		}
		task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)

		if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
			task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
		}
		if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
			task.EndTime = task.StartTime.Add(task.Duration)
		}
		task.CorrectCreateUnix()
		if err = models.UpdateJob(task); err != nil {
			log.Error("UpdateJob failed:" + err.Error())
		}
	}
}

func queryTaskStatusFromCloudbrainTwo(job *models.Cloudbrain) {
log.Info("The task not finished,name=" + job.DisplayJobName)
result, err := modelarts.GetTrainJob(job.JobID, strconv.FormatInt(job.VersionID, 10))
@@ -574,10 +522,10 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) {
ctx.Data["type"] = models.TypeCloudBrainOne
ctx.Data["compute_resource"] = models.GPUResource
ctx.Data["datasetType"] = models.TypeCloudBrainOne
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.BaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.BaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.CombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.CombatDataSetUUID
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.GPUBaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.GPUBaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.GPUCombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.GPUCombatDataSetUUID
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
ctx.Data["display_job_name"] = displayJobName
prepareCloudbrainOneSpecs(ctx)
@@ -587,43 +535,6 @@ func AiSafetyCreateForGetGPU(ctx *context.Context) {
}
ctx.HTML(200, tplModelSafetyTestCreateGpu)
}
// AiSafetyCreateForGetGrampusGPU fills the template data for creating a
// model safety test of type C2Net on GPU resources and renders
// tplModelSafetyTestCreateGrampusGpu. If GrampusTrainJobNewDataPrepare fails,
// a server error is reported instead of the page.
func AiSafetyCreateForGetGrampusGPU(ctx *context.Context) {
	pageData := ctx.Data
	safetyCfg := &setting.ModelSafetyTest

	// Page flags and resource selection.
	pageData["PageIsCloudBrain"] = true
	pageData["IsCreate"] = true
	pageData["type"] = models.TypeC2Net
	pageData["compute_resource"] = models.GPUResource
	pageData["datasetType"] = models.TypeCloudBrainOne

	// Datasets offered on the form.
	pageData["BaseDataSetName"] = safetyCfg.BaseDataSetName
	pageData["BaseDataSetUUID"] = safetyCfg.BaseDataSetUUID
	pageData["CombatDataSetName"] = safetyCfg.CombatDataSetName
	pageData["CombatDataSetUUID"] = safetyCfg.CombatDataSetUUID

	if prepareErr := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU); prepareErr != nil {
		ctx.ServerError("get new train-job info failed", prepareErr)
		return
	}
	ctx.HTML(200, tplModelSafetyTestCreateGrampusGpu)
}

// AiSafetyCreateForGetGrampusNPU fills the template data for creating a
// model safety test of type C2Net on NPU resources and renders
// tplModelSafetyTestCreateGrampusNpu. If GrampusTrainJobNewDataPrepare fails,
// a server error is reported instead of the page.
func AiSafetyCreateForGetGrampusNPU(ctx *context.Context) {
	pageData := ctx.Data
	safetyCfg := &setting.ModelSafetyTest

	// Page flags and resource selection.
	pageData["PageIsCloudBrain"] = true
	pageData["IsCreate"] = true
	pageData["type"] = models.TypeC2Net
	pageData["compute_resource"] = models.NPUResource
	pageData["datasetType"] = models.TypeCloudBrainTwo

	// Datasets offered on the form.
	pageData["BaseDataSetName"] = safetyCfg.BaseDataSetName
	pageData["BaseDataSetUUID"] = safetyCfg.BaseDataSetUUID
	pageData["CombatDataSetName"] = safetyCfg.CombatDataSetName
	pageData["CombatDataSetUUID"] = safetyCfg.CombatDataSetUUID

	if prepareErr := GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU); prepareErr != nil {
		ctx.ServerError("get new train-job info failed", prepareErr)
		return
	}
	ctx.HTML(200, tplModelSafetyTestCreateGrampusNpu)
}

func AiSafetyCreateForGetNPU(ctx *context.Context) {
t := time.Now()
@@ -634,10 +545,10 @@ func AiSafetyCreateForGetNPU(ctx *context.Context) {
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
ctx.Data["display_job_name"] = displayJobName
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.BaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.BaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.CombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.CombatDataSetUUID
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.NPUBaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.NPUBaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.NPUCombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.NPUCombatDataSetUUID

var resourcePools modelarts.ResourcePool
if err := json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
@@ -733,167 +644,11 @@ func AiSafetyCreateForPost(ctx *context.Context) {
} else if taskType == models.TypeCloudBrainOne {
ctx.Data["datasetType"] = models.TypeCloudBrainOne
createForGPU(ctx, jobName)
} else if taskType == models.TypeC2Net {
ComputeResource := ctx.Query("compute_resource")
if ComputeResource == models.NPUResource {
createForGrampusNPU(ctx, jobName)
} else if ComputeResource == models.GPUResource {
createForGrampusGPU(ctx, jobName)
}
}
log.Info("to redirect...")
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/cloudbrain/benchmark")
}

// createForGrampusGPU creates a model safety test job on a C2Net GPU
// resource from the submitted form values.
//
// Steps, in order: validate the requested specification and the user's point
// balance, resolve the base and combat datasets, download the repository code
// and upload it to Minio, create and upload the model output directory, build
// the run command, and submit the job through grampus.GenerateTrainJob.
//
// On any failure the form data is prepared again and the GPU creation page is
// re-rendered with an error message; the function returns nothing.
func createForGrampusGPU(ctx *context.Context, jobName string) {
	BootFile := ctx.Query("boot_file")
	displayJobName := ctx.Query("display_job_name")
	description := ctx.Query("description")
	image := strings.TrimSpace(ctx.Query("image"))
	srcDataset := ctx.Query("src_dataset")       //uuid
	combatDataset := ctx.Query("combat_dataset") //uuid
	evaluationIndex := ctx.Query("evaluation_index")
	Params := ctx.Query("run_para_list")
	specId := ctx.QueryInt64("spec_id")
	TrainUrl := ctx.Query("train_url")
	CkptName := ctx.Query("ckpt_name")
	ModelName := ctx.Query("model_name")
	ModelVersion := ctx.Query("model_version")
	repo := ctx.Repo.Repository
	codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
	codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"

	// renderFailure re-populates the creation form and shows msg on it.
	renderFailure := func(msg string) {
		GrampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
		ctx.RenderWithErr(msg, tplCloudBrainModelSafetyNewGrampusGpu, nil)
	}

	//check specification
	spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
		JobType:         models.JobTypeTrain,
		ComputeResource: models.GPU,
		Cluster:         models.C2NetCluster,
	})
	if err != nil || spec == nil {
		log.Error("GetAndCheckSpec failed: specId=%d err=%v, msgID=%v", specId, err, ctx.Data["MsgID"])
		renderFailure("Resource specification not available")
		return
	}

	if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
		log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
		renderFailure(ctx.Tr("points.insufficient_points_balance"))
		return
	}

	//check dataset
	uuid := srcDataset + ";" + combatDataset
	datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU)
	if err != nil {
		log.Error("GetDatasetInfo failed: %v, msgID=%v", err, ctx.Data["MsgID"])
		renderFailure(ctx.Tr("cloudbrain.error.dataset_select"))
		return
	}

	//prepare code and out path
	// Clear a leftover code directory from an earlier attempt. This stays
	// best-effort: a failed removal is logged and the download still runs.
	if _, err = ioutil.ReadDir(codeLocalPath); err == nil {
		if rmErr := os.RemoveAll(codeLocalPath); rmErr != nil {
			log.Error("Failed to remove %s: %v, msgID=%v", codeLocalPath, rmErr, ctx.Data["MsgID"])
		}
	}

	if err := downloadZipCode(ctx, codeLocalPath, cloudbrain.DefaultBranchName); err != nil {
		log.Error("downloadZipCode failed, server timed out: %s (%v), msgID=%v", repo.FullName(), err, ctx.Data["MsgID"])
		renderFailure(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	//todo: upload code (send to file_server todo this work?)
	//upload code
	if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
		log.Error("Failed to uploadCodeToMinio: %s (%v), msgID=%v", repo.FullName(), err, ctx.Data["MsgID"])
		renderFailure(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
	if err := mkModelPath(modelPath); err != nil {
		log.Error("Failed to mkModelPath: %s (%v), msgID=%v", repo.FullName(), err, ctx.Data["MsgID"])
		renderFailure(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	//init model readme
	if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
		log.Error("Failed to uploadCodeToMinio: %s (%v), msgID=%v", repo.FullName(), err, ctx.Data["MsgID"])
		renderFailure(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	// Join the dataset paths and file names into ';'-separated lists.
	var datasetRemotePath, allFileName string
	for _, datasetInfo := range datasetInfos {
		if datasetRemotePath == "" {
			datasetRemotePath = datasetInfo.DataLocalPath
			allFileName = datasetInfo.FullName
		} else {
			datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath
			allFileName = allFileName + ";" + datasetInfo.FullName
		}
	}

	//prepare command
	preTrainModelPath := getPreTrainModelPath(TrainUrl, CkptName)

	command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, BootFile, Params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, CkptName)
	if err != nil {
		log.Error("Failed to generateCommand: %s (%v), msgID=%v", displayJobName, err, ctx.Data["MsgID"])
		renderFailure("Create task failed, internal error")
		return
	}

	// A missing commit ID does not block job creation, as before; the
	// lookup failure is now logged instead of being dropped.
	commitID, err := ctx.Repo.GitRepo.GetBranchCommitID(cloudbrain.DefaultBranchName)
	if err != nil {
		log.Error("GetBranchCommitID failed: %v, msgID=%v", err, ctx.Data["MsgID"])
	}

	req := &grampus.GenerateTrainJobReq{
		JobName:         jobName,
		DisplayJobName:  displayJobName,
		ComputeResource: models.GPUResource,
		ProcessType:     grampus.ProcessorTypeGPU,
		Command:         command,
		ImageUrl:        image,
		Description:     description,
		BootFile:        BootFile,
		Uuid:            uuid,
		CommitID:        commitID,
		BranchName:      cloudbrain.DefaultBranchName,
		Params:          Params,
		EngineName:      image,
		DatasetNames:    datasetNames,
		DatasetInfos:    datasetInfos,

		IsLatestVersion:  modelarts.IsLatestVersion,
		VersionCount:     modelarts.VersionCountOne,
		WorkServerNumber: 1,
		Spec:             spec,
		ModelName:        ModelName,
		LabelName:        evaluationIndex,
		CkptName:         CkptName,
		ModelVersion:     ModelVersion,
		PreTrainModelUrl: TrainUrl,
	}
	err = grampus.GenerateTrainJob(ctx, req)
	if err != nil {
		log.Error("GenerateTrainJob failed:%v, msgID=%v", err.Error(), ctx.Data["MsgID"])
		renderFailure(err.Error())
		return
	}
}

// createForGrampusNPU is an unimplemented stub: its body is empty, so calling
// it with a request context and job name creates no job and reports nothing.
func createForGrampusNPU(ctx *context.Context, jobName string) {

}

func createForNPU(ctx *context.Context, jobName string) {
VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount)
BootFile := ctx.Query("boot_file")
@@ -1214,7 +969,7 @@ func getGpuModelSafetyCommand(BootFile string, params string, CkptName string, D

func modelSafetyNewDataPrepare(ctx *context.Context) error {
ctx.Data["PageIsCloudBrain"] = true
ctx.Data["type"] = ctx.QueryInt("type")
ctx.Data["boot_file"] = ctx.Query("boot_file")
ctx.Data["display_job_name"] = ctx.Query("display_job_name")
ctx.Data["description"] = ctx.Query("description")
@@ -1232,10 +987,17 @@ func modelSafetyNewDataPrepare(ctx *context.Context) error {
ctx.Data["model_name"] = ctx.Query("model_name")
ctx.Data["model_version"] = ctx.Query("model_version")

ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.BaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.BaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.CombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.CombatDataSetUUID
if ctx.QueryInt("type") == models.TypeCloudBrainOne {
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.GPUBaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.GPUBaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.GPUCombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.GPUCombatDataSetUUID
} else {
ctx.Data["BaseDataSetName"] = setting.ModelSafetyTest.NPUBaseDataSetName
ctx.Data["BaseDataSetUUID"] = setting.ModelSafetyTest.NPUBaseDataSetUUID
ctx.Data["CombatDataSetName"] = setting.ModelSafetyTest.NPUCombatDataSetName
ctx.Data["CombatDataSetUUID"] = setting.ModelSafetyTest.NPUCombatDataSetUUID
}

prepareCloudbrainOneSpecs(ctx)



Loading…
Cancel
Save