Browse Source

创建NPU的模型安全检测任务

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.10.1^2
zouap 3 years ago
parent
commit
d83229936f
3 changed files with 203 additions and 10 deletions
  1. +12
    -10
      modules/modelarts/modelarts.go
  2. +190
    -0
      routers/repo/aisafety.go
  3. +1
    -0
      routers/repo/modelarts.go

+ 12
- 10
modules/modelarts/modelarts.go View File

@@ -1,7 +1,6 @@
package modelarts

import (
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -9,6 +8,8 @@ import (
"strconv"
"strings"

"code.gitea.io/gitea/modules/modelarts_cd"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
@@ -70,8 +71,8 @@ const (
var (
poolInfos *models.PoolInfos
TrainFlavorInfos *Flavor
SpecialPools *models.SpecialPools
MultiNodeConfig *MultiNodes
SpecialPools *models.SpecialPools
MultiNodeConfig *MultiNodes
)

type GenerateTrainJobReq struct {
@@ -141,6 +142,7 @@ type GenerateInferenceJobReq struct {
ResultUrl string
Spec *models.Specification
DatasetName string
JobType string
}

type VersionInfo struct {
@@ -173,12 +175,12 @@ type ResourcePool struct {
} `json:"resource_pool"`
}

type MultiNodes struct{
type MultiNodes struct {
Info []OrgMultiNode `json:"multinode"`
}
type OrgMultiNode struct{
type OrgMultiNode struct {
Org string `json:"org"`
Node []int `json:"node"`
Node []int `json:"node"`
}

// type Parameter struct {
@@ -709,7 +711,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
Status: models.TempJobStatus,
Type: models.TypeCloudBrainTwo,
JobName: req.JobName,
JobType: string(models.JobTypeInference),
JobType: req.JobType,
})
if err != nil {
log.Error("InsertCloudbrainTemp failed: %v", err.Error())
@@ -732,7 +734,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
JobID: jobID,
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeInference),
JobType: req.JobType,
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
@@ -798,8 +800,8 @@ func InitSpecialPool() {
}
}

func InitMultiNode(){
if MultiNodeConfig ==nil && setting.ModelArtsMultiNode!=""{
func InitMultiNode() {
if MultiNodeConfig == nil && setting.ModelArtsMultiNode != "" {
json.Unmarshal([]byte(setting.ModelArtsMultiNode), &MultiNodeConfig)
}



+ 190
- 0
routers/repo/aisafety.go View File

@@ -16,11 +16,14 @@ import (
"code.gitea.io/gitea/modules/aisafety"
"code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/services/cloudbrain/resource"
"code.gitea.io/gitea/services/reward/point/account"
uuid "github.com/satori/go.uuid"
)

@@ -315,7 +318,194 @@ func AiSafetyCreateForPost(ctx *context.Context) {
}

// createForNPU creates a model-safety job that runs on NPU resources.
//
// It reads the job settings from the request query, checks the requested
// resource specification and the user's point balance, downloads the
// repository's default branch into a local job directory, prepares the result
// and log directories through obsMkdir, uploads the code with uploadCodeToObs,
// assembles the run parameters and finally submits the job through
// modelarts.GenerateInferenceJob with JobType set to models.JobTypeModelSafety.
//
// On any failure the creation page (tplCloudBrainModelSafetyNew) is rendered
// again with an error message and the function returns without creating a job.
func createForNPU(ctx *context.Context, jobName string) {
	// renderErr re-populates the form data and renders the creation page with msg.
	renderErr := func(msg string) {
		modelSafetyNewDataPrepare(ctx)
		ctx.RenderWithErr(msg, tplCloudBrainModelSafetyNew, nil)
	}

	versionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount)
	bootFile := ctx.Query("BootFile")
	displayJobName := ctx.Query("DisplayJobName")
	description := ctx.Query("Description")
	engineID := ctx.QueryInt("EngineID")
	poolID := ctx.Query("PoolID")
	//image := strings.TrimSpace(ctx.Query("Image"))
	srcDataset := ctx.Query("srcDataset")       // uuid
	combatDataset := ctx.Query("combatDataset") // uuid
	evaluationIndex := ctx.Query("evaluationIndex")
	runParams := ctx.Query("RunParaList")
	specID := ctx.QueryInt64("SpecId")
	trainURL := ctx.Query("TrainUrl")
	modelName := ctx.Query("ModelName")
	modelVersion := ctx.Query("ModelVersion")
	ckptName := ctx.Query("CkptName")
	flavorName := ctx.Query("FlavorName")
	engineName := ctx.Query("EngineName")

	repo := ctx.Repo.Repository

	ckptURL := "/" + trainURL + ckptName
	log.Info("ckpt url:" + ckptURL)

	codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
	jobObsRoot := "/" + setting.Bucket + modelarts.JobPath + jobName
	codeObsPath := jobObsRoot + modelarts.CodePath
	resultObsPath := jobObsRoot + modelarts.ResultPath + versionOutputPath + "/"
	logObsPath := jobObsRoot + modelarts.LogPath + versionOutputPath + "/"

	spec, err := resource.GetAndCheckSpec(ctx.User.ID, specID, models.FindSpecsOptions{
		JobType:         models.JobTypeInference,
		ComputeResource: models.NPU,
		Cluster:         models.OpenICluster,
		AiCenterCode:    models.AICenterOfCloudBrainTwo,
	})
	if err != nil || spec == nil {
		renderErr("Resource specification not available")
		return
	}
	if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
		log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
		renderErr(ctx.Tr("points.insufficient_points_balance"))
		return
	}

	//todo: del the codeLocalPath
	// Clear any leftover local copy so the download starts from an empty directory.
	if _, statErr := ioutil.ReadDir(codeLocalPath); statErr == nil {
		if rmErr := os.RemoveAll(codeLocalPath); rmErr != nil {
			// Best effort, as before: the download below reports a hard failure.
			log.Error("Failed to remove %s: %v", codeLocalPath, rmErr)
		}
	}

	// A failed open used to be ignored, which led to a nil dereference on the
	// next call; fail the request instead.
	// NOTE(review): if git.Repository exposes Close in this codebase, it should
	// probably be deferred here — confirm.
	gitRepo, err := git.OpenRepository(repo.RepoPath())
	if err != nil {
		log.Error("Failed to open repository: %s (%v)", repo.FullName(), err)
		renderErr(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}
	commitID, err := gitRepo.GetBranchCommitID(cloudbrain.DefaultBranchName)
	if err != nil {
		// Keep the previous behaviour (continue with an empty commit ID) but leave a trace.
		log.Error("Failed to get commit ID of branch %s: %s (%v)", cloudbrain.DefaultBranchName, repo.FullName(), err)
	}

	if err := downloadCode(repo, codeLocalPath, cloudbrain.DefaultBranchName); err != nil {
		log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)
		renderErr(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	//todo: upload code (send to file_server todo this work?)
	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath + versionOutputPath + "/"); err != nil {
		log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err)
		renderErr("Failed to obsMkdir_result")
		return
	}

	if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + versionOutputPath + "/"); err != nil {
		log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
		renderErr("Failed to obsMkdir_log")
		return
	}

	if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
		log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
		renderErr(ctx.Tr("cloudbrain.load_code_failed"))
		return
	}

	// Parameters always passed to the job: where to write results and which checkpoint to load.
	param := []models.Parameter{
		{Label: modelarts.ResultUrl, Value: "s3:/" + resultObsPath},
		{Label: modelarts.CkptUrl, Value: "s3:/" + ckptURL},
	}

	// Both datasets are resolved together; the joined value is also stored on the job as Uuid.
	datasetUUIDs := srcDataset + ";" + combatDataset
	datasURLList, dataURL, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(datasetUUIDs)
	if err != nil {
		renderErr(err.Error())
		return
	}
	if isMultiDataset {
		jsonDatas, err := json.Marshal(datasURLList)
		if err != nil {
			log.Error("Failed to Marshal: %v", err)
			renderErr("json error:" + err.Error())
			return
		}
		param = append(param, models.Parameter{
			Label: modelarts.MultiDataUrl,
			Value: string(jsonDatas),
		})
	}

	existDeviceTarget := false
	if len(runParams) != 0 {
		var parameters models.Parameters
		if err := json.Unmarshal([]byte(runParams), &parameters); err != nil {
			log.Error("Failed to Unmarshal params: %s (%v)", runParams, err)
			renderErr("运行参数错误")
			return
		}

		for _, parameter := range parameters.Parameter {
			if parameter.Label == modelarts.DeviceTarget {
				existDeviceTarget = true
			}
			// User-supplied train/data URL parameters are dropped; everything else is forwarded.
			if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
				param = append(param, models.Parameter{
					Label: parameter.Label,
					Value: parameter.Value,
				})
			}
		}
	}
	// Default the device target to Ascend when the caller did not specify one.
	if !existDeviceTarget {
		param = append(param, models.Parameter{
			Label: modelarts.DeviceTarget,
			Value: modelarts.Ascend,
		})
	}

	req := &modelarts.GenerateInferenceJobReq{
		JobName:           jobName,
		DisplayJobName:    displayJobName,
		DataUrl:           dataURL,
		Description:       description,
		CodeObsPath:       codeObsPath,
		BootFileUrl:       codeObsPath + bootFile,
		BootFile:          bootFile,
		TrainUrl:          trainURL,
		WorkServerNumber:  1,
		EngineID:          int64(engineID),
		LogUrl:            logObsPath,
		PoolID:            poolID,
		Uuid:              datasetUUIDs,
		Parameters:        param, //modelarts train parameters
		CommitID:          commitID,
		BranchName:        cloudbrain.DefaultBranchName,
		Params:            runParams,
		FlavorName:        flavorName,
		EngineName:        engineName,
		LabelName:         evaluationIndex,
		IsLatestVersion:   modelarts.IsLatestVersion,
		VersionCount:      modelarts.VersionCountOne,
		TotalVersionCount: modelarts.TotalVersionCount,
		ModelName:         modelName,
		ModelVersion:      modelVersion,
		CkptName:          ckptName,
		ResultUrl:         resultObsPath,
		Spec:              spec,
		DatasetName:       datasetNames,
		JobType:           string(models.JobTypeModelSafety),
	}

	if err := modelarts.GenerateInferenceJob(ctx, req); err != nil {
		log.Error("GenerateInferenceJob failed:%v", err.Error())
		renderErr(err.Error())
		return
	}
}

func createForGPU(ctx *context.Context, jobName string) {


+ 1
- 0
routers/repo/modelarts.go View File

@@ -2200,6 +2200,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ResultUrl: resultObsPath,
Spec: spec,
DatasetName: datasetNames,
JobType: string(models.JobTypeInference),
}

err = modelarts.GenerateInferenceJob(ctx, req)


Loading…
Cancel
Save