Browse Source

提交代码

tags/v1.22.12.1^2
ychao_1983 3 years ago
parent
commit
5405546144
11 changed files with 440 additions and 174 deletions
  1. +4
    -0
      models/action.go
  2. +39
    -0
      models/cloudbrain.go
  3. +2
    -0
      models/task_config.go
  4. +16
    -0
      modules/auth/grampus.go
  5. +96
    -0
      modules/grampus/grampus.go
  6. +67
    -0
      modules/grampus/resty.go
  7. +1
    -91
      modules/modelarts/modelarts.go
  8. +1
    -1
      routers/api/v1/repo/images.go
  9. +193
    -72
      routers/repo/grampus.go
  10. +11
    -10
      routers/routes/routes.go
  11. +10
    -0
      services/cloudbrain/cloudbrainTask/count.go

+ 4
- 0
models/action.go View File

@@ -65,6 +65,8 @@ const (
ActionCreateImage //36
ActionImageRecommend //37
ActionChangeUserAvatar //38
ActionCreateGrampusNPUDebugTask //39
ActionCreateGrampusGPUDebugTask //40
)

// Action represents user operation type and other information to
@@ -375,6 +377,8 @@ func (a *Action) IsCloudbrainAction() bool {
ActionCreateInferenceTask,
ActionCreateBenchMarkTask,
ActionCreateGPUTrainTask,
ActionCreateGrampusGPUDebugTask,
ActionCreateGrampusNPUDebugTask,
ActionCreateGrampusNPUTrainTask,
ActionCreateGrampusGPUTrainTask:
return true


+ 39
- 0
models/cloudbrain.go View File

@@ -1442,6 +1442,20 @@ type GrampusJobInfo struct {
UserID string `json:"userId"`
Tasks []GrampusTasks `json:"tasks"`
}

// GrampusNotebookInfo describes a Grampus notebook (debug) job. It is the
// payload decoded from the "otJob" key of GrampusNotebookResponse.
// NOTE(review): the *At fields look like Unix timestamps and RunSec like a
// duration in seconds — confirm the units against the Grampus API.
type GrampusNotebookInfo struct {
StartedAt int64 `json:"startedAt"`
RunSec int64 `json:"runSec"`
CompletedAt int64 `json:"completedAt"`
CreatedAt int64 `json:"createdAt"`
UpdatedAt int64 `json:"updatedAt"`
Desc string `json:"desc"`
// JobID is carried on the wire as "id".
JobID string `json:"id"`
Name string `json:"name"`
// Status is passed through TransTrainJobStatus by GenerateNotebookJob before
// it is stored on the Cloudbrain record.
Status string `json:"status"`
UserID string `json:"userId"`
Tasks []GrampusNotebookTask `json:"tasks"`
}
type Center struct {
ID string `json:"id"`
Name string `json:"name"`
@@ -1518,6 +1532,11 @@ type GetGrampusJobResponse struct {
JobInfo GrampusJobInfo `json:"otJob"`
}

// GrampusNotebookResponse is the response body that createNotebookJob and
// GetNotebookJob decode notebook API replies into. It embeds GrampusResult
// (presumably the source of the ErrorCode / ErrorMsg fields those callers
// inspect — confirm) and carries the job description under "otJob".
type GrampusNotebookResponse struct {
GrampusResult
JobInfo GrampusNotebookInfo `json:"otJob"`
}

type GrampusStopJobResponse struct {
GrampusResult
StoppedAt int64 `json:"stoppedAt"`
@@ -1537,6 +1556,21 @@ type GrampusTasks struct {
Code GrampusDataset `json:"code"`
BootFile string `json:"bootFile"`
}
// GrampusNotebookTask is a single task of a Grampus notebook job. The same
// shape is used in the create request (CreateGrampusNotebookRequest.Tasks) and
// in the job description returned by the API (GrampusNotebookInfo.Tasks).
type GrampusNotebookTask struct {
// AutoStopDuration is set by GenerateNotebookJob to autoStopDurationMs
// (4 * 60 * 60 * 1000), i.e. presumably milliseconds — confirm.
AutoStopDuration int `json:"autoStopDuration"`
Name string `json:"name"`
Capacity int `json:"capacity"`
CenterID []string `json:"centerID"`
CenterName []string `json:"centerName"`
// Code and Datasets are only populated by GenerateNotebookJob for NPU jobs.
Code GrampusDataset `json:"code"`
Datasets []GrampusDataset `json:"datasets"`
ImageId string `json:"imageId"`
ImageUrl string `json:"imageUrl"`
ResourceSpecId string `json:"resourceSpecId"`
// Token, Url and Status are not set when the request is built in
// GenerateNotebookJob; presumably they are filled in by the server — confirm.
Token string `json:"token"`
Url string `json:"url"`
Status string `json:"status"`
}

type GrampusDataset struct {
Name string `json:"name"`
@@ -1550,6 +1584,11 @@ type CreateGrampusJobRequest struct {
Tasks []GrampusTasks `json:"tasks"`
}

// CreateGrampusNotebookRequest is the JSON body that createNotebookJob posts
// to the notebook endpoint to create a notebook job.
type CreateGrampusNotebookRequest struct {
Name string `json:"name"`
Tasks []GrampusNotebookTask `json:"tasks"`
}

type GetTrainJobMetricStatisticResult struct {
TrainJobResult
Interval int `json:"interval"` //查询的时间间隔,单位为分钟


+ 2
- 0
models/task_config.go View File

@@ -36,6 +36,8 @@ func GetTaskTypeFromAction(a ActionType) TaskType {
ActionCreateInferenceTask,
ActionCreateBenchMarkTask,
ActionCreateGPUTrainTask,
ActionCreateGrampusGPUDebugTask,
ActionCreateGrampusNPUDebugTask,
ActionCreateGrampusNPUTrainTask,
ActionCreateGrampusGPUTrainTask:
return TaskCreateCloudbrainTask


+ 16
- 0
modules/auth/grampus.go View File

@@ -29,3 +29,19 @@ type CreateGrampusTrainJobForm struct {
// Validate hands the binding errors for the train-job form to the package's
// validate helper, together with the request data and locale, and returns the
// resulting binding.Errors.
func (f *CreateGrampusTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
return validate(errs, ctx.Data, f, ctx.Locale)
}

// CreateGrampusNotebookForm is the form bound on the Grampus notebook
// "create" POST route. Fields tagged binding:"Required" must be present.
type CreateGrampusNotebookForm struct {
// Type selects the processor: GrampusNotebookCreate treats 1 as NPU and any
// other value as GPU.
Type int `form:"type"`
DisplayJobName string `form:"display_job_name" binding:"Required"`
// Attachment is used by GrampusNotebookCreate as the dataset uuid.
Attachment string `form:"attachment"`
ImageID string `form:"image_id" binding:"Required"`
Description string `form:"description"`
BranchName string `form:"branch_name" binding:"Required"`
// Image is whitespace-trimmed by GrampusNotebookCreate and used as the image URL.
Image string `form:"image" binding:"Required"`
DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id" binding:"Required"`
}

// Validate hands the binding errors for the notebook form to the package's
// validate helper, together with the request data and locale, and returns the
// resulting binding.Errors.
func (f *CreateGrampusNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {
return validate(errs, ctx.Data, f, ctx.Locale)
}

+ 96
- 0
modules/grampus/grampus.go View File

@@ -28,6 +28,7 @@ const (

BucketRemote = "grampus"
RemoteModelPath = "/output/" + models.ModelSuffix
autoStopDurationMs = 4 * 60 * 60 * 1000
)

var (
@@ -81,6 +82,25 @@ type GenerateTrainJobReq struct {
CodeName string
}

// GenerateNotebookJobReq bundles everything GenerateNotebookJob needs to
// create a Grampus notebook job and the matching Cloudbrain record.
type GenerateNotebookJobReq struct {
JobName string
// Command is not read by GenerateNotebookJob.
Command string
ImageUrl string
ImageId string
DisplayJobName string
Uuid string
Description string
// CodeObsPath and CodeName are only read for NPU jobs, where they build the
// code object key and name.
CodeObsPath string
CommitID string
BranchName string
// ComputeResource decides which action type is reported in the notification.
ComputeResource string
// ProcessType is compared with ProcessorTypeNPU to decide whether datasets
// and code are attached to the task.
ProcessType string
DatasetNames string
DatasetInfos map[string]models.DatasetInfo
// Spec is dereferenced unconditionally by GenerateNotebookJob and must not be nil.
Spec *models.Specification
CodeName string
}

func getEndPoint() string {
index := strings.Index(setting.Endpoint, "//")
endpoint := setting.Endpoint[index+2:]
@@ -102,6 +122,82 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram
return datasetGrampus
}

// GenerateNotebookJob creates a notebook (debug) job on Grampus, stores the
// matching Cloudbrain record and emits a "task created" notification.
//
// For NPU jobs the datasets and the zipped code object are attached to the
// task; for every other processor type they are left at their zero values.
// It returns the job ID reported by Grampus, or the first error hit while
// creating the remote job or the database record. req.Spec must be non-nil.
func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (jobId string, err error) {
	// Taken before the remote call so created/updated times share one value.
	now := timeutil.TimeStampNow()

	var (
		datasets []models.GrampusDataset
		code     models.GrampusDataset
	)
	if req.ProcessType == ProcessorTypeNPU {
		datasets = getDatasetGrampus(req.DatasetInfos)
		code = models.GrampusDataset{
			Name:      req.CodeName,
			Bucket:    setting.Bucket,
			EndPoint:  getEndPoint(),
			ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip",
		}
	}

	task := models.GrampusNotebookTask{
		Name:             req.JobName,
		ResourceSpecId:   req.Spec.SourceSpecId,
		ImageId:          req.ImageId,
		ImageUrl:         req.ImageUrl,
		Datasets:         datasets,
		Code:             code,
		AutoStopDuration: autoStopDurationMs,
		Capacity:         setting.Capacity,
	}
	resp, err := createNotebookJob(models.CreateGrampusNotebookRequest{
		Name:  req.JobName,
		Tasks: []models.GrampusNotebookTask{task},
	})
	if err != nil {
		log.Error("createNotebookJob failed: %v", err.Error())
		return "", err
	}

	remoteID := resp.JobInfo.JobID
	record := &models.Cloudbrain{
		Status:           TransTrainJobStatus(resp.JobInfo.Status),
		UserID:           ctx.User.ID,
		RepoID:           ctx.Repo.Repository.ID,
		JobID:            remoteID,
		JobName:          req.JobName,
		DisplayJobName:   req.DisplayJobName,
		JobType:          string(models.JobTypeDebug),
		Type:             models.TypeC2Net,
		Uuid:             req.Uuid,
		DatasetName:      req.DatasetNames,
		CommitID:         req.CommitID,
		IsLatestVersion:  "1",
		ComputeResource:  req.ComputeResource,
		ImageID:          req.ImageId,
		BranchName:       req.BranchName,
		Description:      req.Description,
		WorkServerNumber: 1,
		EngineName:       req.ImageUrl,
		CreatedUnix:      now,
		UpdatedUnix:      now,
		Spec:             req.Spec,
	}
	if err = models.CreateCloudbrain(record); err != nil {
		log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
		return "", err
	}

	// The action stays at its zero value for any other compute resource.
	var action models.ActionType
	switch req.ComputeResource {
	case models.NPUResource:
		action = models.ActionCreateGrampusNPUDebugTask
	case models.GPUResource:
		action = models.ActionCreateGrampusGPUDebugTask
	}
	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, remoteID, req.DisplayJobName, action)

	return remoteID, nil
}

func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId string, err error) {
createTime := timeutil.TimeStampNow()



+ 67
- 0
modules/grampus/resty.go View File

@@ -26,6 +26,7 @@ const (
urlGetResourceSpecs = urlOpenApiV1 + "resourcespec"
urlGetAiCenter = urlOpenApiV1 + "sharescreen/aicenter"
urlGetImages = urlOpenApiV1 + "image"
urlNotebookJob = urlOpenApiV1 + "notebook"

errorIllegalToken = 1005
)
@@ -87,6 +88,39 @@ func getToken() error {
return nil
}

// createNotebookJob posts req to the Grampus notebook endpoint and decodes the
// reply into a GrampusNotebookResponse.
//
// If the API reports errorIllegalToken, the token is refreshed once and the
// request is sent again. A transport error returns a nil response; a non-zero
// API error code returns the decoded response together with an error.
func createNotebookJob(req models.CreateGrampusNotebookRequest) (*models.GrampusNotebookResponse, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GrampusNotebookResponse

	// At most two attempts: the initial one plus one retry after a token refresh.
	for attempt := 0; ; attempt++ {
		_, err := client.R().
			SetHeader("Content-Type", "application/json").
			SetAuthToken(TOKEN).
			SetBody(req).
			SetResult(&result).
			Post(HOST + urlNotebookJob)
		if err != nil {
			return nil, fmt.Errorf("resty CreateNotebookJob: %s", err)
		}

		tokenRejected := result.ErrorCode == errorIllegalToken
		if !tokenRejected || attempt >= 1 {
			break
		}
		// Refresh failure is ignored; the retry will then surface the API error.
		_ = getToken()
	}

	if code := result.ErrorCode; code != 0 {
		log.Error("CreateNotebookJob failed(%d): %s", code, result.ErrorMsg)
		return &result, fmt.Errorf("CreateNotebookJob failed(%d): %s", code, result.ErrorMsg)
	}

	return &result, nil
}

func createJob(req models.CreateGrampusJobRequest) (*models.CreateGrampusJobResponse, error) {
checkSetting()
client := getRestyClient()
@@ -120,6 +154,39 @@ sendjob:
return &result, nil
}


// GetNotebookJob fetches the notebook job identified by jobID from the Grampus
// notebook endpoint.
//
// If the API reports errorIllegalToken, the token is refreshed once and the
// request is sent again. Any transport error or non-zero API error code yields
// a nil response and an error.
func GetNotebookJob(jobID string) (*models.GrampusNotebookResponse, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GrampusNotebookResponse

	// At most two attempts: the initial one plus one retry after a token refresh.
	for attempt := 0; ; attempt++ {
		_, err := client.R().
			SetAuthToken(TOKEN).
			SetResult(&result).
			Get(HOST + urlNotebookJob + "/" + jobID)
		if err != nil {
			return nil, fmt.Errorf("resty GetNotebookJob: %v", err)
		}

		tokenRejected := result.ErrorCode == errorIllegalToken
		if !tokenRejected || attempt >= 1 {
			break
		}
		log.Info("retry get token")
		// Refresh failure is ignored; the retry will then surface the API error.
		_ = getToken()
	}

	if code := result.ErrorCode; code != 0 {
		log.Error("GetNotebookJob failed(%d): %s", code, result.ErrorMsg)
		return nil, fmt.Errorf("GetNotebookJob failed(%d): %s", code, result.ErrorMsg)
	}

	return &result, nil
}

func GetJob(jobID string) (*models.GetGrampusJobResponse, error) {
checkSetting()
client := getRestyClient()


+ 1
- 91
modules/modelarts/modelarts.go View File

@@ -4,7 +4,6 @@ import (
"encoding/json"
"errors"
"fmt"
"path"
"strconv"
"strings"

@@ -15,20 +14,13 @@ import (
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/timeutil"
)

const (
//notebook
storageTypeOBS = "obs"
autoStopDuration = 4 * 60 * 60
autoStopDurationMs = 4 * 60 * 60 * 1000
MORDELART_USER_IMAGE_ENGINE_ID = -1
DataSetMountPath = "/home/ma-user/work"
NotebookEnv = "Python3"
NotebookType = "Ascend"
FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"


//train-job
// ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
@@ -185,14 +177,6 @@ type OrgMultiNode struct {
Node []int `json:"node"`
}

// type Parameter struct {
// Label string `json:"label"`
// Value string `json:"value"`
// }

// type Parameters struct {
// Parameter []Parameter `json:"parameter"`
// }

type Parameters struct {
Parameter []struct {
@@ -201,80 +185,6 @@ type Parameters struct {
} `json:"parameter"`
}

// GenerateTask creates a ModelArts notebook (debug) job and stores the
// matching Cloudbrain record, then emits an ActionCreateDebugNPUTask
// notification.
//
// When uuid is non-empty the notebook storage path is derived from it (sharded
// by its first two characters); otherwise the user's own OBS directory is used
// and created on demand. The job is placed in the first configured resource
// pool with auto-stop enabled.
//
// It returns an error when the uuid is too short to derive a path from, when
// OBS access fails, when the pool configuration cannot be decoded or is empty,
// or when creating the remote job or the database record fails.
func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error {
	var dataActualPath string
	if uuid != "" {
		// The path uses uuid[0:1] and uuid[1:2]; a shorter uuid used to panic.
		if len(uuid) < 2 {
			return fmt.Errorf("invalid dataset uuid %q", uuid)
		}
		dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
	} else {
		userPath := setting.UserBasePath + ctx.User.Name + "/"
		isExist, err := storage.ObsHasObject(userPath)
		if err != nil {
			log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"])
			return err
		}

		if !isExist {
			if err = storage.ObsCreateObject(userPath); err != nil {
				log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"])
				return err
			}
		}

		dataActualPath = setting.Bucket + "/" + userPath
	}

	// Pool information is decoded lazily from settings on first use.
	if poolInfos == nil {
		if err := json.Unmarshal([]byte(setting.PoolInfos), &poolInfos); err != nil {
			log.Error("json.Unmarshal PoolInfos failed: %v", err)
			return err
		}
	}
	// Guard the PoolInfo[0] access below against a missing or empty configuration.
	if poolInfos == nil || len(poolInfos.PoolInfo) == 0 {
		return errors.New("no resource pool configured")
	}
	pool := poolInfos.PoolInfo[0]

	createTime := timeutil.TimeStampNow()
	jobResult, err := CreateJob(models.CreateNotebookParams{
		JobName:     jobName,
		Description: description,
		ProfileID:   setting.ProfileID,
		Flavor:      flavor,
		Pool: models.Pool{
			ID:   pool.PoolId,
			Name: pool.PoolName,
			Type: pool.PoolType,
		},
		Spec: models.Spec{
			Storage: models.Storage{
				Type: storageTypeOBS,
				Location: models.Location{
					Path: dataActualPath,
				},
			},
			AutoStop: models.AutoStop{
				Enable:   true,
				Duration: autoStopDuration,
			},
		},
	})
	if err != nil {
		log.Error("CreateJob failed: %v", err.Error())
		return err
	}

	err = models.CreateCloudbrain(&models.Cloudbrain{
		Status:          string(models.JobWaiting),
		UserID:          ctx.User.ID,
		RepoID:          ctx.Repo.Repository.ID,
		JobID:           jobResult.ID,
		JobName:         jobName,
		JobType:         string(models.JobTypeDebug),
		Type:            models.TypeCloudBrainTwo,
		Uuid:            uuid,
		ComputeResource: models.NPUResource,
		CreatedUnix:     createTime,
		UpdatedUnix:     createTime,
	})
	if err != nil {
		return err
	}

	notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
	return nil
}

func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
if poolInfos == nil {


+ 1
- 1
routers/api/v1/repo/images.go View File

@@ -88,7 +88,7 @@ func getModelArtsImages(ctx *context.APIContext) {
}

func getC2netNpuImages(ctx *context.APIContext) {
images, err := grampus.GetImages(grampus.ProcessorTypeNPU)
images, err := grampus.GetImages(grampus.ProcessorTypeNPU, string(models.JobTypeTrain))
var npuImageInfos []NPUImageINFO
if err != nil {
log.Error("GetImages failed:", err.Error())


+ 193
- 72
routers/repo/grampus.go View File

@@ -47,12 +47,34 @@ const (
tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show"

//GPU
tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new"
tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new"

//NPU
tplGrampusNotebookNPUNew base.TplName = "repo/grampus/notebook/npu/new"
tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new"
)

// GrampusNotebookNew renders the "new notebook" page. The "type" query
// parameter selects the variant: 1 renders the NPU form, anything else the GPU
// form. If preparing the page data fails a server error is reported instead.
func GrampusNotebookNew(ctx *context.Context) {
	ctx.Data["IsCreate"] = true

	// GPU is the default; both the processor type and its template switch together.
	processType, tpl := grampus.ProcessorTypeGPU, tplGrampusNotebookGPUNew
	if ctx.QueryInt("type") == 1 {
		processType, tpl = grampus.ProcessorTypeNPU, tplGrampusNotebookNPUNew
	}

	if err := grampusNotebookNewDataPrepare(ctx, processType); err != nil {
		ctx.ServerError("get new notebook-job info failed", err)
		return
	}
	ctx.HTML(http.StatusOK, tpl)
}

func GrampusTrainJobGPUNew(ctx *context.Context) {
ctx.Data["IsCreate"] = true
err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -73,8 +95,125 @@ func GrampusTrainJobNPUNew(ctx *context.Context) {
}
ctx.HTML(200, tplGrampusTrainJobNPUNew)
}
func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebookForm) {
ctx.Data["IsCreate"] = true
displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
uuid := form.Attachment
description := form.Description
repo := ctx.Repo.Repository
branchName := form.BranchName
image := strings.TrimSpace(form.Image)
tpl := tplGrampusNotebookGPUNew
processType := grampus.ProcessorTypeGPU
computeSource := models.GPUResource
computeSourceSimple := models.GPU
if form.Type == 1 {
tpl = tplGrampusNotebookNPUNew
processType = grampus.ProcessorTypeNPU
computeSource = models.NPUResource
computeSourceSimple := models.NPU
}

lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
defer lock.UnLock()
isOk, err := lock.Lock(models.CloudbrainKeyDuration)
if !isOk {
log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
grampusNotebookNewDataPrepare(ctx, processType)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_samejob_err"), tpl, &form)
return
}

func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
if !jobNamePattern.MatchString(displayJobName) {
grampusNotebookNewDataPrepare(ctx, processType)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form)
return
}

//check count limit
count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource)
if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr("system error", tpl, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form)
return
}
}

//check whether the task name in the project is duplicated
tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
if err == nil {
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr("the job name did already exist", tpl, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr("system error", tpl, &form)
return
}
}

//check specification
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeDebug,
ComputeResource: computeSourceSimple,
Cluster: models.C2NetCluster,
})
if err != nil || spec == nil {
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr("Resource specification not available", tpl, &form)
return
}

if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form)
return
}

commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)
command := ""

req := &grampus.GenerateNotebookReq{
JobName: jobName,
DisplayJobName: displayJobName,
ComputeResource: computeSource,
ProcessType: processType,
Command: command,
ImageUrl: image,
ImageId: form.ImageID,
Description: description,
Uuid: uuid,
CommitID: commitID,
BranchName: branchName,
DatasetNames: form.DatasetName,
WorkServerNumber: 1,
Spec: spec,
}

_, err = grampus.GenerateNotebook(ctx, req)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, processType)
ctx.RenderWithErr(err.Error(), tpl, &form)
return
}
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
}
func grampusNotebookNewDataPrepare(ctx *context.Context, processType string) error {
ctx.Data["PageIsCloudBrain"] = true

t := time.Now()
@@ -82,49 +221,67 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
ctx.Data["display_job_name"] = displayJobName

//get valid images
images, err := grampus.GetImages(processType)
if processType == grampus.ProcessorTypeNPU {
images, err := grampus.GetImages(processType, string(models.JobTypeDebug))
if err != nil {
log.Error("GetImages failed:", err.Error())
} else {
ctx.Data["images"] = images.Infos
}
}
//prepare available specs
computeResourceSimple := models.GPU
datasetType := models.TypeCloudBrainOne
computeResource := models.GPUResource
if processType == grampus.ProcessorTypeNPU {
computeResourceSimple = models.NPU
datasetType = models.TypeCloudBrainTwo
computeResource = models.NPUResource
}

prepareGrampusSpecs(ctx, computeResourceSimple, models.JobTypeDebug)

//get branches
branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0)
if err != nil {
log.Error("GetImages failed:", err.Error())
log.Error("GetBranches error:", err.Error())
} else {
ctx.Data["images"] = images.Infos
ctx.Data["branches"] = branches
}

grampus.InitSpecialPool()
ctx.Data["branchName"] = ctx.Repo.BranchName

ctx.Data["GPUEnabled"] = true
ctx.Data["NPUEnabled"] = true
includeCenters := make(map[string]struct{})
excludeCenters := make(map[string]struct{})
if grampus.SpecialPools != nil {
for _, pool := range grampus.SpecialPools.Pools {
if pool.IsExclusive {
if !IsUserInOrgPool(ctx.User.ID, pool) {
ctx.Data[pool.Type+"Enabled"] = false
}
} else {
if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) {
if IsUserInOrgPool(ctx.User.ID, pool) {
for _, center := range pool.Pool {
includeCenters[center.Queue] = struct{}{}
}
} else {
for _, center := range pool.Pool {
excludeCenters[center.Queue] = struct{}{}
}
ctx.Data["datasetType"] = datasetType
waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, computeResource, models.JobTypeDebug)
ctx.Data["WaitCount"] = waitCount
NotStopTaskCount, _ := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeResource)
ctx.Data["NotStopTaskCount"] = NotStopTaskCount

}
return nil
}

}
func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) error {
ctx.Data["PageIsCloudBrain"] = true

}
t := time.Now()
var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
ctx.Data["display_job_name"] = displayJobName

//get valid images
if processType == grampus.ProcessorTypeNPU {
images, err := grampus.GetImages(processType, string(models.JobTypeTrain))
if err != nil {
log.Error("GetImages failed:", err.Error())
} else {
ctx.Data["images"] = images.Infos
}
}

//prepare available specs
if processType == grampus.ProcessorTypeNPU {
prepareGrampusTrainSpecs(ctx, models.NPU)
prepareGrampusSpecs(ctx, models.NPU)
} else if processType == grampus.ProcessorTypeGPU {
prepareGrampusTrainSpecs(ctx, models.GPU)
prepareGrampusSpecs(ctx, models.GPU)
}

//get branches
@@ -203,55 +360,19 @@ func GrampusTrainJobVersionNew(ctx *context.Context) {
}
}

func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) {
func prepareGrampusSpecs(ctx *context.Context, computeResource string, jobType ...models.JobType) {
tempJobType := models.JobTypeTrain
if len(jobType) > 0 {
tempJobType = jobType[0]
}
noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{
JobType: models.JobTypeTrain,
JobType: tempJobType,
ComputeResource: computeResource,
Cluster: models.C2NetCluster,
})
ctx.Data["Specs"] = noteBookSpecs
}

// getFilterSpecBySpecialPool narrows specs.Infos by the given center sets.
//
// With both sets empty every spec is returned unchanged. Otherwise a spec is
// kept when one of its centers is in includeCenters, or when excludeCenters is
// non-empty and at least one of its centers is not in it. The result is nil
// when nothing matches.
func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec {
	hasExcludes := len(excludeCenters) != 0
	if len(includeCenters) == 0 && !hasExcludes {
		return specs.Infos
	}

	var kept []models.GrampusSpec
	for _, candidate := range specs.Infos {
		if isInIncludeCenters(candidate, includeCenters) {
			kept = append(kept, candidate)
			continue
		}
		if hasExcludes && isNotAllInExcludeCenters(candidate, excludeCenters) {
			kept = append(kept, candidate)
		}
	}
	return kept
}

// isInIncludeCenters reports whether any center of grampusSpec has its ID
// present in centers.
func isInIncludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool {
	for i := range grampusSpec.Centers {
		_, listed := centers[grampusSpec.Centers[i].ID]
		if listed {
			return true
		}
	}
	return false
}
// isNotAllInExcludeCenters reports whether grampusSpec has at least one center
// whose ID is absent from centers. A spec without centers yields false.
func isNotAllInExcludeCenters(grampusSpec models.GrampusSpec, centers map[string]struct{}) bool {
	for i := range grampusSpec.Centers {
		_, excluded := centers[grampusSpec.Centers[i].ID]
		if !excluded {
			return true
		}
	}
	return false
}

// IsUserInOrgPool reports whether the user is a member of the organization
// named by pool.Org. Lookup errors are ignored: an organization that cannot be
// resolved, or a failed membership check, is treated as "not a member".
func IsUserInOrgPool(userId int64, pool *models.SpecialPool) bool {
	org, _ := models.GetOrgByName(pool.Org)
	if org == nil {
		return false
	}
	member, _ := models.IsOrganizationMember(org.ID, userId)
	return member
}

func grampusParamCheckCreateTrainJob(form auth.CreateGrampusTrainJobForm) error {
if !strings.HasSuffix(strings.TrimSpace(form.BootFile), ".py") {
log.Error("the boot file(%s) must be a python file", form.BootFile)


+ 11
- 10
routers/routes/routes.go View File

@@ -1216,6 +1216,17 @@ func RegisterRoutes(m *macaron.Macaron) {
})
}, context.RepoRef())
m.Group("/grampus", func() {
m.Group("/notebook", func() {
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow)
m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel)
})

m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusNotebookNew)
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), repo.GrampusNotebookCreate)
})

m.Group("/train-job", func() {
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow)
@@ -1288,16 +1299,6 @@ func RegisterRoutes(m *macaron.Macaron) {

m.Group("/modelarts", func() {
m.Group("/notebook", func() {
/* v1.0
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug)
m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)
m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate)
*/
m.Group("/:id", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2)


+ 10
- 0
services/cloudbrain/cloudbrainTask/count.go View File

@@ -62,6 +62,16 @@ var StatusInfoDict = map[string]StatusInfo{string(models.JobTypeDebug) + "-" + s
JobType: []models.JobType{models.JobTypeTrain},
NotFinalStatuses: GrampusNotFinalStatuses,
ComputeResource: models.NPUResource,
}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.GPUResource: {
CloudBrainTypes: []int{models.TypeC2Net},
JobType: []models.JobType{models.JobTypeDebug},
NotFinalStatuses: GrampusNotFinalStatuses,
ComputeResource: models.GPUResource,
}, string(models.JobTypeDebug) + "-" + strconv.Itoa(models.TypeC2Net) + "-" + models.NPUResource: {
CloudBrainTypes: []int{models.TypeC2Net},
JobType: []models.JobType{models.JobTypeDebug},
NotFinalStatuses: GrampusNotFinalStatuses,
ComputeResource: models.NPUResource,
}}

func GetNotFinalStatusTaskCount(uid int64, cloudbrainType int, jobType string, computeResource ...string) (int, error) {


Loading…
Cancel
Save