Browse Source

Merge remote-tracking branch 'origin/pretrainmodel' into fix-2817

tags/v1.22.9.2^2
liuzx 3 years ago
parent
commit
57e10b7482
19 changed files with 467 additions and 64 deletions
  1. +29
    -6
      models/cloudbrain.go
  2. +9
    -5
      models/dataset.go
  3. +5
    -0
      modules/auth/cloudbrain.go
  4. +5
    -0
      modules/auth/grampus.go
  5. +5
    -0
      modules/auth/modelarts.go
  6. +14
    -0
      modules/cloudbrain/cloudbrain.go
  7. +18
    -5
      modules/grampus/grampus.go
  8. +15
    -0
      modules/modelarts/modelarts.go
  9. +9
    -6
      modules/setting/setting.go
  10. +13
    -0
      routers/repo/cloudbrain.go
  11. +127
    -34
      routers/repo/grampus.go
  12. +40
    -0
      routers/repo/modelarts.go
  13. +37
    -0
      templates/custom/select_model.tmpl
  14. +2
    -2
      templates/repo/cloudbrain/trainjob/new.tmpl
  15. +2
    -1
      templates/repo/grampus/trainjob/gpu/new.tmpl
  16. +2
    -0
      templates/repo/grampus/trainjob/npu/new.tmpl
  17. +3
    -3
      templates/repo/modelarts/trainjob/new.tmpl
  18. +2
    -2
      templates/repo/modelarts/trainjob/version_new.tmpl
  19. +130
    -0
      web_src/js/features/cloudbrainShow.js

+ 29
- 6
models/cloudbrain.go View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"path"
"strconv"
"strings"
"time"
@@ -187,6 +188,7 @@ type Cloudbrain struct {
ModelName string //模型名称
ModelVersion string //模型版本
CkptName string //权重文件名称
PreTrainModelUrl string //预训练模型地址
ResultUrl string //推理结果的obs路径

User *User `xorm:"-"`
@@ -603,6 +605,16 @@ type ResourceSpec struct {
ShareMemMiB int `json:"shareMemMiB"`
}

type FlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
@@ -2223,9 +2235,10 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er
type DatasetInfo struct {
DataLocalPath string
Name string
FullName string
}

func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) {
func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) {
var datasetNames string
uuids := strings.Split(uuidStr, ";")
if len(uuids) > setting.MaxDatasetNum {
@@ -2258,16 +2271,26 @@ func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) {
return nil, datasetNames, errors.New("the dataset name is same")
}
}
var dataLocalPath string
if len(grampusType) > 0 {
if grampusType[0] == GPU {
dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID
} else {
dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/"
}

dataLocalPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
AttachmentRelativePath(attach.UUID) +
attach.UUID
} else {
dataLocalPath = setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath +
AttachmentRelativePath(attach.UUID) +
attach.UUID
}

datasetInfos[attach.UUID] = DatasetInfo{
DataLocalPath: dataLocalPath,
Name: fileName,
FullName: attach.Name,
}
if i == 0 {
datasetNames = attach.Name


+ 9
- 5
models/dataset.go View File

@@ -131,13 +131,17 @@ func (datasets DatasetList) loadAttachmentAttributes(opts *SearchDatasetOptions)
permission = false
datasets[i].Repo.GetOwner()
if !permission {
isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID)
isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID)

if isCollaborator ||isInRepoTeam {
log.Info("Collaborator user may visit the attach.")
if datasets[i].Repo.OwnerID==opts.User.ID{
permission = true
}else{
isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID)
isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID)

if isCollaborator ||isInRepoTeam {
permission = true
}
}

}

permissionMap[datasets[i].ID] = permission


+ 5
- 0
modules/auth/cloudbrain.go View File

@@ -23,6 +23,11 @@ type CreateCloudBrainForm struct {
BootFile string `form:"boot_file"`
Params string `form:"run_para_list"`
BranchName string `form:"branch_name"`
ModelName string `form:"model_name"`
ModelVersion string `form:"model_version"`
CkptName string `form:"ckpt_name"`
LabelName string `form:"label_names"`
PreTrainModelUrl string `form:"pre_train_model_url"`
DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
}


+ 5
- 0
modules/auth/grampus.go View File

@@ -18,6 +18,11 @@ type CreateGrampusTrainJobForm struct {
WorkServerNumber int `form:"work_server_number" binding:"Required"`
Image string `form:"image"`
DatasetName string `form:"dataset_name"`
ModelName string `form:"model_name"`
ModelVersion string `form:"model_version"`
CkptName string `form:"ckpt_name"`
LabelName string `form:"label_names"`
PreTrainModelUrl string `form:"pre_train_model_url"`
SpecId int64 `form:"spec_id"`
}



+ 5
- 0
modules/auth/modelarts.go View File

@@ -48,6 +48,11 @@ type CreateModelArtsTrainJobForm struct {
FlavorName string `form:"flaver_names" binding:"Required"`
EngineName string `form:"engine_names" binding:"Required"`
SpecId int64 `form:"spec_id" binding:"Required"`
ModelName string `form:"model_name"`
ModelVersion string `form:"model_version"`
CkptName string `form:"ckpt_name"`
LabelName string `form:"label_names"`
PreTrainModelUrl string `form:"pre_train_model_url"`
}

type CreateModelArtsInferenceJobForm struct {


+ 14
- 0
modules/cloudbrain/cloudbrain.go View File

@@ -24,6 +24,7 @@ const (
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"
PretrainModelMountPath = "/pretrainmodel"
LogFile = "log.txt"
BenchMarkMountPath = "/benchmark"
BenchMarkResourceID = 1
@@ -77,6 +78,8 @@ type GenerateCloudBrainTaskReq struct {
ModelVersion string
CkptName string
LabelName string
PreTrainModelPath string
PreTrainModelUrl string
Spec *models.Specification
}

@@ -276,6 +279,16 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
},
}

if req.PreTrainModelUrl != "" { //预训练
volumes = append(volumes, models.Volume{
HostPath: models.StHostPath{
Path: req.PreTrainModelPath,
MountPath: PretrainModelMountPath,
ReadOnly: true,
},
})
}

if len(req.DatasetInfos) == 1 {
volumes = append(volumes, models.Volume{
HostPath: models.StHostPath{
@@ -359,6 +372,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
CkptName: req.CkptName,
ResultUrl: req.ResultPath,
LabelName: req.LabelName,
PreTrainModelUrl: req.PreTrainModelUrl,
CreatedUnix: createTime,
UpdatedUnix: createTime,
CommitID: req.CommitID,


+ 18
- 5
modules/grampus/grampus.go View File

@@ -22,9 +22,6 @@ const (
GpuWorkDir = "/tmp/"
NpuWorkDir = "/cache/"

CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" +
"echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;"

CodeArchiveName = "master.zip"
)

@@ -34,6 +31,9 @@ var (
ImageInfos *setting.StImageInfosModelArts

SpecialPools *models.SpecialPools

CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" +
"echo \"finish loading script\";unzip -q master.zip;cd %s;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;"
)

type GenerateTrainJobReq struct {
@@ -62,8 +62,16 @@ type GenerateTrainJobReq struct {
TotalVersionCount int
ComputeResource string
ProcessType string
DatasetName string

DatasetNames string
DatasetInfos map[string]models.DatasetInfo
Params string
ModelName string
LabelName string
CkptName string
ModelVersion string
PreTrainModelPath string
PreTrainModelUrl string
Spec *models.Specification
}

@@ -103,7 +111,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
JobType: string(models.JobTypeTrain),
Type: models.TypeC2Net,
Uuid: req.Uuid,
DatasetName: req.DatasetName,
DatasetName: req.DatasetNames,
CommitID: req.CommitID,
IsLatestVersion: req.IsLatestVersion,
ComputeResource: req.ComputeResource,
@@ -121,6 +129,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
ModelName: req.ModelName,
ModelVersion: req.ModelVersion,
LabelName: req.LabelName,
PreTrainModelUrl: req.PreTrainModelUrl,
CkptName: req.CkptName,
})

if err != nil {


+ 15
- 0
modules/modelarts/modelarts.go View File

@@ -104,6 +104,11 @@ type GenerateTrainJobReq struct {
UserCommand string
DatasetName string
Spec *models.Specification
ModelName string
LabelName string
CkptName string
ModelVersion string
PreTrainModelUrl string
}

type GenerateInferenceJobReq struct {
@@ -439,6 +444,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
ModelName: req.ModelName,
ModelVersion: req.ModelVersion,
LabelName: req.LabelName,
PreTrainModelUrl: req.PreTrainModelUrl,
CkptName: req.CkptName,
})

if createErr != nil {
@@ -588,6 +598,11 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
CreatedUnix: createTime,
UpdatedUnix: createTime,
Spec: req.Spec,
ModelName: req.ModelName,
ModelVersion: req.ModelVersion,
LabelName: req.LabelName,
PreTrainModelUrl: req.PreTrainModelUrl,
CkptName: req.CkptName,
})
if createErr != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error())


+ 9
- 6
modules/setting/setting.go View File

@@ -583,12 +583,13 @@ var (

//grampus config
Grampus = struct {
Env string
Host string
UserName string
Password string
SpecialPools string
C2NetSequence string
Env string
Host string
UserName string
Password string
SpecialPools string
C2NetSequence string
SyncScriptProject string
}{}

C2NetInfos *C2NetSqInfos
@@ -1558,6 +1559,8 @@ func getGrampusConfig() {
log.Error("Unmarshal(C2NetSequence) failed:%v", err)
}
}
Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus")

}

func SetRadarMapConfig() {


+ 13
- 0
routers/repo/cloudbrain.go View File

@@ -368,6 +368,16 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
Spec: spec,
}

if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
req.LabelName = form.LabelName
req.CkptName = form.CkptName
req.ModelVersion = form.ModelVersion
req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl
req.PreTrainModelUrl = form.PreTrainModelUrl

}

err = cloudbrain.GenerateTask(req)
if err != nil {
cloudBrainNewDataPrepare(ctx)
@@ -2682,6 +2692,9 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) {
param += " --" + parameter.Label + "=" + parameter.Value
}
}
if form.CkptName != "" {
param += " --pretrainmodelname" + "=" + form.CkptName
}

command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile



+ 127
- 34
routers/repo/grampus.go View File

@@ -334,7 +334,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid
branchName := form.BranchName
image := strings.TrimSpace(form.Image)

@@ -414,11 +413,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}

//check dataset
attachment, err := models.GetAttachmentByUUID(uuid)

datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU)
if err != nil {
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form)
return
}

@@ -460,8 +460,22 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
return
}

var datasetRemotePath, allFileName string
for _, datasetInfo := range datasetInfos {
if datasetRemotePath == "" {
datasetRemotePath = datasetInfo.DataLocalPath
allFileName = datasetInfo.FullName
} else {
datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath
allFileName = allFileName + ";" + datasetInfo.FullName
}

}

//prepare command
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name)
preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName)

command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -472,26 +486,37 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)

req := &grampus.GenerateTrainJobReq{
JobName: jobName,
DisplayJobName: displayJobName,
ComputeResource: models.GPUResource,
ProcessType: grampus.ProcessorTypeGPU,
Command: command,
ImageUrl: image,
Description: description,
BootFile: bootFile,
Uuid: uuid,
CommitID: commitID,
BranchName: branchName,
Params: form.Params,
EngineName: image,
DatasetName: attachment.Name,
JobName: jobName,
DisplayJobName: displayJobName,
ComputeResource: models.GPUResource,
ProcessType: grampus.ProcessorTypeGPU,
Command: command,
ImageUrl: image,
Description: description,
BootFile: bootFile,
Uuid: uuid,
CommitID: commitID,
BranchName: branchName,
Params: form.Params,
EngineName: image,
DatasetNames: datasetNames,
DatasetInfos: datasetInfos,

IsLatestVersion: modelarts.IsLatestVersion,
VersionCount: modelarts.VersionCountOne,
WorkServerNumber: 1,
Spec: spec,
}

if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
req.LabelName = form.LabelName
req.CkptName = form.CkptName
req.ModelVersion = form.ModelVersion
req.PreTrainModelUrl = form.PreTrainModelUrl

}

err = grampus.GenerateTrainJob(ctx, req)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
@@ -502,6 +527,17 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

func getPreTrainModelPath(pretrainModelDir string, fileName string) string {
index := strings.Index(pretrainModelDir, "/")
if index > 0 {
filterBucket := pretrainModelDir[index+1:]
return filterBucket + fileName
} else {
return ""
}

}

func GrampusTrainJobVersionCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) {
computeResource := ctx.Query("compute_resource")
if computeResource == models.GPUResource {
@@ -547,7 +583,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
codeObsPath := grampus.JobPath + jobName + modelarts.CodePath
dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
//dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
branchName := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
versionCount := modelarts.VersionCountOne
@@ -629,11 +665,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}

//check dataset
attachment, err := models.GetAttachmentByUUID(uuid)
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU)
if err != nil {
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form)
return
}

@@ -665,8 +701,21 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
return
}

var datasetRemotePath, allFileName string
for _, datasetInfo := range datasetInfos {
if datasetRemotePath == "" {
datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'"
allFileName = datasetInfo.FullName
} else {
datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'"
allFileName = allFileName + ";" + datasetInfo.FullName
}

}

//prepare command
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name)
preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName)
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
@@ -683,7 +732,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
ProcessType: grampus.ProcessorTypeNPU,
Command: command,
ImageId: form.ImageID,
DataUrl: dataObsPath,
Description: description,
CodeObsPath: codeObsPath,
BootFileUrl: codeObsPath + bootFile,
@@ -697,9 +745,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
EngineName: engineName,
VersionCount: versionCount,
TotalVersionCount: modelarts.TotalVersionCount,
DatasetName: attachment.Name,
DatasetNames: datasetNames,
DatasetInfos: datasetInfos,
Spec: spec,
}
if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
req.LabelName = form.LabelName
req.CkptName = form.CkptName
req.ModelVersion = form.ModelVersion
req.PreTrainModelUrl = form.PreTrainModelUrl

}

err = grampus.GenerateTrainJob(ctx, req)
if err != nil {
@@ -913,7 +970,7 @@ func GrampusGetLog(ctx *context.Context) {
return
}

func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) {
func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName string) (string, error) {
var command string

workDir := grampus.NpuWorkDir
@@ -921,22 +978,22 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
workDir = grampus.GpuWorkDir
}

command += "pwd;cd " + workDir + grampus.CommandPrepareScript
command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject)
//download code & dataset
if processorType == grampus.ProcessorTypeNPU {
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';"
commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload)
command += commandDownload
} else if processorType == grampus.ProcessorTypeGPU {
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';"
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'"
commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload)
command += commandDownload
}

//unzip code & dataset
toolUnzip := "unzip -q '"
if strings.HasSuffix(datasetName, ".tar.gz") {
toolUnzip = "tar -zxvf '"
}
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';"
unZipDatasetCommand := generateDatasetUnzipCommand(datasetName)

commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand
command += commandUnzip

command += "echo \"unzip finished;start to exec code;\";"
@@ -967,6 +1024,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
}
}

if pretrainModelFileName != "" {
paramCode += " --pretrainmodelname" + "=" + pretrainModelFileName
}

var commandCode string
if processorType == grampus.ProcessorTypeNPU {
commandCode = "/bin/bash /home/work/run_train_for_openi.sh " + workDir + "code/" + strings.ToLower(repoName) + "/" + bootFile + " /tmp/log/train.log" + paramCode + ";"
@@ -996,6 +1057,38 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
return command, nil
}

func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string {
commandDownloadTemp := commandDownload
if pretrainModelPath != "" {
commandDownloadTemp += " '" + pretrainModelPath + "' '" + pretrainModelFileName + "'"
}
commandDownloadTemp += ";"
return commandDownloadTemp
}

func generateDatasetUnzipCommand(datasetName string) string {
var unZipDatasetCommand string

datasetNameArray := strings.Split(datasetName, ";")
if len(datasetNameArray) == 1 { //单数据集
unZipDatasetCommand = "unzip -q '" + datasetName + "';"
if strings.HasSuffix(datasetName, ".tar.gz") {
unZipDatasetCommand = "tar --strip-components=1 -zxvf '" + datasetName + "';"
}

} else { //多数据集
for _, datasetNameTemp := range datasetNameArray {
if strings.HasSuffix(datasetName, ".tar.gz") {
unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + datasetName + "';"
} else {
unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';"
}
}

}
return unZipDatasetCommand
}

func downloadZipCode(ctx *context.Context, codePath, branchName string) error {
archiveType := git.ZIP
archivePath := codePath


+ 40
- 0
routers/repo/modelarts.go View File

@@ -1010,6 +1010,13 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
ctx.Data["engine_id"] = task.EngineID
ctx.Data["datasetType"] = models.TypeCloudBrainTwo

//pretrain model
ctx.Data["model_name"] = task.ModelName
ctx.Data["model_version"] = task.ModelVersion
ctx.Data["ckpt_name"] = task.CkptName
ctx.Data["label_names"] = task.LabelName
ctx.Data["pre_train_model_url"] = task.PreTrainModelUrl

configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {
ctx.ServerError("getConfigList failed:", err)
@@ -1290,6 +1297,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
Value: string(jsondatas),
})
}
if form.ModelName != "" { //使用预训练模型训练
ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName
param = append(param, models.Parameter{
Label: modelarts.CkptUrl,
Value: "s3:/" + ckptUrl,
})
}

//save param config
// if isSaveParam == "on" {
@@ -1358,6 +1372,15 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
DatasetName: datasetNames,
Spec: spec,
}
if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
req.LabelName = form.LabelName
req.CkptName = form.CkptName
req.ModelVersion = form.ModelVersion
req.PreTrainModelUrl = form.PreTrainModelUrl

}

userCommand, userImageUrl := getUserCommand(engineID, req)
req.UserCommand = userCommand
req.UserImageUrl = userImageUrl
@@ -1633,6 +1656,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
})
}

if form.ModelName != "" { //使用预训练模型训练
ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName
param = append(param, models.Parameter{
Label: modelarts.CkptUrl,
Value: "s3:/" + ckptUrl,
})
}

// //save param config
// if isSaveParam == "on" {
// saveparams := append(param, models.Parameter{
@@ -1707,6 +1738,15 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
DatasetName: datasetNames,
Spec: spec,
}

if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
req.LabelName = form.LabelName
req.CkptName = form.CkptName
req.ModelVersion = form.ModelVersion
req.PreTrainModelUrl = form.PreTrainModelUrl

}
userCommand, userImageUrl := getUserCommand(engineID, req)
req.UserCommand = userCommand
req.UserImageUrl = userImageUrl


+ 37
- 0
templates/custom/select_model.tmpl View File

@@ -0,0 +1,37 @@
<input type="hidden" id="ai_model_version" name="model_version" value="{{$.model_version}}">
<div class="inline min_title fields" style="{{if not .job_name}}width: 96.8%{{else}}width: 94.8%{{end}};">
<label class="{{if not .job_name}}label-fix-width{{end}}" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.infer_job.select_model"}}</label>&nbsp;
<div class="six wide field">
<div class="ui fluid search selection dropdown" id="select_model">
<input type="hidden" name="model_name" required value="{{$.model_name}}">
<div class="text"></div>
<i class="dropdown icon"></i>
<div class="menu" id="model_name">
</div>
</div>
</div>
<div class="three wide field">
<div class="ui fluid search selection dropdown" id="select_model_version">
<input type="hidden" name="pre_train_model_url" value="{{$.pre_train_model_url}}" required>
<div class="text"></div>
<i class="dropdown icon"></i>
<div class="menu" id="model_name_version">
</div>

</div>
</div>
<div class="five wide field">
<div class="ui fluid search selection dropdown" id="select_model_checkpoint">
<input type="hidden" name="ckpt_name" value="{{$.ckpt_name}}" required>
<div class="text"></div>
<i class="dropdown icon"></i>
<div class="menu" id="model_checkpoint">
</div>
</div>

</div>
<span >
<i class="question circle icon" data-content="{{.i18n.Tr "cloudbrain.model_file_postfix_rule"}}" data-position="top center" data-variation="inverted mini"></i>
</span>

</div>

+ 2
- 2
templates/repo/cloudbrain/trainjob/new.tmpl View File

@@ -70,7 +70,7 @@
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-queue="{{.QueuesDetail}}" data-queue-start="{{.i18n.Tr "repo.wait_count_start"}}" data-queue-end="{{.i18n.Tr "repo.wait_count_end"}}"></div>
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-queue="{{.QueuesDetail}}" data-queue-start="{{.i18n.Tr "repo.wait_count_start"}}" data-queue-end="{{.i18n.Tr "repo.wait_count_end"}}"></div>
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
@@ -168,7 +168,7 @@
{{end}}
</select>
</div>
{{template "custom/select_model" .}}
<div class="inline required field" style="display: none;">
<label>{{.i18n.Tr "cloudbrain.task_type"}}</label>
<select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px'


+ 2
- 1
templates/repo/grampus/trainjob/gpu/new.tmpl View File

@@ -62,6 +62,7 @@
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div>
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
@@ -155,7 +156,7 @@
{{end}}
</select>
</div>
{{template "custom/select_model" .}}
<div id="images-new-grampus">
</div>



+ 2
- 0
templates/repo/grampus/trainjob/npu/new.tmpl View File

@@ -57,6 +57,7 @@
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div>
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
@@ -149,6 +150,7 @@
{{end}}
</select>
</div>
{{template "custom/select_model" .}}
<div class="required min_title inline field" id="engine_name">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label>
<select class="ui dropdown width81" id="trainjob_images" name="image_id">


+ 3
- 3
templates/repo/modelarts/trainjob/new.tmpl View File

@@ -62,7 +62,7 @@
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}"></div>
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div>
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
@@ -157,8 +157,8 @@
{{end}}
</select>
</div>
{{template "custom/select_model" .}}

<div class="required inline min_title fields" style="width: 95%;">
<label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}}</label>


+ 2
- 2
templates/repo/modelarts/trainjob/version_new.tmpl View File

@@ -55,7 +55,7 @@
<div class="repository">
{{template "repo/header" .}}
<div class="ui container">
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-dataset-uuid="{{.uuid}}" data-dataset-name="{{.dataset_name}}"></div>
<div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.uuid}}" data-dataset-name="{{.dataset_name}}"></div>
{{template "base/alert" .}}
<h4 class="ui top attached header">
{{.i18n.Tr "repo.modelarts.train_job.new"}}
@@ -154,7 +154,7 @@

</div>

{{template "custom/select_model" .}}

<div class="required unite min_title inline fields" style="width: 90%;">
<label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}}&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</label>


+ 130
- 0
web_src/js/features/cloudbrainShow.js View File

@@ -443,4 +443,134 @@ export default async function initCloudrainSow() {
html += "</div>";
$(`#dir_list${version_name}`).append(html);
}

let nameMap, nameList;
let RepoLink = $(".cloudbrain-type").data("repo-link");
let type = $(".cloudbrain-type").data("cloudbrain-type");
let flagModel = $(".cloudbrain-type").data("flag-model");
// 获取模型列表和模型名称对应的模型版本
$(document).ready(function () {
if (!flagModel) return;
else {
$.get(
`${RepoLink}/modelmanage/query_model_for_predict?type=${type}`,
(data) => {
nameMap = data.nameMap;
nameList = data.nameList;
let html = "";
nameList.forEach((element) => {
html += `<div class="item" data-value=${element}>${element}</div>`;
});
if (nameList.length !== 0) {
$("#model_name").append(html);
}
let faildModelName = $('input[name="model_name"]').val();
let faildModelVersion = $('input[name="model_version"]').val();
let faildTrainUrl = $('input[name="pre_train_model_url"]').val();
let faildCkptName = $('input[name="ckpt_name"]').val();
// 新建错误的表单返回初始化
if (faildModelName) {
$("#select_model").dropdown("set text", faildModelName);
$("#select_model").dropdown("set value", faildModelName);
$("#select_model_version").dropdown("set text", faildModelVersion);
$("#select_model_version").dropdown("set value", faildTrainUrl);
$("#select_model_checkpoint").dropdown("set text", faildCkptName);
$("#select_model_checkpoint").dropdown("set value", faildCkptName);
}
}
);
}
$("#select_model").dropdown({
onChange: function (value, text, $selectedItem) {
$("#model_name_version").empty();
let html = "";
nameMap[value].forEach((element) => {
let { TrainTaskInfo } = element;
TrainTaskInfo = JSON.parse(TrainTaskInfo);
html += `<div class="item" data-label="${element.Label}" data-id="${element.ID}" data-value="${element.Path}">${element.Version}</div>`;
});
$("#model_name_version").append(html);
const initVersionText = $(
"#model_name_version div.item:first-child"
).text();
const initVersionValue = $(
"#model_name_version div.item:first-child"
).data("value");

$("#select_model_version").dropdown("set text", initVersionText);
$("#select_model_version").dropdown(
"set value",
initVersionValue,
initVersionText,
$("#model_name_version div.item:first-child")
);
},
});

$("#select_model_version").dropdown({
onChange: function (value, text, $selectedItem) {
const dataID =
$selectedItem && $selectedItem[0].getAttribute("data-id");
$("input#ai_model_version").val(text);
$("#select_model_checkpoint").addClass("loading");
$("#model_checkpoint").empty();
let html = "";
loadCheckpointList(dataID).then((res) => {
res.forEach((element) => {
const ckptSuffix = element.FileName.split(".");
const loadCheckpointFile = [
"ckpt",
"pb",
"h5",
"json",
"pkl",
"pth",
"t7",
"pdparams",
"onnx",
"pbtxt",
"keras",
"mlmodel",
"cfg",
"pt",
];
if (
!element.IsDir &&
loadCheckpointFile.includes(ckptSuffix[ckptSuffix.length - 1])
) {
html += `<div class="item" data-value=${element.FileName}>${element.FileName}</div>`;
}
});
$("#model_checkpoint").append(html);
$("#select_model_checkpoint").removeClass("loading");
const initVersionText = $(
"#model_checkpoint div.item:first-child"
).text();
const initVersionValue = $(
"#model_checkpoint div.item:first-child"
).data("value");

$("#select_model_checkpoint").dropdown("set text", initVersionText);
$("#select_model_checkpoint").dropdown(
"set value",
initVersionValue,
initVersionText,
$("#model_name_version div.item:first-child")
);
});
},
});
});

function loadCheckpointList(value) {
return new Promise((resolve, reject) => {
$.get(
`${RepoLink}/modelmanage/query_modelfile_for_predict`,
{ ID: value },
(data) => {
resolve(data);
}
);
});
}
}

Loading…
Cancel
Save