diff --git a/models/cloudbrain.go b/models/cloudbrain.go index dc56efef7..f93b653e1 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "fmt" + "path" "strconv" "strings" "time" @@ -187,6 +188,7 @@ type Cloudbrain struct { ModelName string //模型名称 ModelVersion string //模型版本 CkptName string //权重文件名称 + PreTrainModelUrl string //预训练模型地址 ResultUrl string //推理结果的obs路径 User *User `xorm:"-"` @@ -603,6 +605,16 @@ type ResourceSpec struct { ShareMemMiB int `json:"shareMemMiB"` } +type FlavorInfos struct { + FlavorInfo []*FlavorInfo `json:"flavor_info"` +} + +type FlavorInfo struct { + Id int `json:"id"` + Value string `json:"value"` + Desc string `json:"desc"` +} + type SpecialPools struct { Pools []*SpecialPool `json:"pools"` } @@ -2223,9 +2235,10 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er type DatasetInfo struct { DataLocalPath string Name string + FullName string } -func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { +func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { var datasetNames string uuids := strings.Split(uuidStr, ";") if len(uuids) > setting.MaxDatasetNum { @@ -2258,16 +2271,26 @@ func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { return nil, datasetNames, errors.New("the dataset name is same") } } + var dataLocalPath string + if len(grampusType) > 0 { + if grampusType[0] == GPU { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } - dataLocalPath := setting.Attachment.Minio.RealPath + - setting.Attachment.Minio.Bucket + "/" + - setting.Attachment.Minio.BasePath + - AttachmentRelativePath(attach.UUID) + - attach.UUID + } else { + dataLocalPath = 
setting.Attachment.Minio.RealPath + + setting.Attachment.Minio.Bucket + "/" + + setting.Attachment.Minio.BasePath + + AttachmentRelativePath(attach.UUID) + + attach.UUID + } datasetInfos[attach.UUID] = DatasetInfo{ DataLocalPath: dataLocalPath, Name: fileName, + FullName: attach.Name, } if i == 0 { datasetNames = attach.Name diff --git a/models/dataset.go b/models/dataset.go index 4cff4d6d1..720850ed9 100755 --- a/models/dataset.go +++ b/models/dataset.go @@ -131,13 +131,17 @@ func (datasets DatasetList) loadAttachmentAttributes(opts *SearchDatasetOptions) permission = false datasets[i].Repo.GetOwner() if !permission { - isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID) - isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID) - - if isCollaborator ||isInRepoTeam { - log.Info("Collaborator user may visit the attach.") + if datasets[i].Repo.OwnerID==opts.User.ID{ permission = true + }else{ + isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID) + isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID) + + if isCollaborator ||isInRepoTeam { + permission = true + } } + } permissionMap[datasets[i].ID] = permission diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 5bd294f2a..48e23efac 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -23,6 +23,11 @@ type CreateCloudBrainForm struct { BootFile string `form:"boot_file"` Params string `form:"run_para_list"` BranchName string `form:"branch_name"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` DatasetName string `form:"dataset_name"` SpecId int64 `form:"spec_id"` } diff --git a/modules/auth/grampus.go b/modules/auth/grampus.go index 21008ea09..414a7c25d 100755 --- a/modules/auth/grampus.go +++ b/modules/auth/grampus.go @@ -18,6 +18,11 @@ type CreateGrampusTrainJobForm 
struct { WorkServerNumber int `form:"work_server_number" binding:"Required"` Image string `form:"image"` DatasetName string `form:"dataset_name"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` SpecId int64 `form:"spec_id"` } diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 23e1f325a..ced5ea1e8 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -48,6 +48,11 @@ type CreateModelArtsTrainJobForm struct { FlavorName string `form:"flaver_names" binding:"Required"` EngineName string `form:"engine_names" binding:"Required"` SpecId int64 `form:"spec_id" binding:"Required"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` } type CreateModelArtsInferenceJobForm struct { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 748af4a29..4e527b6bf 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -24,6 +24,7 @@ const ( CodeMountPath = "/code" DataSetMountPath = "/dataset" ModelMountPath = "/model" + PretrainModelMountPath = "/pretrainmodel" LogFile = "log.txt" BenchMarkMountPath = "/benchmark" BenchMarkResourceID = 1 @@ -77,6 +78,8 @@ type GenerateCloudBrainTaskReq struct { ModelVersion string CkptName string LabelName string + PreTrainModelPath string + PreTrainModelUrl string Spec *models.Specification } @@ -276,6 +279,16 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { }, } + if req.PreTrainModelUrl != "" { //预训练 + volumes = append(volumes, models.Volume{ + HostPath: models.StHostPath{ + Path: req.PreTrainModelPath, + MountPath: PretrainModelMountPath, + ReadOnly: true, + }, + }) + } + if len(req.DatasetInfos) == 1 { 
volumes = append(volumes, models.Volume{ HostPath: models.StHostPath{ @@ -359,6 +372,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { CkptName: req.CkptName, ResultUrl: req.ResultPath, LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, CreatedUnix: createTime, UpdatedUnix: createTime, CommitID: req.CommitID, diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 687fb4959..9ff2ed212 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -22,9 +22,6 @@ const ( GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" - CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" + - "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" - CodeArchiveName = "master.zip" ) @@ -34,6 +31,9 @@ var ( ImageInfos *setting.StImageInfosModelArts SpecialPools *models.SpecialPools + + CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + + "echo \"finish loading script\";unzip -q master.zip;cd %s;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) type GenerateTrainJobReq struct { @@ -62,8 +62,16 @@ type GenerateTrainJobReq struct { TotalVersionCount int ComputeResource string ProcessType string - DatasetName string + + DatasetNames string + DatasetInfos map[string]models.DatasetInfo Params string + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainModelPath string + PreTrainModelUrl string Spec *models.Specification } @@ -103,7 +111,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error JobType: string(models.JobTypeTrain), Type: models.TypeC2Net, Uuid: 
req.Uuid, - DatasetName: req.DatasetName, + DatasetName: req.DatasetNames, CommitID: req.CommitID, IsLatestVersion: req.IsLatestVersion, ComputeResource: req.ComputeResource, @@ -121,6 +129,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error CreatedUnix: createTime, UpdatedUnix: createTime, Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if err != nil { diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 4539699ad..5f318a546 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -104,6 +104,11 @@ type GenerateTrainJobReq struct { UserCommand string DatasetName string Spec *models.Specification + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainModelUrl string } type GenerateInferenceJobReq struct { @@ -439,6 +444,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error CreatedUnix: createTime, UpdatedUnix: createTime, Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if createErr != nil { @@ -588,6 +598,11 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job CreatedUnix: createTime, UpdatedUnix: createTime, Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if createErr != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error()) diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 1992baf54..7d726a773 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -583,12 +583,13 @@ var ( //grampus config Grampus = struct { - Env string - Host 
string - UserName string - Password string - SpecialPools string - C2NetSequence string + Env string + Host string + UserName string + Password string + SpecialPools string + C2NetSequence string + SyncScriptProject string }{} C2NetInfos *C2NetSqInfos @@ -1558,6 +1559,8 @@ func getGrampusConfig() { log.Error("Unmarshal(C2NetSequence) failed:%v", err) } } + Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus") + } func SetRadarMapConfig() { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index d870d575c..bfa7cff35 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -368,6 +368,16 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl + + } + err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx) @@ -2682,6 +2692,9 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { param += " --" + parameter.Label + "=" + parameter.Value } } + if form.CkptName != "" { + param += " --pretrainmodelname" + "=" + form.CkptName + } command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 0c55067da..e55e492e3 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -334,7 +334,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" - 
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid branchName := form.BranchName image := strings.TrimSpace(form.Image) @@ -414,11 +413,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check dataset - attachment, err := models.GetAttachmentByUUID(uuid) + + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU) if err != nil { - log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) return } @@ -460,8 +460,22 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + //prepare command - command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) + preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) + + command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, 
// getPreTrainModelPath drops the first path segment of pretrainModelDir
// (presumably the bucket name; confirm against the callers) and appends
// fileName to what remains.
//
// Everything up to and including the first "/" is removed. The remainder is
// concatenated with fileName verbatim: no separator is inserted, so
// pretrainModelDir is expected to already end in "/".
//
// It returns "" when pretrainModelDir contains no "/" or starts with "/"
// (i.e. there is no non-empty leading segment to strip).
func getPreTrainModelPath(pretrainModelDir string, fileName string) string {
	index := strings.Index(pretrainModelDir, "/")
	if index <= 0 {
		return ""
	}
	return pretrainModelDir[index+1:] + fileName
}
*context.Context, form auth.CreateGrampusTrainJobForm) { computeResource := ctx.Query("compute_resource") if computeResource == models.GPUResource { @@ -547,7 +583,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := grampus.JobPath + jobName + modelarts.CodePath - dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" + //dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" branchName := form.BranchName isLatestVersion := modelarts.IsLatestVersion versionCount := modelarts.VersionCountOne @@ -629,11 +665,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check dataset - attachment, err := models.GetAttachmentByUUID(uuid) + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU) if err != nil { - log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) return } @@ -665,8 +701,21 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + //prepare command - command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, 
codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) + preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) + command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) @@ -683,7 +732,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain ProcessType: grampus.ProcessorTypeNPU, Command: command, ImageId: form.ImageID, - DataUrl: dataObsPath, Description: description, CodeObsPath: codeObsPath, BootFileUrl: codeObsPath + bootFile, @@ -697,9 +745,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain EngineName: engineName, VersionCount: versionCount, TotalVersionCount: modelarts.TotalVersionCount, - DatasetName: attachment.Name, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelUrl = form.PreTrainModelUrl + + } err = grampus.GenerateTrainJob(ctx, req) if err != nil { @@ -913,7 +970,7 @@ func GrampusGetLog(ctx *context.Context) { return } -func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) { +func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName string) (string, error) { var 
command string workDir := grampus.NpuWorkDir @@ -921,22 +978,22 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo workDir = grampus.GpuWorkDir } - command += "pwd;cd " + workDir + grampus.CommandPrepareScript + command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" + commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } else if processorType == grampus.ProcessorTypeGPU { - commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" + commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" + commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } //unzip code & dataset - toolUnzip := "unzip -q '" - if strings.HasSuffix(datasetName, ".tar.gz") { - toolUnzip = "tar -zxvf '" - } - commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';" + unZipDatasetCommand := generateDatasetUnzipCommand(datasetName) + + commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand command += commandUnzip command += "echo \"unzip finished;start to exec code;\";" @@ -967,6 +1024,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo } } + if 
// processPretrainModelParameter finishes a downloader command line.
//
// When pretrainModelPath is non-empty, the quoted path and the quoted
// pretrainModelFileName are appended as two extra arguments. The returned
// command always ends with exactly one ";".
//
// A trailing ";" already present on commandDownload is removed first, so the
// extra arguments stay part of the same shell command and the result never
// ends in ";;" (which bash rejects outside a case statement).
func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string {
	cmd := strings.TrimSuffix(commandDownload, ";")
	if pretrainModelPath != "" {
		cmd += " '" + pretrainModelPath + "' '" + pretrainModelFileName + "'"
	}
	return cmd + ";"
}

// generateDatasetUnzipCommand builds the shell snippet that unpacks the
// downloaded dataset archive(s).
//
// datasetName is either a single archive file name or several names joined
// with ";".
//
//   - Single archive: "*.tar.gz" is extracted with
//     "tar --strip-components=1 -zxvf"; anything else with "unzip -q".
//   - Several archives: each name is handled on its own. "*.tar.gz" is
//     extracted with "tar -zxvf"; anything else is unzipped into a directory
//     named after the archive with its ".zip" suffix removed.
//
// Every generated command is terminated with ";".
//
// NOTE(review): names are placed inside single quotes without escaping; a
// name containing "'" would break the generated command. Confirm that names
// are validated upstream.
func generateDatasetUnzipCommand(datasetName string) string {
	datasetNameArray := strings.Split(datasetName, ";")

	if len(datasetNameArray) == 1 { // single dataset
		if strings.HasSuffix(datasetName, ".tar.gz") {
			return "tar --strip-components=1 -zxvf '" + datasetName + "';"
		}
		return "unzip -q '" + datasetName + "';"
	}

	// Multiple datasets: decide per archive, never on the joined list.
	var b strings.Builder
	for _, name := range datasetNameArray {
		if strings.HasSuffix(name, ".tar.gz") {
			b.WriteString("tar -zxvf '" + name + "';")
		} else {
			b.WriteString("unzip -q '" + name + "' -d './" + strings.TrimSuffix(name, ".zip") + "';")
		}
	}
	return b.String()
}
ctx.Data["engine_id"] = task.EngineID ctx.Data["datasetType"] = models.TypeCloudBrainTwo + //pretrain model + ctx.Data["model_name"] = task.ModelName + ctx.Data["model_version"] = task.ModelVersion + ctx.Data["ckpt_name"] = task.CkptName + ctx.Data["label_names"] = task.LabelName + ctx.Data["pre_train_model_url"] = task.PreTrainModelUrl + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) if err != nil { ctx.ServerError("getConfigList failed:", err) @@ -1290,6 +1297,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) Value: string(jsondatas), }) } + if form.ModelName != "" { //使用预训练模型训练 + ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName + param = append(param, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + } //save param config // if isSaveParam == "on" { @@ -1358,6 +1372,15 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) DatasetName: datasetNames, Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelUrl = form.PreTrainModelUrl + + } + userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand req.UserImageUrl = userImageUrl @@ -1633,6 +1656,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ }) } + if form.ModelName != "" { //使用预训练模型训练 + ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName + param = append(param, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + } + // //save param config // if isSaveParam == "on" { // saveparams := append(param, models.Parameter{ @@ -1707,6 +1738,15 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ DatasetName: datasetNames, Spec: spec, } + + if form.ModelName != "" { 
//使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelUrl = form.PreTrainModelUrl + + } userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand req.UserImageUrl = userImageUrl diff --git a/templates/custom/select_model.tmpl b/templates/custom/select_model.tmpl new file mode 100644 index 000000000..81332b873 --- /dev/null +++ b/templates/custom/select_model.tmpl @@ -0,0 +1,37 @@ + +