| @@ -4,6 +4,7 @@ import ( | |||
| "encoding/json" | |||
| "errors" | |||
| "fmt" | |||
| "path" | |||
| "strconv" | |||
| "strings" | |||
| "time" | |||
| @@ -187,6 +188,7 @@ type Cloudbrain struct { | |||
| ModelName string //模型名称 | |||
| ModelVersion string //模型版本 | |||
| CkptName string //权重文件名称 | |||
| PreTrainModelUrl string //预训练模型地址 | |||
| ResultUrl string //推理结果的obs路径 | |||
| User *User `xorm:"-"` | |||
| @@ -603,6 +605,16 @@ type ResourceSpec struct { | |||
| ShareMemMiB int `json:"shareMemMiB"` | |||
| } | |||
| type FlavorInfos struct { | |||
| FlavorInfo []*FlavorInfo `json:"flavor_info"` | |||
| } | |||
| type FlavorInfo struct { | |||
| Id int `json:"id"` | |||
| Value string `json:"value"` | |||
| Desc string `json:"desc"` | |||
| } | |||
| type SpecialPools struct { | |||
| Pools []*SpecialPool `json:"pools"` | |||
| } | |||
| @@ -2223,9 +2235,10 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er | |||
| type DatasetInfo struct { | |||
| DataLocalPath string | |||
| Name string | |||
| FullName string | |||
| } | |||
| func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { | |||
| func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { | |||
| var datasetNames string | |||
| uuids := strings.Split(uuidStr, ";") | |||
| if len(uuids) > setting.MaxDatasetNum { | |||
| @@ -2258,16 +2271,26 @@ func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { | |||
| return nil, datasetNames, errors.New("the dataset name is same") | |||
| } | |||
| } | |||
| var dataLocalPath string | |||
| if len(grampusType) > 0 { | |||
| if grampusType[0] == GPU { | |||
| dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID | |||
| } else { | |||
| dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" | |||
| } | |||
| dataLocalPath := setting.Attachment.Minio.RealPath + | |||
| setting.Attachment.Minio.Bucket + "/" + | |||
| setting.Attachment.Minio.BasePath + | |||
| AttachmentRelativePath(attach.UUID) + | |||
| attach.UUID | |||
| } else { | |||
| dataLocalPath = setting.Attachment.Minio.RealPath + | |||
| setting.Attachment.Minio.Bucket + "/" + | |||
| setting.Attachment.Minio.BasePath + | |||
| AttachmentRelativePath(attach.UUID) + | |||
| attach.UUID | |||
| } | |||
| datasetInfos[attach.UUID] = DatasetInfo{ | |||
| DataLocalPath: dataLocalPath, | |||
| Name: fileName, | |||
| FullName: attach.Name, | |||
| } | |||
| if i == 0 { | |||
| datasetNames = attach.Name | |||
| @@ -131,13 +131,17 @@ func (datasets DatasetList) loadAttachmentAttributes(opts *SearchDatasetOptions) | |||
| permission = false | |||
| datasets[i].Repo.GetOwner() | |||
| if !permission { | |||
| isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID) | |||
| isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID) | |||
| if isCollaborator ||isInRepoTeam { | |||
| log.Info("Collaborator user may visit the attach.") | |||
| if datasets[i].Repo.OwnerID==opts.User.ID{ | |||
| permission = true | |||
| }else{ | |||
| isCollaborator, _ := datasets[i].Repo.IsCollaborator(opts.User.ID) | |||
| isInRepoTeam,_:=datasets[i].Repo.IsInRepoTeam(opts.User.ID) | |||
| if isCollaborator ||isInRepoTeam { | |||
| permission = true | |||
| } | |||
| } | |||
| } | |||
| permissionMap[datasets[i].ID] = permission | |||
| @@ -23,6 +23,11 @@ type CreateCloudBrainForm struct { | |||
| BootFile string `form:"boot_file"` | |||
| Params string `form:"run_para_list"` | |||
| BranchName string `form:"branch_name"` | |||
| ModelName string `form:"model_name"` | |||
| ModelVersion string `form:"model_version"` | |||
| CkptName string `form:"ckpt_name"` | |||
| LabelName string `form:"label_names"` | |||
| PreTrainModelUrl string `form:"pre_train_model_url"` | |||
| DatasetName string `form:"dataset_name"` | |||
| SpecId int64 `form:"spec_id"` | |||
| } | |||
| @@ -18,6 +18,11 @@ type CreateGrampusTrainJobForm struct { | |||
| WorkServerNumber int `form:"work_server_number" binding:"Required"` | |||
| Image string `form:"image"` | |||
| DatasetName string `form:"dataset_name"` | |||
| ModelName string `form:"model_name"` | |||
| ModelVersion string `form:"model_version"` | |||
| CkptName string `form:"ckpt_name"` | |||
| LabelName string `form:"label_names"` | |||
| PreTrainModelUrl string `form:"pre_train_model_url"` | |||
| SpecId int64 `form:"spec_id"` | |||
| } | |||
| @@ -48,6 +48,11 @@ type CreateModelArtsTrainJobForm struct { | |||
| FlavorName string `form:"flaver_names" binding:"Required"` | |||
| EngineName string `form:"engine_names" binding:"Required"` | |||
| SpecId int64 `form:"spec_id" binding:"Required"` | |||
| ModelName string `form:"model_name"` | |||
| ModelVersion string `form:"model_version"` | |||
| CkptName string `form:"ckpt_name"` | |||
| LabelName string `form:"label_names"` | |||
| PreTrainModelUrl string `form:"pre_train_model_url"` | |||
| } | |||
| type CreateModelArtsInferenceJobForm struct { | |||
| @@ -24,6 +24,7 @@ const ( | |||
| CodeMountPath = "/code" | |||
| DataSetMountPath = "/dataset" | |||
| ModelMountPath = "/model" | |||
| PretrainModelMountPath = "/pretrainmodel" | |||
| LogFile = "log.txt" | |||
| BenchMarkMountPath = "/benchmark" | |||
| BenchMarkResourceID = 1 | |||
| @@ -77,6 +78,8 @@ type GenerateCloudBrainTaskReq struct { | |||
| ModelVersion string | |||
| CkptName string | |||
| LabelName string | |||
| PreTrainModelPath string | |||
| PreTrainModelUrl string | |||
| Spec *models.Specification | |||
| } | |||
| @@ -276,6 +279,16 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
| }, | |||
| } | |||
| if req.PreTrainModelUrl != "" { //预训练 | |||
| volumes = append(volumes, models.Volume{ | |||
| HostPath: models.StHostPath{ | |||
| Path: req.PreTrainModelPath, | |||
| MountPath: PretrainModelMountPath, | |||
| ReadOnly: true, | |||
| }, | |||
| }) | |||
| } | |||
| if len(req.DatasetInfos) == 1 { | |||
| volumes = append(volumes, models.Volume{ | |||
| HostPath: models.StHostPath{ | |||
| @@ -359,6 +372,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
| CkptName: req.CkptName, | |||
| ResultUrl: req.ResultPath, | |||
| LabelName: req.LabelName, | |||
| PreTrainModelUrl: req.PreTrainModelUrl, | |||
| CreatedUnix: createTime, | |||
| UpdatedUnix: createTime, | |||
| CommitID: req.CommitID, | |||
| @@ -22,9 +22,6 @@ const ( | |||
| GpuWorkDir = "/tmp/" | |||
| NpuWorkDir = "/cache/" | |||
| CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" + | |||
| "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" | |||
| CodeArchiveName = "master.zip" | |||
| ) | |||
| @@ -34,6 +31,9 @@ var ( | |||
| ImageInfos *setting.StImageInfosModelArts | |||
| SpecialPools *models.SpecialPools | |||
| CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + | |||
| "echo \"finish loading script\";unzip -q master.zip;cd %s;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" | |||
| ) | |||
| type GenerateTrainJobReq struct { | |||
| @@ -62,8 +62,16 @@ type GenerateTrainJobReq struct { | |||
| TotalVersionCount int | |||
| ComputeResource string | |||
| ProcessType string | |||
| DatasetName string | |||
| DatasetNames string | |||
| DatasetInfos map[string]models.DatasetInfo | |||
| Params string | |||
| ModelName string | |||
| LabelName string | |||
| CkptName string | |||
| ModelVersion string | |||
| PreTrainModelPath string | |||
| PreTrainModelUrl string | |||
| Spec *models.Specification | |||
| } | |||
| @@ -103,7 +111,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeC2Net, | |||
| Uuid: req.Uuid, | |||
| DatasetName: req.DatasetName, | |||
| DatasetName: req.DatasetNames, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: req.ComputeResource, | |||
| @@ -121,6 +129,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
| CreatedUnix: createTime, | |||
| UpdatedUnix: createTime, | |||
| Spec: req.Spec, | |||
| ModelName: req.ModelName, | |||
| ModelVersion: req.ModelVersion, | |||
| LabelName: req.LabelName, | |||
| PreTrainModelUrl: req.PreTrainModelUrl, | |||
| CkptName: req.CkptName, | |||
| }) | |||
| if err != nil { | |||
| @@ -104,6 +104,11 @@ type GenerateTrainJobReq struct { | |||
| UserCommand string | |||
| DatasetName string | |||
| Spec *models.Specification | |||
| ModelName string | |||
| LabelName string | |||
| CkptName string | |||
| ModelVersion string | |||
| PreTrainModelUrl string | |||
| } | |||
| type GenerateInferenceJobReq struct { | |||
| @@ -439,6 +444,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
| CreatedUnix: createTime, | |||
| UpdatedUnix: createTime, | |||
| Spec: req.Spec, | |||
| ModelName: req.ModelName, | |||
| ModelVersion: req.ModelVersion, | |||
| LabelName: req.LabelName, | |||
| PreTrainModelUrl: req.PreTrainModelUrl, | |||
| CkptName: req.CkptName, | |||
| }) | |||
| if createErr != nil { | |||
| @@ -588,6 +598,11 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||
| CreatedUnix: createTime, | |||
| UpdatedUnix: createTime, | |||
| Spec: req.Spec, | |||
| ModelName: req.ModelName, | |||
| ModelVersion: req.ModelVersion, | |||
| LabelName: req.LabelName, | |||
| PreTrainModelUrl: req.PreTrainModelUrl, | |||
| CkptName: req.CkptName, | |||
| }) | |||
| if createErr != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error()) | |||
| @@ -583,12 +583,13 @@ var ( | |||
| //grampus config | |||
| Grampus = struct { | |||
| Env string | |||
| Host string | |||
| UserName string | |||
| Password string | |||
| SpecialPools string | |||
| C2NetSequence string | |||
| Env string | |||
| Host string | |||
| UserName string | |||
| Password string | |||
| SpecialPools string | |||
| C2NetSequence string | |||
| SyncScriptProject string | |||
| }{} | |||
| C2NetInfos *C2NetSqInfos | |||
| @@ -1558,6 +1559,8 @@ func getGrampusConfig() { | |||
| log.Error("Unmarshal(C2NetSequence) failed:%v", err) | |||
| } | |||
| } | |||
| Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus") | |||
| } | |||
| func SetRadarMapConfig() { | |||
| @@ -368,6 +368,16 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
| Spec: spec, | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| req.ModelName = form.ModelName | |||
| req.LabelName = form.LabelName | |||
| req.CkptName = form.CkptName | |||
| req.ModelVersion = form.ModelVersion | |||
| req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl | |||
| req.PreTrainModelUrl = form.PreTrainModelUrl | |||
| } | |||
| err = cloudbrain.GenerateTask(req) | |||
| if err != nil { | |||
| cloudBrainNewDataPrepare(ctx) | |||
| @@ -2682,6 +2692,9 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { | |||
| param += " --" + parameter.Label + "=" + parameter.Value | |||
| } | |||
| } | |||
| if form.CkptName != "" { | |||
| param += " --pretrainmodelname" + "=" + form.CkptName | |||
| } | |||
| command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile | |||
| @@ -334,7 +334,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" | |||
| codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" | |||
| dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid | |||
| branchName := form.BranchName | |||
| image := strings.TrimSpace(form.Image) | |||
| @@ -414,11 +413,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| } | |||
| //check dataset | |||
| attachment, err := models.GetAttachmentByUUID(uuid) | |||
| datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) | |||
| ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) | |||
| return | |||
| } | |||
| @@ -460,8 +460,22 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| return | |||
| } | |||
| var datasetRemotePath, allFileName string | |||
| for _, datasetInfo := range datasetInfos { | |||
| if datasetRemotePath == "" { | |||
| datasetRemotePath = datasetInfo.DataLocalPath | |||
| allFileName = datasetInfo.FullName | |||
| } else { | |||
| datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath | |||
| allFileName = allFileName + ";" + datasetInfo.FullName | |||
| } | |||
| } | |||
| //prepare command | |||
| command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) | |||
| preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) | |||
| command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName) | |||
| if err != nil { | |||
| log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) | |||
| @@ -472,26 +486,37 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) | |||
| req := &grampus.GenerateTrainJobReq{ | |||
| JobName: jobName, | |||
| DisplayJobName: displayJobName, | |||
| ComputeResource: models.GPUResource, | |||
| ProcessType: grampus.ProcessorTypeGPU, | |||
| Command: command, | |||
| ImageUrl: image, | |||
| Description: description, | |||
| BootFile: bootFile, | |||
| Uuid: uuid, | |||
| CommitID: commitID, | |||
| BranchName: branchName, | |||
| Params: form.Params, | |||
| EngineName: image, | |||
| DatasetName: attachment.Name, | |||
| JobName: jobName, | |||
| DisplayJobName: displayJobName, | |||
| ComputeResource: models.GPUResource, | |||
| ProcessType: grampus.ProcessorTypeGPU, | |||
| Command: command, | |||
| ImageUrl: image, | |||
| Description: description, | |||
| BootFile: bootFile, | |||
| Uuid: uuid, | |||
| CommitID: commitID, | |||
| BranchName: branchName, | |||
| Params: form.Params, | |||
| EngineName: image, | |||
| DatasetNames: datasetNames, | |||
| DatasetInfos: datasetInfos, | |||
| IsLatestVersion: modelarts.IsLatestVersion, | |||
| VersionCount: modelarts.VersionCountOne, | |||
| WorkServerNumber: 1, | |||
| Spec: spec, | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| req.ModelName = form.ModelName | |||
| req.LabelName = form.LabelName | |||
| req.CkptName = form.CkptName | |||
| req.ModelVersion = form.ModelVersion | |||
| req.PreTrainModelUrl = form.PreTrainModelUrl | |||
| } | |||
| err = grampus.GenerateTrainJob(ctx, req) | |||
| if err != nil { | |||
| log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) | |||
| @@ -502,6 +527,17 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
// getPreTrainModelPath strips the leading bucket segment ("bucket/rest/...")
// from a pre-trained model storage URL and appends the checkpoint file name,
// yielding the in-bucket object path handed to the downloader script.
// It returns "" when there is no bucket prefix before the first "/" (either
// no slash at all, or a slash at position 0).
func getPreTrainModelPath(pretrainModelDir string, fileName string) string {
	slash := strings.Index(pretrainModelDir, "/")
	if slash <= 0 {
		// No "bucket/" prefix to remove — treat as an unusable model dir.
		return ""
	}
	return pretrainModelDir[slash+1:] + fileName
}
| func GrampusTrainJobVersionCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) { | |||
| computeResource := ctx.Query("compute_resource") | |||
| if computeResource == models.GPUResource { | |||
| @@ -547,7 +583,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + modelarts.CodePath | |||
| codeObsPath := grampus.JobPath + jobName + modelarts.CodePath | |||
| dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" | |||
| //dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" | |||
| branchName := form.BranchName | |||
| isLatestVersion := modelarts.IsLatestVersion | |||
| versionCount := modelarts.VersionCountOne | |||
| @@ -629,11 +665,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| } | |||
| //check dataset | |||
| attachment, err := models.GetAttachmentByUUID(uuid) | |||
| datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) | |||
| log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) | |||
| ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) | |||
| ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) | |||
| return | |||
| } | |||
| @@ -665,8 +701,21 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| return | |||
| } | |||
| var datasetRemotePath, allFileName string | |||
| for _, datasetInfo := range datasetInfos { | |||
| if datasetRemotePath == "" { | |||
| datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" | |||
| allFileName = datasetInfo.FullName | |||
| } else { | |||
| datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" | |||
| allFileName = allFileName + ";" + datasetInfo.FullName | |||
| } | |||
| } | |||
| //prepare command | |||
| command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) | |||
| preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) | |||
| command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName) | |||
| if err != nil { | |||
| log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) | |||
| grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) | |||
| @@ -683,7 +732,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| ProcessType: grampus.ProcessorTypeNPU, | |||
| Command: command, | |||
| ImageId: form.ImageID, | |||
| DataUrl: dataObsPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| @@ -697,9 +745,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
| EngineName: engineName, | |||
| VersionCount: versionCount, | |||
| TotalVersionCount: modelarts.TotalVersionCount, | |||
| DatasetName: attachment.Name, | |||
| DatasetNames: datasetNames, | |||
| DatasetInfos: datasetInfos, | |||
| Spec: spec, | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| req.ModelName = form.ModelName | |||
| req.LabelName = form.LabelName | |||
| req.CkptName = form.CkptName | |||
| req.ModelVersion = form.ModelVersion | |||
| req.PreTrainModelUrl = form.PreTrainModelUrl | |||
| } | |||
| err = grampus.GenerateTrainJob(ctx, req) | |||
| if err != nil { | |||
| @@ -913,7 +970,7 @@ func GrampusGetLog(ctx *context.Context) { | |||
| return | |||
| } | |||
| func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) { | |||
| func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName string) (string, error) { | |||
| var command string | |||
| workDir := grampus.NpuWorkDir | |||
| @@ -921,22 +978,22 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||
| workDir = grampus.GpuWorkDir | |||
| } | |||
| command += "pwd;cd " + workDir + grampus.CommandPrepareScript | |||
| command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) | |||
| //download code & dataset | |||
| if processorType == grampus.ProcessorTypeNPU { | |||
| commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" | |||
| commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) | |||
| command += commandDownload | |||
| } else if processorType == grampus.ProcessorTypeGPU { | |||
| commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" | |||
| commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" | |||
| commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) | |||
| command += commandDownload | |||
| } | |||
| //unzip code & dataset | |||
| toolUnzip := "unzip -q '" | |||
| if strings.HasSuffix(datasetName, ".tar.gz") { | |||
| toolUnzip = "tar -zxvf '" | |||
| } | |||
| commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';" | |||
| unZipDatasetCommand := generateDatasetUnzipCommand(datasetName) | |||
| commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand | |||
| command += commandUnzip | |||
| command += "echo \"unzip finished;start to exec code;\";" | |||
| @@ -967,6 +1024,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||
| } | |||
| } | |||
| if pretrainModelFileName != "" { | |||
| paramCode += " --pretrainmodelname" + "=" + pretrainModelFileName | |||
| } | |||
| var commandCode string | |||
| if processorType == grampus.ProcessorTypeNPU { | |||
| commandCode = "/bin/bash /home/work/run_train_for_openi.sh " + workDir + "code/" + strings.ToLower(repoName) + "/" + bootFile + " /tmp/log/train.log" + paramCode + ";" | |||
| @@ -996,6 +1057,38 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo | |||
| return command, nil | |||
| } | |||
// processPretrainModelParameter appends the optional pre-trained-model
// download arguments (quoted object path and file name) to a downloader
// command and terminates the command with ";". When pretrainModelPath is
// empty the command is returned unchanged apart from the trailing ";".
func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string {
	if pretrainModelPath == "" {
		return commandDownload + ";"
	}
	return commandDownload + " '" + pretrainModelPath + "' '" + pretrainModelFileName + "';"
}
// generateDatasetUnzipCommand builds the shell snippet that unpacks the
// downloaded dataset archive(s) in the job's dataset directory.
//
// datasetName is either a single archive file name or several names joined
// with ";" (multi-dataset jobs). A single archive is extracted in place
// (tarballs drop their top-level directory via --strip-components); with
// multiple archives, each zip is extracted into its own sub-directory named
// after the archive so the datasets do not overwrite each other.
func generateDatasetUnzipCommand(datasetName string) string {
	var unZipDatasetCommand string

	datasetNameArray := strings.Split(datasetName, ";")
	if len(datasetNameArray) == 1 { // single dataset
		unZipDatasetCommand = "unzip -q '" + datasetName + "';"
		if strings.HasSuffix(datasetName, ".tar.gz") {
			unZipDatasetCommand = "tar --strip-components=1 -zxvf '" + datasetName + "';"
		}
	} else { // multiple datasets
		for _, datasetNameTemp := range datasetNameArray {
			// Fix: the per-archive suffix check and the tar argument must use
			// the loop variable, not the joined datasetName string; otherwise
			// every archive takes the branch dictated by the LAST name's
			// suffix and tar is asked to extract the whole joined string.
			if strings.HasSuffix(datasetNameTemp, ".tar.gz") {
				unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + datasetNameTemp + "';"
			} else {
				unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';"
			}
		}
	}
	return unZipDatasetCommand
}
| func downloadZipCode(ctx *context.Context, codePath, branchName string) error { | |||
| archiveType := git.ZIP | |||
| archivePath := codePath | |||
| @@ -1010,6 +1010,13 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
| ctx.Data["engine_id"] = task.EngineID | |||
| ctx.Data["datasetType"] = models.TypeCloudBrainTwo | |||
| //pretrain model | |||
| ctx.Data["model_name"] = task.ModelName | |||
| ctx.Data["model_version"] = task.ModelVersion | |||
| ctx.Data["ckpt_name"] = task.CkptName | |||
| ctx.Data["label_names"] = task.LabelName | |||
| ctx.Data["pre_train_model_url"] = task.PreTrainModelUrl | |||
| configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) | |||
| if err != nil { | |||
| ctx.ServerError("getConfigList failed:", err) | |||
| @@ -1290,6 +1297,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| Value: string(jsondatas), | |||
| }) | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName | |||
| param = append(param, models.Parameter{ | |||
| Label: modelarts.CkptUrl, | |||
| Value: "s3:/" + ckptUrl, | |||
| }) | |||
| } | |||
| //save param config | |||
| // if isSaveParam == "on" { | |||
| @@ -1358,6 +1372,15 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| DatasetName: datasetNames, | |||
| Spec: spec, | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| req.ModelName = form.ModelName | |||
| req.LabelName = form.LabelName | |||
| req.CkptName = form.CkptName | |||
| req.ModelVersion = form.ModelVersion | |||
| req.PreTrainModelUrl = form.PreTrainModelUrl | |||
| } | |||
| userCommand, userImageUrl := getUserCommand(engineID, req) | |||
| req.UserCommand = userCommand | |||
| req.UserImageUrl = userImageUrl | |||
| @@ -1633,6 +1656,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| }) | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName | |||
| param = append(param, models.Parameter{ | |||
| Label: modelarts.CkptUrl, | |||
| Value: "s3:/" + ckptUrl, | |||
| }) | |||
| } | |||
| // //save param config | |||
| // if isSaveParam == "on" { | |||
| // saveparams := append(param, models.Parameter{ | |||
| @@ -1707,6 +1738,15 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| DatasetName: datasetNames, | |||
| Spec: spec, | |||
| } | |||
| if form.ModelName != "" { //使用预训练模型训练 | |||
| req.ModelName = form.ModelName | |||
| req.LabelName = form.LabelName | |||
| req.CkptName = form.CkptName | |||
| req.ModelVersion = form.ModelVersion | |||
| req.PreTrainModelUrl = form.PreTrainModelUrl | |||
| } | |||
| userCommand, userImageUrl := getUserCommand(engineID, req) | |||
| req.UserCommand = userCommand | |||
| req.UserImageUrl = userImageUrl | |||
| @@ -0,0 +1,37 @@ | |||
| <input type="hidden" id="ai_model_version" name="model_version" value="{{$.model_version}}"> | |||
| <div class="inline min_title fields" style="{{if not .job_name}}width: 96.8%{{else}}width: 94.8%{{end}};"> | |||
| <label class="{{if not .job_name}}label-fix-width{{end}}" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.infer_job.select_model"}}</label> | |||
| <div class="six wide field"> | |||
| <div class="ui fluid search selection dropdown" id="select_model"> | |||
| <input type="hidden" name="model_name" required value="{{$.model_name}}"> | |||
| <div class="text"></div> | |||
| <i class="dropdown icon"></i> | |||
| <div class="menu" id="model_name"> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="three wide field"> | |||
| <div class="ui fluid search selection dropdown" id="select_model_version"> | |||
| <input type="hidden" name="pre_train_model_url" value="{{$.pre_train_model_url}}" required> | |||
| <div class="text"></div> | |||
| <i class="dropdown icon"></i> | |||
| <div class="menu" id="model_name_version"> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="five wide field"> | |||
| <div class="ui fluid search selection dropdown" id="select_model_checkpoint"> | |||
| <input type="hidden" name="ckpt_name" value="{{$.ckpt_name}}" required> | |||
| <div class="text"></div> | |||
| <i class="dropdown icon"></i> | |||
| <div class="menu" id="model_checkpoint"> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <span > | |||
| <i class="question circle icon" data-content="{{.i18n.Tr "cloudbrain.model_file_postfix_rule"}}" data-position="top center" data-variation="inverted mini"></i> | |||
| </span> | |||
| </div> | |||
| @@ -70,7 +70,7 @@ | |||
| <div class="repository"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-queue="{{.QueuesDetail}}" data-queue-start="{{.i18n.Tr "repo.wait_count_start"}}" data-queue-end="{{.i18n.Tr "repo.wait_count_end"}}"></div> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-queue="{{.QueuesDetail}}" data-queue-start="{{.i18n.Tr "repo.wait_count_start"}}" data-queue-end="{{.i18n.Tr "repo.wait_count_end"}}"></div> | |||
| {{template "base/alert" .}} | |||
| <h4 class="ui top attached header"> | |||
| {{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
| @@ -168,7 +168,7 @@ | |||
| {{end}} | |||
| </select> | |||
| </div> | |||
| {{template "custom/select_model" .}} | |||
| <div class="inline required field" style="display: none;"> | |||
| <label>{{.i18n.Tr "cloudbrain.task_type"}}</label> | |||
| <select id="cloudbrain_job_type" class="ui search dropdown" placeholder="选择任务类型" style='width:385px' | |||
| @@ -62,6 +62,7 @@ | |||
| <div class="repository"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div> | |||
| {{template "base/alert" .}} | |||
| <h4 class="ui top attached header"> | |||
| {{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
| @@ -155,7 +156,7 @@ | |||
| {{end}} | |||
| </select> | |||
| </div> | |||
| {{template "custom/select_model" .}} | |||
| <div id="images-new-grampus"> | |||
| </div> | |||
| @@ -57,6 +57,7 @@ | |||
| <div class="repository"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div> | |||
| {{template "base/alert" .}} | |||
| <h4 class="ui top attached header"> | |||
| {{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
| @@ -149,6 +150,7 @@ | |||
| {{end}} | |||
| </select> | |||
| </div> | |||
| {{template "custom/select_model" .}} | |||
| <div class="required min_title inline field" id="engine_name"> | |||
| <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "cloudbrain.mirror"}}</label> | |||
| <select class="ui dropdown width81" id="trainjob_images" name="image_id"> | |||
| @@ -62,7 +62,7 @@ | |||
| <div class="repository"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}"></div> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true"></div> | |||
| {{template "base/alert" .}} | |||
| <h4 class="ui top attached header"> | |||
| {{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
| @@ -157,8 +157,8 @@ | |||
| {{end}} | |||
| </select> | |||
| </div> | |||
| {{template "custom/select_model" .}} | |||
| <div class="required inline min_title fields" style="width: 95%;"> | |||
| <label class="label-fix-width" style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}}</label> | |||
| @@ -55,7 +55,7 @@ | |||
| <div class="repository"> | |||
| {{template "repo/header" .}} | |||
| <div class="ui container"> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-dataset-uuid="{{.uuid}}" data-dataset-name="{{.dataset_name}}"></div> | |||
| <div class="cloudbrain-type" style="display: none;" data-cloudbrain-type="{{.datasetType}}" data-repo-link="{{.RepoLink}}" data-flag-model="true" data-dataset-uuid="{{.uuid}}" data-dataset-name="{{.dataset_name}}"></div> | |||
| {{template "base/alert" .}} | |||
| <h4 class="ui top attached header"> | |||
| {{.i18n.Tr "repo.modelarts.train_job.new"}} | |||
| @@ -154,7 +154,7 @@ | |||
| </div> | |||
| {{template "custom/select_model" .}} | |||
| <div class="required unite min_title inline fields" style="width: 90%;"> | |||
| <label style="font-weight: normal;">{{.i18n.Tr "repo.modelarts.train_job.AI_driver"}} </label> | |||
| @@ -443,4 +443,134 @@ export default async function initCloudrainSow() { | |||
| html += "</div>"; | |||
| $(`#dir_list${version_name}`).append(html); | |||
| } | |||
let nameMap, nameList;
let RepoLink = $(".cloudbrain-type").data("repo-link");
let type = $(".cloudbrain-type").data("cloudbrain-type");
let flagModel = $(".cloudbrain-type").data("flag-model");
// File extensions accepted as loadable checkpoint/weight files.
// Hoisted so it is not re-allocated for every file in the listing.
const loadCheckpointFile = [
  "ckpt",
  "pb",
  "h5",
  "json",
  "pkl",
  "pth",
  "t7",
  "pdparams",
  "onnx",
  "pbtxt",
  "keras",
  "mlmodel",
  "cfg",
  "pt",
];
// Fetch the model list plus the name -> versions map, and wire up the
// cascading model / version / checkpoint dropdowns. Only active on pages
// that render the model selector (data-flag-model="true").
$(document).ready(function () {
  if (!flagModel) return;
  $.get(
    `${RepoLink}/modelmanage/query_model_for_predict?type=${type}`,
    (data) => {
      nameMap = data.nameMap;
      nameList = data.nameList;
      let html = "";
      nameList.forEach((element) => {
        html += `<div class="item" data-value=${element}>${element}</div>`;
      });
      if (nameList.length !== 0) {
        $("#model_name").append(html);
      }
      // Re-initialize the form with the previously submitted values when
      // the page is re-rendered after a failed submit.
      let failedModelName = $('input[name="model_name"]').val();
      let failedModelVersion = $('input[name="model_version"]').val();
      let failedTrainUrl = $('input[name="pre_train_model_url"]').val();
      let failedCkptName = $('input[name="ckpt_name"]').val();
      if (failedModelName) {
        $("#select_model").dropdown("set text", failedModelName);
        $("#select_model").dropdown("set value", failedModelName);
        $("#select_model_version").dropdown("set text", failedModelVersion);
        // The version dropdown's value is the model path/URL while its
        // visible text is the version string (see the onChange below).
        $("#select_model_version").dropdown("set value", failedTrainUrl);
        $("#select_model_checkpoint").dropdown("set text", failedCkptName);
        $("#select_model_checkpoint").dropdown("set value", failedCkptName);
      }
    }
  );
  // When a model name is chosen, repopulate the version dropdown and
  // preselect its first entry.
  $("#select_model").dropdown({
    onChange: function (value, text, $selectedItem) {
      $("#model_name_version").empty();
      let html = "";
      // NOTE(review): the original also JSON.parse'd element.TrainTaskInfo
      // here, but the result was never used — dropped as dead code.
      (nameMap[value] || []).forEach((element) => {
        html += `<div class="item" data-label="${element.Label}" data-id="${element.ID}" data-value="${element.Path}">${element.Version}</div>`;
      });
      $("#model_name_version").append(html);
      const initVersionText = $(
        "#model_name_version div.item:first-child"
      ).text();
      const initVersionValue = $(
        "#model_name_version div.item:first-child"
      ).data("value");
      $("#select_model_version").dropdown("set text", initVersionText);
      $("#select_model_version").dropdown(
        "set value",
        initVersionValue,
        initVersionText,
        $("#model_name_version div.item:first-child")
      );
    },
  });
  // When a version is chosen, fetch its file listing and repopulate the
  // checkpoint dropdown with loadable weight files only.
  $("#select_model_version").dropdown({
    onChange: function (value, text, $selectedItem) {
      const dataID =
        $selectedItem && $selectedItem[0].getAttribute("data-id");
      $("input#ai_model_version").val(text);
      $("#select_model_checkpoint").addClass("loading");
      $("#model_checkpoint").empty();
      let html = "";
      loadCheckpointList(dataID).then((res) => {
        res.forEach((element) => {
          const ckptSuffix = element.FileName.split(".");
          if (
            !element.IsDir &&
            loadCheckpointFile.includes(ckptSuffix[ckptSuffix.length - 1])
          ) {
            html += `<div class="item" data-value=${element.FileName}>${element.FileName}</div>`;
          }
        });
        $("#model_checkpoint").append(html);
        $("#select_model_checkpoint").removeClass("loading");
        const initVersionText = $(
          "#model_checkpoint div.item:first-child"
        ).text();
        const initVersionValue = $(
          "#model_checkpoint div.item:first-child"
        ).data("value");
        $("#select_model_checkpoint").dropdown("set text", initVersionText);
        // BUG FIX: the original passed the first item of #model_name_version
        // here (copy-paste from the handler above), so the checkpoint
        // dropdown was initialized with the wrong $selectedItem.
        $("#select_model_checkpoint").dropdown(
          "set value",
          initVersionValue,
          initVersionText,
          $("#model_checkpoint div.item:first-child")
        );
      });
    },
  });
});
// loadCheckpointList asks the server for the file listing of the model
// version identified by modelId and resolves with the raw response payload
// of `<repo>/modelmanage/query_modelfile_for_predict`.
function loadCheckpointList(modelId) {
  const endpoint = `${RepoLink}/modelmanage/query_modelfile_for_predict`;
  return new Promise((resolve) => {
    $.get(endpoint, { ID: modelId }, resolve);
  });
}
| } | |||