|
|
|
@@ -210,7 +210,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid |
|
|
|
branchName := form.BranchName |
|
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
|
|
|
|
@@ -290,11 +289,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
//check dataset |
|
|
|
attachment, err := models.GetAttachmentByUUID(uuid) |
|
|
|
|
|
|
|
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
@@ -336,8 +336,22 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
var datasetRemotePath, allFileName string |
|
|
|
for _, datasetInfo := range datasetInfos { |
|
|
|
if datasetRemotePath == "" { |
|
|
|
datasetRemotePath = datasetInfo.DataLocalPath |
|
|
|
allFileName = datasetInfo.FullName |
|
|
|
} else { |
|
|
|
datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath |
|
|
|
allFileName = allFileName + ";" + datasetInfo.FullName |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
//prepare command |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) |
|
|
|
preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) |
|
|
|
|
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
|
@@ -348,26 +362,37 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) |
|
|
|
|
|
|
|
req := &grampus.GenerateTrainJobReq{ |
|
|
|
JobName: jobName, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
ComputeResource: models.GPUResource, |
|
|
|
ProcessType: grampus.ProcessorTypeGPU, |
|
|
|
Command: command, |
|
|
|
ImageUrl: image, |
|
|
|
Description: description, |
|
|
|
BootFile: bootFile, |
|
|
|
Uuid: uuid, |
|
|
|
CommitID: commitID, |
|
|
|
BranchName: branchName, |
|
|
|
Params: form.Params, |
|
|
|
EngineName: image, |
|
|
|
DatasetName: attachment.Name, |
|
|
|
JobName: jobName, |
|
|
|
DisplayJobName: displayJobName, |
|
|
|
ComputeResource: models.GPUResource, |
|
|
|
ProcessType: grampus.ProcessorTypeGPU, |
|
|
|
Command: command, |
|
|
|
ImageUrl: image, |
|
|
|
Description: description, |
|
|
|
BootFile: bootFile, |
|
|
|
Uuid: uuid, |
|
|
|
CommitID: commitID, |
|
|
|
BranchName: branchName, |
|
|
|
Params: form.Params, |
|
|
|
EngineName: image, |
|
|
|
DatasetNames: datasetNames, |
|
|
|
DatasetInfos: datasetInfos, |
|
|
|
|
|
|
|
IsLatestVersion: modelarts.IsLatestVersion, |
|
|
|
VersionCount: modelarts.VersionCountOne, |
|
|
|
WorkServerNumber: 1, |
|
|
|
Spec: spec, |
|
|
|
} |
|
|
|
|
|
|
|
if form.ModelName != "" { //使用预训练模型训练 |
|
|
|
req.ModelName = form.ModelName |
|
|
|
req.LabelName = form.LabelName |
|
|
|
req.CkptName = form.CkptName |
|
|
|
req.ModelVersion = form.ModelVersion |
|
|
|
req.PreTrainingModelUrl = form.PreTrainModelUrl |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
err = grampus.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) |
|
|
|
@@ -378,6 +403,17 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
} |
|
|
|
|
|
|
|
// getPreTrainModelPath derives the path of a pre-trained model file from the
// model directory URL and the checkpoint file name.
//
// Everything up to and including the first "/" of pretrainModelDir is dropped
// (presumably the bucket name — confirm against the callers), and fileName is
// appended to the remainder as-is. No separator is inserted, so
// pretrainModelDir is expected to end with "/" when the remainder is a
// directory.
//
// It returns "" when pretrainModelDir contains no "/" or starts with "/".
func getPreTrainModelPath(pretrainModelDir string, fileName string) string {
	slash := strings.Index(pretrainModelDir, "/")
	if slash <= 0 {
		// No leading segment to strip: treat as "no pre-trained model".
		return ""
	}
	return pretrainModelDir[slash+1:] + fileName
}
|
|
|
|
|
|
|
func checkSpecialPool(ctx *context.Context, resourceType string) string { |
|
|
|
grampus.InitSpecialPool() |
|
|
|
if grampus.SpecialPools != nil { |
|
|
|
@@ -410,7 +446,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
codeObsPath := grampus.JobPath + jobName + modelarts.CodePath |
|
|
|
dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" |
|
|
|
//dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" |
|
|
|
branchName := form.BranchName |
|
|
|
isLatestVersion := modelarts.IsLatestVersion |
|
|
|
versionCount := modelarts.VersionCountOne |
|
|
|
@@ -492,11 +528,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
//check dataset |
|
|
|
attachment, err := models.GetAttachmentByUUID(uuid) |
|
|
|
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) |
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) |
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
@@ -528,8 +564,21 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
var datasetRemotePath, allFileName string |
|
|
|
for _, datasetInfo := range datasetInfos { |
|
|
|
if datasetRemotePath == "" { |
|
|
|
datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" |
|
|
|
allFileName = datasetInfo.FullName |
|
|
|
} else { |
|
|
|
datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" |
|
|
|
allFileName = allFileName + ";" + datasetInfo.FullName |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
//prepare command |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) |
|
|
|
preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
@@ -546,7 +595,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
ProcessType: grampus.ProcessorTypeNPU, |
|
|
|
Command: command, |
|
|
|
ImageId: form.ImageID, |
|
|
|
DataUrl: dataObsPath, |
|
|
|
Description: description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + bootFile, |
|
|
|
@@ -560,9 +608,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
EngineName: engineName, |
|
|
|
VersionCount: versionCount, |
|
|
|
TotalVersionCount: modelarts.TotalVersionCount, |
|
|
|
DatasetName: attachment.Name, |
|
|
|
DatasetNames: datasetNames, |
|
|
|
DatasetInfos: datasetInfos, |
|
|
|
Spec: spec, |
|
|
|
} |
|
|
|
if form.ModelName != "" { //使用预训练模型训练 |
|
|
|
req.ModelName = form.ModelName |
|
|
|
req.LabelName = form.LabelName |
|
|
|
req.CkptName = form.CkptName |
|
|
|
req.ModelVersion = form.ModelVersion |
|
|
|
req.PreTrainingModelUrl = form.PreTrainModelUrl |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
err = grampus.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
@@ -776,7 +833,7 @@ func GrampusGetLog(ctx *context.Context) { |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) { |
|
|
|
func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName string) (string, error) { |
|
|
|
var command string |
|
|
|
|
|
|
|
workDir := grampus.NpuWorkDir |
|
|
|
@@ -788,18 +845,18 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo |
|
|
|
//download code & dataset |
|
|
|
if processorType == grampus.ProcessorTypeNPU { |
|
|
|
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" |
|
|
|
commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) |
|
|
|
command += commandDownload |
|
|
|
} else if processorType == grampus.ProcessorTypeGPU { |
|
|
|
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" |
|
|
|
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" |
|
|
|
commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) |
|
|
|
command += commandDownload |
|
|
|
} |
|
|
|
|
|
|
|
//unzip code & dataset |
|
|
|
toolUnzip := "unzip -q '" |
|
|
|
if strings.HasSuffix(datasetName, ".tar.gz") { |
|
|
|
toolUnzip = "tar -zxvf '" |
|
|
|
} |
|
|
|
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';" |
|
|
|
unZipDatasetCommand := generateDatasetUnzipCommand(datasetName) |
|
|
|
|
|
|
|
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand |
|
|
|
command += commandUnzip |
|
|
|
|
|
|
|
command += "echo \"unzip finished;start to exec code;\";" |
|
|
|
@@ -859,6 +916,38 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo |
|
|
|
return command, nil |
|
|
|
} |
|
|
|
|
|
|
|
// processPretrainModelParameter finishes a downloader command line.
//
// When pretrainModelPath is non-empty, the path and pretrainModelFileName are
// appended to commandDownload as two extra single-quoted arguments. The
// result is always terminated with exactly one ";".
//
// A ";" already present at the end of commandDownload is removed first.
// Without this, a caller that passes an already-terminated command would get
// "…;;" (a shell syntax error) or would have the pre-trained model arguments
// placed after the terminator, where the shell treats them as a separate
// command.
func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string {
	commandDownloadTemp := strings.TrimSuffix(commandDownload, ";")
	if pretrainModelPath != "" {
		commandDownloadTemp += " '" + pretrainModelPath + "' '" + pretrainModelFileName + "'"
	}
	return commandDownloadTemp + ";"
}
|
|
|
|
|
|
|
// generateDatasetUnzipCommand builds the shell snippet that unpacks the
// dataset archive(s) named by datasetName.
//
// datasetName is either a single archive name or several names joined with
// ";". Every emitted command ends with ";".
//
//   - Single dataset: ".tar.gz" archives are unpacked with
//     "tar --strip-components=1 -zxvf"; anything else with "unzip -q".
//   - Multiple datasets: each archive is handled on its own. ".tar.gz"
//     archives are unpacked with "tar -zxvf"; anything else is unzipped into
//     a directory named after the archive with its ".zip" suffix removed.
func generateDatasetUnzipCommand(datasetName string) string {
	datasetNameArray := strings.Split(datasetName, ";")

	// Single dataset.
	if len(datasetNameArray) == 1 {
		if strings.HasSuffix(datasetName, ".tar.gz") {
			return "tar --strip-components=1 -zxvf '" + datasetName + "';"
		}
		return "unzip -q '" + datasetName + "';"
	}

	// Multiple datasets: decide the tool per archive. The check and the
	// command must use the individual name, not the ";"-joined list.
	var unZipDatasetCommand strings.Builder
	for _, datasetNameTemp := range datasetNameArray {
		if strings.HasSuffix(datasetNameTemp, ".tar.gz") {
			unZipDatasetCommand.WriteString("tar -zxvf '" + datasetNameTemp + "';")
		} else {
			unZipDatasetCommand.WriteString("unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';")
		}
	}
	return unZipDatasetCommand.String()
}
|
|
|
|
|
|
|
func downloadZipCode(ctx *context.Context, codePath, branchName string) error { |
|
|
|
archiveType := git.ZIP |
|
|
|
archivePath := codePath |
|
|
|
|