diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 78b40fd56..c7c5775ef 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -1,13 +1,14 @@ package modelarts import ( - "code.gitea.io/gitea/modules/timeutil" "encoding/json" "errors" "fmt" "path" "strconv" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" @@ -383,6 +384,29 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error return nil } +func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) { + + return createTrainJob(models.CreateTrainJobParams{ + JobName: req.JobName, + Description: req.Description, + Config: models.Config{ + WorkServerNum: req.WorkServerNumber, + AppUrl: req.CodeObsPath, + BootFileUrl: req.BootFileUrl, + DataUrl: req.DataUrl, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + LogUrl: req.LogUrl, + PoolID: req.PoolID, + CreateVersion: true, + Flavor: models.Flavor{ + Code: req.FlavorCode, + }, + Parameter: req.Parameters, + }, + }) +} + func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { createTime := timeutil.TimeStampNow() jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{ diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go index 82656d1ae..86d7fb38b 100644 --- a/routers/repo/ai_model_convert.go +++ b/routers/repo/ai_model_convert.go @@ -3,12 +3,16 @@ package repo import ( "encoding/json" "errors" + "fmt" + "io/ioutil" + "os" "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/modelarts" "code.gitea.io/gitea/modules/setting" uuid "github.com/satori/go.uuid" ) @@ -28,6 +32,11 @@ const ( GpuQueue = "openidgx" Success = "S000" GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap" + + PytorchBootFile = "convert_pytorch.py" + MindsporeBootFile = "convert_mindspore.py" + + REPO_ID = 33267 ) var ( @@ -77,7 +86,13 @@ func SaveModelConvert(ctx *context.Context) { UserId: ctx.User.ID, } models.SaveModelConvert(modelConvert) - err = createTrainJob(modelConvert, ctx, task.Path) + if modelConvert.SrcEngine == PYTORCH_ENGINE { + err = createGpuTrainJob(modelConvert, ctx, task.Path) + } else { + //create npu job + createNpuTrainJob(modelConvert, ctx, task.Path) + } + if err == nil { ctx.JSON(200, map[string]string{ "result_code": "0", @@ -89,8 +104,99 @@ func SaveModelConvert(ctx *context.Context) { } } -func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { - repo, _ := models.GetRepositoryByID(ctx.Repo.Repository.ID) +func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) { + repo, _ := models.GetRepositoryByID(REPO_ID) + VersionOutputPath := "V0001" + codeLocalPath := setting.JobPath + modelConvert.ID + modelarts.CodePath + codeObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.CodePath + outputObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.OutputPath + VersionOutputPath + "/" + logObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.LogPath + VersionOutputPath + "/" + dataPath := modelRelativePath + + _, err := ioutil.ReadDir(codeLocalPath) + if err == nil { + os.RemoveAll(codeLocalPath) + } + if err := downloadCode(repo, codeLocalPath, DefaultBranchName); err != nil { + log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) + return + } + if err := obsMkdir(setting.CodePathPrefix + modelConvert.ID + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) + return + } + if err := obsMkdir(setting.CodePathPrefix + modelConvert.ID + modelarts.LogPath + VersionOutputPath + "/"); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + return + } + if err := uploadCodeToObs(codeLocalPath, modelConvert.ID, ""); err != nil { + log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) + return + } + intputshape := strings.Split(modelConvert.InputShape, ",") + n := "256" + c := "1" + h := "28" + w := "28" + if len(intputshape) == 4 { + n = intputshape[0] + c = intputshape[1] + h = intputshape[2] + w = intputshape[3] + } + param := make([]models.Parameter, 0) + modelPara := models.Parameter{ + Label: "--model", + Value: modelConvert.ModelPath, + } + param = append(param, modelPara) + + batchSizePara := models.Parameter{ + Label: "--n", + Value: fmt.Sprint(n), + } + param = append(param, batchSizePara) + channelSizePara := models.Parameter{ + Label: "--c", + Value: fmt.Sprint(c), + } + param = append(param, channelSizePara) + heightPara := models.Parameter{ + Label: "--h", + Value: fmt.Sprint(h), + } + param = append(param, heightPara) + widthPara := models.Parameter{ + Label: "--w", + Value: fmt.Sprint(w), + } + param = append(param, widthPara) + + req := &modelarts.GenerateTrainJobReq{ + JobName: modelConvert.ID, + DisplayJobName: modelConvert.Name, + DataUrl: dataPath, + Description: modelConvert.Description, + CodeObsPath: codeObsPath, + BootFileUrl: codeObsPath + MindsporeBootFile, + BootFile: MindsporeBootFile, + TrainUrl: outputObsPath, + FlavorCode: "modelarts.bm.910.arm.public.1", + WorkServerNumber: 1, + IsLatestVersion: modelarts.IsLatestVersion, + EngineID: int64(122), + LogUrl: logObsPath, + PoolID: "pool7908321a", + Parameters: param, + BranchName: DefaultBranchName, + } + result, err := modelarts.GenerateModelConvertTrainJob(req) + log.Info("jobId=" + fmt.Sprint(result.JobID)) + models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID)) +} + +func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { + repo, _ := models.GetRepositoryByID(REPO_ID) command := "" if modelConvert.SrcEngine == PYTORCH_ENGINE { command = getPytorchModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert) @@ -183,7 +289,7 @@ func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, m func getPytorchModelConvertCommand(name string, modelFile string, modelConvert *models.AiModelConvert) string { var command string - bootFile := "convert_pytorch.py" + intputshape := strings.Split(modelConvert.InputShape, ",") n := "256" c := "1" @@ -195,7 +301,7 @@ func getPytorchModelConvertCommand(name string, modelFile string, modelConvert * h = intputshape[2] w = intputshape[3] } - command += "python3 /code/" + bootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile + command += "python3 /code/" + PytorchBootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile return command } diff --git a/templates/repo/modelmanage/convertshowinfo.tmpl b/templates/repo/modelmanage/convertshowinfo.tmpl index 39b76272f..ba790e354 100644 --- a/templates/repo/modelmanage/convertshowinfo.tmpl +++ b/templates/repo/modelmanage/convertshowinfo.tmpl @@ -181,7 +181,7 @@ td, th {