|
|
|
@@ -3,12 +3,16 @@ package repo |
|
|
|
import ( |
|
|
|
"encoding/json" |
|
|
|
"errors" |
|
|
|
"fmt" |
|
|
|
"io/ioutil" |
|
|
|
"os" |
|
|
|
"strings" |
|
|
|
|
|
|
|
"code.gitea.io/gitea/models" |
|
|
|
"code.gitea.io/gitea/modules/cloudbrain" |
|
|
|
"code.gitea.io/gitea/modules/context" |
|
|
|
"code.gitea.io/gitea/modules/log" |
|
|
|
"code.gitea.io/gitea/modules/modelarts" |
|
|
|
"code.gitea.io/gitea/modules/setting" |
|
|
|
uuid "github.com/satori/go.uuid" |
|
|
|
) |
|
|
|
@@ -28,6 +32,11 @@ const ( |
|
|
|
GpuQueue = "openidgx" |
|
|
|
Success = "S000" |
|
|
|
GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap" |
|
|
|
|
|
|
|
PytorchBootFile = "convert_pytorch.py" |
|
|
|
MindsporeBootFile = "convert_mindspore.py" |
|
|
|
|
|
|
|
REPO_ID = 33267 |
|
|
|
) |
|
|
|
|
|
|
|
var ( |
|
|
|
@@ -77,7 +86,13 @@ func SaveModelConvert(ctx *context.Context) { |
|
|
|
UserId: ctx.User.ID, |
|
|
|
} |
|
|
|
models.SaveModelConvert(modelConvert) |
|
|
|
err = createTrainJob(modelConvert, ctx, task.Path) |
|
|
|
if modelConvert.SrcEngine == PYTORCH_ENGINE { |
|
|
|
err = createGpuTrainJob(modelConvert, ctx, task.Path) |
|
|
|
} else { |
|
|
|
//create npu job |
|
|
|
createNpuTrainJob(modelConvert, ctx, task.Path) |
|
|
|
} |
|
|
|
|
|
|
|
if err == nil { |
|
|
|
ctx.JSON(200, map[string]string{ |
|
|
|
"result_code": "0", |
|
|
|
@@ -89,8 +104,99 @@ func SaveModelConvert(ctx *context.Context) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { |
|
|
|
repo, _ := models.GetRepositoryByID(ctx.Repo.Repository.ID) |
|
|
|
func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) { |
|
|
|
repo, _ := models.GetRepositoryByID(REPO_ID) |
|
|
|
VersionOutputPath := "V0001" |
|
|
|
codeLocalPath := setting.JobPath + modelConvert.ID + modelarts.CodePath |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.CodePath |
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.OutputPath + VersionOutputPath + "/" |
|
|
|
logObsPath := "/" + setting.Bucket + modelarts.JobPath + modelConvert.ID + modelarts.LogPath + VersionOutputPath + "/" |
|
|
|
dataPath := modelRelativePath |
|
|
|
|
|
|
|
_, err := ioutil.ReadDir(codeLocalPath) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} |
|
|
|
if err := downloadCode(repo, codeLocalPath, DefaultBranchName); err != nil { |
|
|
|
log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
return |
|
|
|
} |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + modelConvert.ID + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
return |
|
|
|
} |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + modelConvert.ID + modelarts.LogPath + VersionOutputPath + "/"); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
return |
|
|
|
} |
|
|
|
if err := uploadCodeToObs(codeLocalPath, modelConvert.ID, ""); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
return |
|
|
|
} |
|
|
|
intputshape := strings.Split(modelConvert.InputShape, ",") |
|
|
|
n := "256" |
|
|
|
c := "1" |
|
|
|
h := "28" |
|
|
|
w := "28" |
|
|
|
if len(intputshape) == 4 { |
|
|
|
n = intputshape[0] |
|
|
|
c = intputshape[1] |
|
|
|
h = intputshape[2] |
|
|
|
w = intputshape[3] |
|
|
|
} |
|
|
|
param := make([]models.Parameter, 0) |
|
|
|
modelPara := models.Parameter{ |
|
|
|
Label: "--model", |
|
|
|
Value: modelConvert.ModelPath, |
|
|
|
} |
|
|
|
param = append(param, modelPara) |
|
|
|
|
|
|
|
batchSizePara := models.Parameter{ |
|
|
|
Label: "--n", |
|
|
|
Value: fmt.Sprint(n), |
|
|
|
} |
|
|
|
param = append(param, batchSizePara) |
|
|
|
channelSizePara := models.Parameter{ |
|
|
|
Label: "--c", |
|
|
|
Value: fmt.Sprint(c), |
|
|
|
} |
|
|
|
param = append(param, channelSizePara) |
|
|
|
heightPara := models.Parameter{ |
|
|
|
Label: "--h", |
|
|
|
Value: fmt.Sprint(h), |
|
|
|
} |
|
|
|
param = append(param, heightPara) |
|
|
|
widthPara := models.Parameter{ |
|
|
|
Label: "--w", |
|
|
|
Value: fmt.Sprint(w), |
|
|
|
} |
|
|
|
param = append(param, widthPara) |
|
|
|
|
|
|
|
req := &modelarts.GenerateTrainJobReq{ |
|
|
|
JobName: modelConvert.ID, |
|
|
|
DisplayJobName: modelConvert.Name, |
|
|
|
DataUrl: dataPath, |
|
|
|
Description: modelConvert.Description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + MindsporeBootFile, |
|
|
|
BootFile: MindsporeBootFile, |
|
|
|
TrainUrl: outputObsPath, |
|
|
|
FlavorCode: "modelarts.bm.910.arm.public.1", |
|
|
|
WorkServerNumber: 1, |
|
|
|
IsLatestVersion: modelarts.IsLatestVersion, |
|
|
|
EngineID: int64(122), |
|
|
|
LogUrl: logObsPath, |
|
|
|
PoolID: "pool7908321a", |
|
|
|
Parameters: param, |
|
|
|
BranchName: DefaultBranchName, |
|
|
|
} |
|
|
|
result, err := modelarts.GenerateModelConvertTrainJob(req) |
|
|
|
log.Info("jobId=" + fmt.Sprint(result.JobID)) |
|
|
|
models.UpdateModelConvertCBTI(modelConvert.ID, fmt.Sprint(result.JobID)) |
|
|
|
} |
|
|
|
|
|
|
|
func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { |
|
|
|
repo, _ := models.GetRepositoryByID(REPO_ID) |
|
|
|
command := "" |
|
|
|
if modelConvert.SrcEngine == PYTORCH_ENGINE { |
|
|
|
command = getPytorchModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert) |
|
|
|
@@ -183,7 +289,7 @@ func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, m |
|
|
|
|
|
|
|
func getPytorchModelConvertCommand(name string, modelFile string, modelConvert *models.AiModelConvert) string { |
|
|
|
var command string |
|
|
|
bootFile := "convert_pytorch.py" |
|
|
|
|
|
|
|
intputshape := strings.Split(modelConvert.InputShape, ",") |
|
|
|
n := "256" |
|
|
|
c := "1" |
|
|
|
@@ -195,7 +301,7 @@ func getPytorchModelConvertCommand(name string, modelFile string, modelConvert * |
|
|
|
h = intputshape[2] |
|
|
|
w = intputshape[3] |
|
|
|
} |
|
|
|
command += "python3 /code/" + bootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile |
|
|
|
command += "python3 /code/" + PytorchBootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile |
|
|
|
return command |
|
|
|
} |
|
|
|
|
|
|
|
|