diff --git a/routers/repo/ai_model_convert.go b/routers/repo/ai_model_convert.go index 94b2d18c9..5a5a59652 100644 --- a/routers/repo/ai_model_convert.go +++ b/routers/repo/ai_model_convert.go @@ -3,6 +3,7 @@ package repo import ( "encoding/json" "errors" + "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/cloudbrain" @@ -90,99 +91,111 @@ func SaveModelConvert(ctx *context.Context) { func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { repo, _ := models.GetRepositoryByID(ctx.Repo.Repository.ID) + command := "" if modelConvert.SrcEngine == PYTORCH_ENGINE { + command = getPytorchModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert) + } + log.Info("command=" + command) - codePath := setting.JobPath + modelConvert.ID + CodeMountPath - - downloadCode(repo, codePath, DefaultBranchName) + codePath := setting.JobPath + modelConvert.ID + CodeMountPath + downloadCode(repo, codePath, DefaultBranchName) - uploadCodeToMinio(codePath+"/", modelConvert.ID, CodeMountPath+"/") - log.Info("minio code path=" + setting.CBCodePathPrefix + modelConvert.ID) + uploadCodeToMinio(codePath+"/", modelConvert.ID, CodeMountPath+"/") - minioCodePath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/code" - log.Info("Volume codePath=" + minioCodePath) + minioCodePath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/code" + log.Info("minio codePath=" + minioCodePath) - modelPath := setting.JobPath + modelConvert.ID + ModelMountPath + "/" - log.Info("modelPath=" + modelPath) - mkModelPath(modelPath) + modelPath := setting.JobPath + modelConvert.ID + ModelMountPath + "/" + log.Info("local modelPath=" + modelPath) + mkModelPath(modelPath) - uploadCodeToMinio(modelPath, modelConvert.ID, ModelMountPath+"/") + uploadCodeToMinio(modelPath, modelConvert.ID, ModelMountPath+"/") + minioModelPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/model" + log.Info("minio model path=" + minioModelPath) - command := getModelConvertCommand(modelConvert.ID, modelConvert.ModelPath) - log.Info("command=" + command) - dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath - log.Info("dataActualPath=" + dataActualPath) + dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath + log.Info("dataActualPath=" + dataActualPath) - if TrainResourceSpecs == nil { - json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) - } - resourceSpec := TrainResourceSpecs.ResourceSpec[1] - jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{ - JobName: modelConvert.ID, - RetryCount: 1, - GpuType: GpuQueue, - Image: GPU_PYTORCH_IMAGE, - TaskRoles: []models.TaskRole{ - { - Name: SubTaskName, - TaskNumber: 1, - MinSucceededTaskCount: 1, - MinFailedTaskCount: 1, - CPUNumber: resourceSpec.CpuNum, - GPUNumber: resourceSpec.GpuNum, - MemoryMB: resourceSpec.MemMiB, - ShmMB: resourceSpec.ShareMemMiB, - Command: command, - NeedIBDevice: false, - IsMainRole: false, - UseNNI: false, - }, + if TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) + } + resourceSpec := TrainResourceSpecs.ResourceSpec[1] + jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{ + JobName: modelConvert.ID, + RetryCount: 1, + GpuType: GpuQueue, + Image: GPU_PYTORCH_IMAGE, + TaskRoles: []models.TaskRole{ + { + Name: SubTaskName, + TaskNumber: 1, + MinSucceededTaskCount: 1, + MinFailedTaskCount: 1, + CPUNumber: resourceSpec.CpuNum, + GPUNumber: resourceSpec.GpuNum, + MemoryMB: resourceSpec.MemMiB, + ShmMB: resourceSpec.ShareMemMiB, + Command: command, + NeedIBDevice: false, + IsMainRole: false, + UseNNI: false, }, - Volumes: []models.Volume{ - { - HostPath: models.StHostPath{ - Path: minioCodePath, - MountPath: CodeMountPath, - ReadOnly: false, - }, + }, + Volumes: []models.Volume{ + { + HostPath: models.StHostPath{ + Path: minioCodePath, + MountPath: CodeMountPath, + ReadOnly: false, }, - { - HostPath: models.StHostPath{ - Path: dataActualPath, - MountPath: DataSetMountPath, - ReadOnly: true, - }, + }, + { + HostPath: models.StHostPath{ + Path: dataActualPath, + MountPath: DataSetMountPath, + ReadOnly: true, }, - { - HostPath: models.StHostPath{ - Path: modelPath, - MountPath: ModelMountPath, - ReadOnly: false, - }, + }, + { + HostPath: models.StHostPath{ + Path: minioModelPath, + MountPath: ModelMountPath, + ReadOnly: false, }, }, - }) - if err != nil { - log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) - return err - } - if jobResult.Code != Success { - log.Error("CreateJob(%s) failed:%s", modelConvert.ID, jobResult.Msg, ctx.Data["MsgID"]) - return errors.New(jobResult.Msg) - } - - var jobID = jobResult.Payload["jobId"].(string) - log.Info("jobId=" + jobID) - models.UpdateModelConvertCBTI(modelConvert.ID, jobID) + }, + }) + if err != nil { + log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) + return err + } + if jobResult.Code != Success { + log.Error("CreateJob(%s) failed:%s", modelConvert.ID, jobResult.Msg, ctx.Data["MsgID"]) + return errors.New(jobResult.Msg) } + var jobID = jobResult.Payload["jobId"].(string) + log.Info("jobId=" + jobID) + models.UpdateModelConvertCBTI(modelConvert.ID, jobID) + return nil } -func getModelConvertCommand(name string, modelFile string) string { +func getPytorchModelConvertCommand(name string, modelFile string, modelConvert *models.AiModelConvert) string { var command string bootFile := "convert_pytorch.py" - command += "python3 /code/" + bootFile + " --model " + modelFile + " > " + ModelMountPath + "/" + name + "-" + LogFile + intputshape := strings.Split(modelConvert.InputShape, ",") + n := "256" + c := "1" + h := "28" + w := "28" + if len(intputshape) == 4 { + n = intputshape[0] + c = intputshape[1] + h = intputshape[2] + w = intputshape[3] + } + command += "python3 /code/" + bootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile return command }