| @@ -3,6 +3,7 @@ package repo | |||
| import ( | |||
| "encoding/json" | |||
| "errors" | |||
| "strings" | |||
| "code.gitea.io/gitea/models" | |||
| "code.gitea.io/gitea/modules/cloudbrain" | |||
| @@ -90,99 +91,111 @@ func SaveModelConvert(ctx *context.Context) { | |||
| func createTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, modelRelativePath string) error { | |||
| repo, _ := models.GetRepositoryByID(ctx.Repo.Repository.ID) | |||
| command := "" | |||
| if modelConvert.SrcEngine == PYTORCH_ENGINE { | |||
| command = getPytorchModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert) | |||
| } | |||
| log.Info("command=" + command) | |||
| codePath := setting.JobPath + modelConvert.ID + CodeMountPath | |||
| downloadCode(repo, codePath, DefaultBranchName) | |||
| codePath := setting.JobPath + modelConvert.ID + CodeMountPath | |||
| downloadCode(repo, codePath, DefaultBranchName) | |||
| uploadCodeToMinio(codePath+"/", modelConvert.ID, CodeMountPath+"/") | |||
| log.Info("minio code path=" + setting.CBCodePathPrefix + modelConvert.ID) | |||
| uploadCodeToMinio(codePath+"/", modelConvert.ID, CodeMountPath+"/") | |||
| minioCodePath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/code" | |||
| log.Info("Volume codePath=" + minioCodePath) | |||
| minioCodePath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/code" | |||
| log.Info("minio codePath=" + minioCodePath) | |||
| modelPath := setting.JobPath + modelConvert.ID + ModelMountPath + "/" | |||
| log.Info("modelPath=" + modelPath) | |||
| mkModelPath(modelPath) | |||
| modelPath := setting.JobPath + modelConvert.ID + ModelMountPath + "/" | |||
| log.Info("local modelPath=" + modelPath) | |||
| mkModelPath(modelPath) | |||
| uploadCodeToMinio(modelPath, modelConvert.ID, ModelMountPath+"/") | |||
| uploadCodeToMinio(modelPath, modelConvert.ID, ModelMountPath+"/") | |||
| minioModelPath := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/model" | |||
| log.Info("minio model path=" + minioModelPath) | |||
| command := getModelConvertCommand(modelConvert.ID, modelConvert.ModelPath) | |||
| log.Info("command=" + command) | |||
| dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath | |||
| log.Info("dataActualPath=" + dataActualPath) | |||
| dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath | |||
| log.Info("dataActualPath=" + dataActualPath) | |||
| if TrainResourceSpecs == nil { | |||
| json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) | |||
| } | |||
| resourceSpec := TrainResourceSpecs.ResourceSpec[1] | |||
| jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{ | |||
| JobName: modelConvert.ID, | |||
| RetryCount: 1, | |||
| GpuType: GpuQueue, | |||
| Image: GPU_PYTORCH_IMAGE, | |||
| TaskRoles: []models.TaskRole{ | |||
| { | |||
| Name: SubTaskName, | |||
| TaskNumber: 1, | |||
| MinSucceededTaskCount: 1, | |||
| MinFailedTaskCount: 1, | |||
| CPUNumber: resourceSpec.CpuNum, | |||
| GPUNumber: resourceSpec.GpuNum, | |||
| MemoryMB: resourceSpec.MemMiB, | |||
| ShmMB: resourceSpec.ShareMemMiB, | |||
| Command: command, | |||
| NeedIBDevice: false, | |||
| IsMainRole: false, | |||
| UseNNI: false, | |||
| }, | |||
| if TrainResourceSpecs == nil { | |||
| json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) | |||
| } | |||
| resourceSpec := TrainResourceSpecs.ResourceSpec[1] | |||
| jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{ | |||
| JobName: modelConvert.ID, | |||
| RetryCount: 1, | |||
| GpuType: GpuQueue, | |||
| Image: GPU_PYTORCH_IMAGE, | |||
| TaskRoles: []models.TaskRole{ | |||
| { | |||
| Name: SubTaskName, | |||
| TaskNumber: 1, | |||
| MinSucceededTaskCount: 1, | |||
| MinFailedTaskCount: 1, | |||
| CPUNumber: resourceSpec.CpuNum, | |||
| GPUNumber: resourceSpec.GpuNum, | |||
| MemoryMB: resourceSpec.MemMiB, | |||
| ShmMB: resourceSpec.ShareMemMiB, | |||
| Command: command, | |||
| NeedIBDevice: false, | |||
| IsMainRole: false, | |||
| UseNNI: false, | |||
| }, | |||
| Volumes: []models.Volume{ | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: minioCodePath, | |||
| MountPath: CodeMountPath, | |||
| ReadOnly: false, | |||
| }, | |||
| }, | |||
| Volumes: []models.Volume{ | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: minioCodePath, | |||
| MountPath: CodeMountPath, | |||
| ReadOnly: false, | |||
| }, | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: dataActualPath, | |||
| MountPath: DataSetMountPath, | |||
| ReadOnly: true, | |||
| }, | |||
| }, | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: dataActualPath, | |||
| MountPath: DataSetMountPath, | |||
| ReadOnly: true, | |||
| }, | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: modelPath, | |||
| MountPath: ModelMountPath, | |||
| ReadOnly: false, | |||
| }, | |||
| }, | |||
| { | |||
| HostPath: models.StHostPath{ | |||
| Path: minioModelPath, | |||
| MountPath: ModelMountPath, | |||
| ReadOnly: false, | |||
| }, | |||
| }, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) | |||
| return err | |||
| } | |||
| if jobResult.Code != Success { | |||
| log.Error("CreateJob(%s) failed:%s", modelConvert.ID, jobResult.Msg, ctx.Data["MsgID"]) | |||
| return errors.New(jobResult.Msg) | |||
| } | |||
| var jobID = jobResult.Payload["jobId"].(string) | |||
| log.Info("jobId=" + jobID) | |||
| models.UpdateModelConvertCBTI(modelConvert.ID, jobID) | |||
| }, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) | |||
| return err | |||
| } | |||
| if jobResult.Code != Success { | |||
| log.Error("CreateJob(%s) failed:%s", modelConvert.ID, jobResult.Msg, ctx.Data["MsgID"]) | |||
| return errors.New(jobResult.Msg) | |||
| } | |||
| var jobID = jobResult.Payload["jobId"].(string) | |||
| log.Info("jobId=" + jobID) | |||
| models.UpdateModelConvertCBTI(modelConvert.ID, jobID) | |||
| return nil | |||
| } | |||
| func getModelConvertCommand(name string, modelFile string) string { | |||
| func getPytorchModelConvertCommand(name string, modelFile string, modelConvert *models.AiModelConvert) string { | |||
| var command string | |||
| bootFile := "convert_pytorch.py" | |||
| command += "python3 /code/" + bootFile + " --model " + modelFile + " > " + ModelMountPath + "/" + name + "-" + LogFile | |||
| intputshape := strings.Split(modelConvert.InputShape, ",") | |||
| n := "256" | |||
| c := "1" | |||
| h := "28" | |||
| w := "28" | |||
| if len(intputshape) == 4 { | |||
| n = intputshape[0] | |||
| c = intputshape[1] | |||
| h = intputshape[2] | |||
| w = intputshape[3] | |||
| } | |||
| command += "python3 /code/" + bootFile + " --model " + modelFile + " --n " + n + " --c " + c + " --h " + h + " --w " + w + " > " + ModelMountPath + "/" + name + "-" + LogFile | |||
| return command | |||
| } | |||