|
|
|
@@ -19,14 +19,14 @@ import ( |
|
|
|
|
|
|
|
const ( |
|
|
|
//notebook |
|
|
|
storageTypeOBS = "obs" |
|
|
|
autoStopDuration = 4 * 60 * 60 |
|
|
|
autoStopDurationMs = 4 * 60 * 60 * 1000 |
|
|
|
|
|
|
|
DataSetMountPath = "/home/ma-user/work" |
|
|
|
NotebookEnv = "Python3" |
|
|
|
NotebookType = "Ascend" |
|
|
|
FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)" |
|
|
|
storageTypeOBS = "obs" |
|
|
|
autoStopDuration = 4 * 60 * 60 |
|
|
|
autoStopDurationMs = 4 * 60 * 60 * 1000 |
|
|
|
MORDELART_USER_IMAGE_ENGINE_ID = 1000000 |
|
|
|
DataSetMountPath = "/home/ma-user/work" |
|
|
|
NotebookEnv = "Python3" |
|
|
|
NotebookType = "Ascend" |
|
|
|
FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)" |
|
|
|
|
|
|
|
//train-job |
|
|
|
// ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}" |
|
|
|
@@ -152,6 +152,7 @@ type Engine struct { |
|
|
|
Info []struct { |
|
|
|
ID int `json:"id"` |
|
|
|
Value string `json:"value"` |
|
|
|
Url string `json:"url"` |
|
|
|
} `json:"engine"` |
|
|
|
} |
|
|
|
|
|
|
|
@@ -314,6 +315,78 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func GenerateTrainJobByUserImage(ctx *context.Context, req *GenerateTrainJobReq) (err error) { |
|
|
|
createTime := timeutil.TimeStampNow() |
|
|
|
jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{ |
|
|
|
JobName: req.JobName, |
|
|
|
Description: req.Description, |
|
|
|
Config: models.UserImageConfig{ |
|
|
|
WorkServerNum: req.WorkServerNumber, |
|
|
|
AppUrl: req.CodeObsPath, |
|
|
|
BootFileUrl: req.BootFileUrl, |
|
|
|
DataUrl: req.DataUrl, |
|
|
|
TrainUrl: req.TrainUrl, |
|
|
|
LogUrl: req.LogUrl, |
|
|
|
PoolID: req.PoolID, |
|
|
|
CreateVersion: true, |
|
|
|
Flavor: models.Flavor{ |
|
|
|
Code: req.FlavorCode, |
|
|
|
}, |
|
|
|
Parameter: req.Parameters, |
|
|
|
UserImageUrl: req.UserImageUrl, |
|
|
|
UserCommand: req.UserCommand, |
|
|
|
}, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
log.Error("CreateJob failed: %v", err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
jobId := strconv.FormatInt(jobResult.JobID, 10) |
|
|
|
err = models.CreateCloudbrain(&models.Cloudbrain{ |
|
|
|
Status: TransTrainJobStatus(jobResult.Status), |
|
|
|
UserID: ctx.User.ID, |
|
|
|
RepoID: ctx.Repo.Repository.ID, |
|
|
|
JobID: jobId, |
|
|
|
JobName: req.JobName, |
|
|
|
DisplayJobName: req.DisplayJobName, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
VersionID: jobResult.VersionID, |
|
|
|
VersionName: jobResult.VersionName, |
|
|
|
Uuid: req.Uuid, |
|
|
|
DatasetName: req.DatasetName, |
|
|
|
CommitID: req.CommitID, |
|
|
|
IsLatestVersion: req.IsLatestVersion, |
|
|
|
ComputeResource: models.NPUResource, |
|
|
|
EngineID: MORDELART_USER_IMAGE_ENGINE_ID, |
|
|
|
Image: req.UserImageUrl, |
|
|
|
UserImageCommand: req.UserCommand, |
|
|
|
TrainUrl: req.TrainUrl, |
|
|
|
BranchName: req.BranchName, |
|
|
|
Parameters: req.Params, |
|
|
|
BootFile: req.BootFile, |
|
|
|
DataUrl: req.DataUrl, |
|
|
|
LogUrl: req.LogUrl, |
|
|
|
FlavorCode: req.FlavorCode, |
|
|
|
Description: req.Description, |
|
|
|
WorkServerNumber: req.WorkServerNumber, |
|
|
|
FlavorName: req.FlavorName, |
|
|
|
EngineName: req.EngineName, |
|
|
|
VersionCount: req.VersionCount, |
|
|
|
TotalVersionCount: req.TotalVersionCount, |
|
|
|
CreatedUnix: createTime, |
|
|
|
UpdatedUnix: createTime, |
|
|
|
}) |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask) |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { |
|
|
|
createTime := timeutil.TimeStampNow() |
|
|
|
jobResult, err := createTrainJob(models.CreateTrainJobParams{ |
|
|
|
@@ -497,6 +570,100 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { |
|
|
|
createTime := timeutil.TimeStampNow() |
|
|
|
jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{ |
|
|
|
JobName: req.JobName, |
|
|
|
Description: req.Description, |
|
|
|
Config: models.UserImageConfig{ |
|
|
|
WorkServerNum: req.WorkServerNumber, |
|
|
|
AppUrl: req.CodeObsPath, |
|
|
|
BootFileUrl: req.BootFileUrl, |
|
|
|
DataUrl: req.DataUrl, |
|
|
|
TrainUrl: req.TrainUrl, |
|
|
|
LogUrl: req.LogUrl, |
|
|
|
PoolID: req.PoolID, |
|
|
|
CreateVersion: true, |
|
|
|
Flavor: models.Flavor{ |
|
|
|
Code: req.FlavorCode, |
|
|
|
}, |
|
|
|
Parameter: req.Parameters, |
|
|
|
UserImageUrl: req.UserImageUrl, |
|
|
|
UserCommand: req.UserCommand, |
|
|
|
}, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
log.Error("CreateJob failed: %v", err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
var jobTypes []string |
|
|
|
jobTypes = append(jobTypes, string(models.JobTypeTrain)) |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobTypes: jobTypes, |
|
|
|
JobID: strconv.FormatInt(jobResult.JobID, 10), |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
//将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount |
|
|
|
|
|
|
|
err = models.CreateCloudbrain(&models.Cloudbrain{ |
|
|
|
Status: TransTrainJobStatus(jobResult.Status), |
|
|
|
UserID: ctx.User.ID, |
|
|
|
RepoID: ctx.Repo.Repository.ID, |
|
|
|
JobID: strconv.FormatInt(jobResult.JobID, 10), |
|
|
|
JobName: req.JobName, |
|
|
|
DisplayJobName: req.DisplayJobName, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
VersionID: jobResult.VersionID, |
|
|
|
VersionName: jobResult.VersionName, |
|
|
|
Uuid: req.Uuid, |
|
|
|
DatasetName: req.DatasetName, |
|
|
|
CommitID: req.CommitID, |
|
|
|
IsLatestVersion: req.IsLatestVersion, |
|
|
|
PreVersionName: req.PreVersionName, |
|
|
|
ComputeResource: models.NPUResource, |
|
|
|
EngineID: MORDELART_USER_IMAGE_ENGINE_ID, |
|
|
|
Image: req.UserImageUrl, |
|
|
|
UserImageCommand: req.UserCommand, |
|
|
|
TrainUrl: req.TrainUrl, |
|
|
|
BranchName: req.BranchName, |
|
|
|
Parameters: req.Params, |
|
|
|
BootFile: req.BootFile, |
|
|
|
DataUrl: req.DataUrl, |
|
|
|
LogUrl: req.LogUrl, |
|
|
|
PreVersionId: req.PreVersionId, |
|
|
|
FlavorCode: req.FlavorCode, |
|
|
|
Description: req.Description, |
|
|
|
WorkServerNumber: req.WorkServerNumber, |
|
|
|
FlavorName: req.FlavorName, |
|
|
|
EngineName: req.EngineName, |
|
|
|
TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1, |
|
|
|
VersionCount: VersionListCount + 1, |
|
|
|
CreatedUnix: createTime, |
|
|
|
UpdatedUnix: createTime, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
//将训练任务的上一版本的isLatestVersion设置为"0" |
|
|
|
err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Update IsLatestVersion failed", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
func TransTrainJobStatus(status int) string { |
|
|
|
switch status { |
|
|
|
case 0: |
|
|
|
|