You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

grampus.go 4.4 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. package grampus
  2. import (
  3. "code.gitea.io/gitea/models"
  4. "code.gitea.io/gitea/modules/context"
  5. "code.gitea.io/gitea/modules/log"
  6. "code.gitea.io/gitea/modules/notification"
  7. "code.gitea.io/gitea/modules/timeutil"
  8. "strings"
  9. )
  // Package-level constants for the grampus package.
  const (
  	// The original source labels this first group "notebook".
  	storageTypeOBS = "obs"
  	// autoStopDuration is 4 hours; the unit is presumably seconds, given the
  	// millisecond sibling below is exactly 1000 times larger.
  	autoStopDuration = 4 * 60 * 60
  	// autoStopDurationMs is the same 4-hour span scaled by 1000.
  	autoStopDurationMs = autoStopDuration * 1000

  	DataSetMountPath = "/home/ma-user/work"
  	NotebookEnv      = "Python3"
  	NotebookType     = "Ascend"
  	FlavorInfo       = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

  	// Slash-delimited path segments.
  	CodePath   = "/code/"
  	OutputPath = "/output/"
  	ResultPath = "/result/"
  	LogPath    = "/log/"
  	JobPath    = "/job/"

  	// Sort / paging directions.
  	OrderDesc = "desc" // query downward
  	OrderAsc  = "asc"  // query upward
  	Lines     = 500

  	// Key names; presumably run-parameter labels — confirm against callers.
  	TrainUrl     = "train_url"
  	DataUrl      = "data_url"
  	ResultUrl    = "result_url"
  	CkptUrl      = "ckpt_url"
  	DeviceTarget = "device_target"
  	Ascend       = "Ascend"

  	PerPage = 10

  	// Version bookkeeping values.
  	IsLatestVersion   = "1"
  	NotLatestVersion  = "0"
  	VersionCount      = 1
  	SortByCreateTime  = "create_time"
  	ConfigTypeCustom  = "custom"
  	TotalVersionCount = 1

  	// Processor type identifiers.
  	ProcessorTypeNPU = "npu.huawei.com/NPU"
  	ProcessorTypeGPU = "nvidia.com/gpu"
  )
  43. var (
  44. poolInfos *models.PoolInfos
  45. FlavorInfos *models.FlavorInfos
  46. ImageInfos *models.ImageInfosModelArts
  47. )
  // GenerateTrainJobReq carries the inputs consumed by GenerateTrainJob.
  //
  // JobName, Command, ResourceSpecId, ImageId and ImageUrl are forwarded to the
  // job-creation request; most remaining fields are copied onto the persisted
  // models.Cloudbrain record. CodeObsPath, BootFileUrl, EngineID, PreVersionId
  // and PreVersionName are not read by GenerateTrainJob in this file.
  type GenerateTrainJobReq struct {
  	JobName        string // used as both the job name and the single task's name
  	Command        string
  	ResourceSpecId string // also stored as the record's FlavorCode
  	// ImageUrl is an alternative to ImageId; when both are set, ImageUrl
  	// takes precedence.
  	ImageUrl          string
  	ImageId           string
  	DisplayJobName    string
  	Uuid              string
  	Description       string
  	CodeObsPath       string
  	BootFile          string
  	BootFileUrl       string
  	DataUrl           string
  	TrainUrl          string
  	WorkServerNumber  int
  	EngineID          int64
  	CommitID          string
  	IsLatestVersion   string
  	BranchName        string
  	PreVersionId      int64
  	PreVersionName    string
  	FlavorName        string
  	VersionCount      int
  	EngineName        string
  	TotalVersionCount int
  	ComputeResource   string // selects the notification action type
  	DatasetName       string
  	Params            string // stored as the record's Parameters
  }
  77. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
  78. createTime := timeutil.TimeStampNow()
  79. jobResult, err := createJob(models.CreateGrampusJobRequest{
  80. Name: req.JobName,
  81. Tasks: []models.GrampusTasks{
  82. {
  83. Name: req.JobName,
  84. Command: req.Command,
  85. ResourceSpecId: req.ResourceSpecId,
  86. ImageId: req.ImageId,
  87. ImageUrl: req.ImageUrl,
  88. },
  89. },
  90. })
  91. if err != nil {
  92. log.Error("createJob failed: %v", err.Error())
  93. return err
  94. }
  95. jobID := jobResult.JobInfo.JobID
  96. err = models.CreateCloudbrain(&models.Cloudbrain{
  97. Status: TransTrainJobStatus(jobResult.JobInfo.Status),
  98. UserID: ctx.User.ID,
  99. RepoID: ctx.Repo.Repository.ID,
  100. JobID: jobID,
  101. JobName: req.JobName,
  102. DisplayJobName: req.DisplayJobName,
  103. JobType: string(models.JobTypeTrain),
  104. Type: models.TypeCloudBrainGrampus,
  105. Uuid: req.Uuid,
  106. DatasetName: req.DatasetName,
  107. CommitID: req.CommitID,
  108. IsLatestVersion: req.IsLatestVersion,
  109. ComputeResource: req.ComputeResource,
  110. ImageID: req.ImageId,
  111. TrainUrl: req.TrainUrl,
  112. BranchName: req.BranchName,
  113. Parameters: req.Params,
  114. BootFile: req.BootFile,
  115. DataUrl: req.DataUrl,
  116. FlavorCode: req.ResourceSpecId,
  117. Description: req.Description,
  118. WorkServerNumber: req.WorkServerNumber,
  119. FlavorName: req.FlavorName,
  120. EngineName: req.EngineName,
  121. VersionCount: req.VersionCount,
  122. TotalVersionCount: req.TotalVersionCount,
  123. CreatedUnix: createTime,
  124. UpdatedUnix: createTime,
  125. })
  126. if err != nil {
  127. log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
  128. return err
  129. }
  130. var actionType models.ActionType
  131. if req.ComputeResource == models.NPUResource {
  132. actionType = models.ActionCreateTrainTask
  133. } else if req.ComputeResource == models.GPUResource {
  134. actionType = models.ActionCreateGPUTrainTask
  135. }
  136. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType)
  137. return nil
  138. }
  // TransTrainJobStatus converts a job status string to upper case, mapping the
  // exact lower-case value "pending" to "WAITING". The comparison is
  // case-sensitive, so any other spelling is simply upper-cased.
  func TransTrainJobStatus(status string) string {
  	switch status {
  	case "pending":
  		return "WAITING"
  	default:
  		return strings.ToUpper(status)
  	}
  }