You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

grampus.go 3.9 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. package grampus
  2. import (
  3. "code.gitea.io/gitea/models"
  4. "code.gitea.io/gitea/modules/context"
  5. "code.gitea.io/gitea/modules/log"
  6. "code.gitea.io/gitea/modules/notification"
  7. "code.gitea.io/gitea/modules/timeutil"
  8. "strings"
  9. )
  // Constants shared by the Grampus job helpers.
  const (
  	// JobPath is the "job/" path segment.
  	// NOTE(review): presumably appended to the Grampus API base URL by the
  	// HTTP helpers; no use is visible in this part of the file.
  	JobPath = "job/"

  	// ProcessorTypeNPU is the processor-type identifier for Huawei NPUs.
  	ProcessorTypeNPU = "npu.huawei.com/NPU"
  	// ProcessorTypeGPU is the processor-type identifier for NVIDIA GPUs.
  	ProcessorTypeGPU = "nvidia.com/gpu"

  	// CommandPrepareScript is a shell snippet that prints the working
  	// directory, switches to /tmp, creates the output, code and dataset
  	// directories, downloads and unpacks the script_for_grampus archive,
  	// enters it and makes the four OBS/MinIO transfer helpers executable.
  	// It ends with ';' so further commands can be concatenated directly.
  	CommandPrepareScript = "pwd;cd /tmp;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"

  	// CodeArchiveName is the archive file name "master.zip".
  	// NOTE(review): presumably the name under which the repository code
  	// archive is stored; confirm against the callers.
  	CodeArchiveName = "master.zip"
  )
  17. var (
  18. poolInfos *models.PoolInfos
  19. FlavorInfos *models.FlavorInfos
  20. ImageInfos *models.ImageInfosModelArts
  21. )
  // GenerateTrainJobReq carries everything GenerateTrainJob needs to submit a
  // Grampus training job and to store the matching Cloudbrain record.
  type GenerateTrainJobReq struct {
  	// Fields forwarded to the Grampus job-creation request.
  	JobName        string
  	Command        string
  	ResourceSpecId string
  	// ImageUrl is an alternative to ImageId: supply one of the two. When both
  	// are set, ImageUrl takes precedence.
  	ImageUrl string
  	ImageId  string

  	// Descriptive fields stored on the Cloudbrain record.
  	DisplayJobName string
  	Uuid           string
  	Description    string

  	// Code, data and output locations.
  	CodeObsPath string
  	BootFile    string
  	BootFileUrl string
  	DataUrl     string
  	TrainUrl    string

  	WorkServerNumber int
  	EngineID         int64

  	// Source revision and job-version bookkeeping.
  	CommitID          string
  	IsLatestVersion   string
  	BranchName        string
  	PreVersionId      int64
  	PreVersionName    string
  	FlavorName        string
  	VersionCount      int
  	EngineName        string
  	TotalVersionCount int

  	// ComputeResource selects the accelerator family; GenerateTrainJob compares
  	// it with models.NPUResource and models.GPUResource.
  	ComputeResource string
  	DatasetName     string
  	// Params is stored as the Parameters field of the Cloudbrain record.
  	Params string
  }
  51. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
  52. createTime := timeutil.TimeStampNow()
  53. jobResult, err := createJob(models.CreateGrampusJobRequest{
  54. Name: req.JobName,
  55. Tasks: []models.GrampusTasks{
  56. {
  57. Name: req.JobName,
  58. Command: req.Command,
  59. ResourceSpecId: req.ResourceSpecId,
  60. ImageId: req.ImageId,
  61. ImageUrl: req.ImageUrl,
  62. ReplicaNum: 0,
  63. },
  64. },
  65. })
  66. if err != nil {
  67. log.Error("createJob failed: %v", err.Error())
  68. return err
  69. }
  70. jobID := jobResult.JobInfo.JobID
  71. err = models.CreateCloudbrain(&models.Cloudbrain{
  72. Status: TransTrainJobStatus(jobResult.JobInfo.Status),
  73. UserID: ctx.User.ID,
  74. RepoID: ctx.Repo.Repository.ID,
  75. JobID: jobID,
  76. JobName: req.JobName,
  77. DisplayJobName: req.DisplayJobName,
  78. JobType: string(models.JobTypeTrain),
  79. Type: models.TypeCloudBrainGrampus,
  80. Uuid: req.Uuid,
  81. DatasetName: req.DatasetName,
  82. CommitID: req.CommitID,
  83. IsLatestVersion: req.IsLatestVersion,
  84. ComputeResource: req.ComputeResource,
  85. ImageID: req.ImageId,
  86. TrainUrl: req.TrainUrl,
  87. BranchName: req.BranchName,
  88. Parameters: req.Params,
  89. BootFile: req.BootFile,
  90. DataUrl: req.DataUrl,
  91. FlavorCode: req.ResourceSpecId,
  92. Description: req.Description,
  93. WorkServerNumber: req.WorkServerNumber,
  94. FlavorName: req.FlavorName,
  95. EngineName: req.EngineName,
  96. VersionCount: req.VersionCount,
  97. TotalVersionCount: req.TotalVersionCount,
  98. CreatedUnix: createTime,
  99. UpdatedUnix: createTime,
  100. })
  101. if err != nil {
  102. log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
  103. return err
  104. }
  105. var actionType models.ActionType
  106. if req.ComputeResource == models.NPUResource {
  107. actionType = models.ActionCreateTrainTask
  108. } else if req.ComputeResource == models.GPUResource {
  109. actionType = models.ActionCreateGPUTrainTask
  110. }
  111. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, actionType)
  112. return nil
  113. }
  // TransTrainJobStatus converts a job status string into the upper-case form
  // stored on the Cloudbrain record. The exact, lower-case value "pending" is
  // reported as "WAITING"; every other value is upper-cased as is.
  func TransTrainJobStatus(status string) string {
  	// The comparison is case-sensitive: "PENDING" or "Pending" is not remapped.
  	if status == "pending" {
  		return "WAITING"
  	}
  	return strings.ToUpper(status)
  }