You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 4.6 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package modelarts
  import (
  	"fmt"
  	"path"
  	"strconv"

  	"code.gitea.io/gitea/models"
  	"code.gitea.io/gitea/modules/context"
  	"code.gitea.io/gitea/modules/log"
  	"code.gitea.io/gitea/modules/setting"
  )
  // Defaults used when creating ModelArts notebooks, plus the JSON option
  // lists and path fragments used for train jobs.
  const (
  	// ---- notebook ----

  	// storageTypeOBS is the storage type sent with notebook creation requests.
  	storageTypeOBS = "obs"
  	// autoStopDuration is the auto-stop duration sent with notebook creation
  	// requests (4 * 60 * 60 = 14400; presumably seconds, i.e. 4 hours — confirm
  	// against the ModelArts API).
  	autoStopDuration = 4 * 60 * 60
  	// flavor is the flavor code used for every notebook.
  	flavor = "modelarts.kat1.xlarge"
  	// profileID is the profile sent with notebook creation requests.
  	// Previously: profileID = "Python3-ascend910-arm"
  	profileID = "efa847c0-7359-11eb-b34f-0255ac100057"
  	// poolID, poolName and poolType describe the pool notebooks are created in.
  	poolID   = "pool1328035d"
  	poolName = "train-private-1"
  	poolType = "USER_DEFINED"

  	// DataSetMountPath is a path inside the notebook environment
  	// (presumably where the dataset is mounted — confirm with callers).
  	DataSetMountPath = "/home/ma-user/work"
  	// NotebookEnv, NotebookType and FlavorInfo are descriptive labels for the
  	// notebook configuration above.
  	NotebookEnv  = "Python3"
  	NotebookType = "Ascend"
  	FlavorInfo   = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"

  	// ---- train-job ----

  	// ResourcePools is a JSON document matching the ResourcePool type.
  	// NOTE(review): the pool id here repeats the poolID constant; keep the two in sync.
  	ResourcePools = `{"resource_pool":[{"id":"pool1328035d", "value":"专属资源池"}]}`
  	// Engines is a JSON document matching the Engine type.
  	Engines = `{"engine":[{"id":1, "value":"Ascend-Powered-Engine"}]}`
  	// EngineVersions is a JSON document matching the VersionInfo type.
  	EngineVersions = `{"version":[` +
  		`{"id":118,"value":"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64"},` +
  		`{"id":119,"value":"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64"},` +
  		`{"id":120,"value":"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64"},` +
  		`{"id":117,"value":"TF-1.15-c75-python3.7-euleros2.8-aarch64"}` +
  		`]}`
  	// FlavorInfos is a JSON document matching the Flavor type.
  	FlavorInfos = `{"flavor":[` +
  		`{"code":"modelarts.bm.910.arm.public.2","value":"Ascend : 2 * Ascend 910 CPU:48 核 512GiB"},` +
  		`{"code":"modelarts.bm.910.arm.public.8","value":"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB"},` +
  		`{"code":"modelarts.bm.910.arm.public.4","value":"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB"},` +
  		`{"code":"modelarts.bm.910.arm.public.1","value":"Ascend : 1 * Ascend 910 CPU:24 核 256GiB"}` +
  		`]}`

  	// Path fragments for train-job artifacts (each has a leading and trailing slash).
  	CodePath   = "/code/"
  	OutputPath = "/output/"
  	LogPath    = "/log/"
  	JobPath    = "/job/"
  )
  // GenerateTrainJobReq carries the inputs GenerateTrainJob needs to create a
  // train job and to record it.
  type GenerateTrainJobReq struct {
  	// JobName and Description are sent with the creation request; JobName is
  	// also stored on the database record. Uuid is not read by GenerateTrainJob.
  	JobName, Uuid, Description string

  	// CodeObsPath is sent as the job's AppUrl and BootFile as its BootFileUrl.
  	CodeObsPath, BootFile string

  	// DataUrl and TrainUrl are forwarded unchanged to the job configuration.
  	DataUrl, TrainUrl string

  	// FlavorCode is sent as the code of the job's flavor.
  	FlavorCode string

  	// LogUrl and PoolID are forwarded unchanged to the job configuration.
  	LogUrl, PoolID string

  	// WorkServerNumber is sent as the job's WorkServerNum.
  	WorkServerNumber int

  	// EngineID is forwarded unchanged to the job configuration.
  	EngineID int64
  }
  // VersionInfo is the decoded form of a JSON document whose "version" key
  // holds a list of {id, value} entries.
  type VersionInfo struct {
  	// Version lists the entries found under the "version" key.
  	Version []struct {
  		ID    int    `json:"id"`    // numeric identifier of the entry
  		Value string `json:"value"` // text associated with the identifier
  	} `json:"version"`
  }
  // Flavor is the decoded form of a JSON document whose "flavor" key holds a
  // list of {code, value} entries.
  type Flavor struct {
  	// Info lists the entries found under the "flavor" key.
  	Info []struct {
  		Code  string `json:"code"`  // flavor code
  		Value string `json:"value"` // text associated with the code
  	} `json:"flavor"`
  }
  // Engine is the decoded form of a JSON document whose "engine" key holds a
  // list of {id, value} entries.
  type Engine struct {
  	// Info lists the entries found under the "engine" key.
  	Info []struct {
  		ID    int    `json:"id"`    // numeric identifier of the entry
  		Value string `json:"value"` // text associated with the identifier
  	} `json:"engine"`
  }
  // ResourcePool is the decoded form of a JSON document whose "resource_pool"
  // key holds a list of {id, value} entries.
  type ResourcePool struct {
  	// Info lists the entries found under the "resource_pool" key.
  	Info []struct {
  		ID    string `json:"id"`    // pool identifier (a string, unlike Engine and VersionInfo IDs)
  		Value string `json:"value"` // text associated with the identifier
  	} `json:"resource_pool"`
  }
  80. func GenerateTask(ctx *context.Context, jobName, uuid, description string) error {
  81. dataActualPath := setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  82. jobResult, err := createNotebook(models.CreateNotebookParams{
  83. JobName: jobName,
  84. Description:description,
  85. ProfileID: profileID,
  86. Flavor: flavor,
  87. Pool: models.Pool{
  88. ID: poolID,
  89. Name: poolName,
  90. Type: poolType,
  91. },
  92. Spec: models.Spec{
  93. Storage: models.Storage{
  94. Type: storageTypeOBS,
  95. Location:models.Location{
  96. Path: dataActualPath,
  97. },
  98. },
  99. AutoStop: models.AutoStop{
  100. Enable: true,
  101. Duration: autoStopDuration,
  102. },
  103. },
  104. })
  105. if err != nil {
  106. log.Error("CreateJob failed: %v", err.Error())
  107. return err
  108. }
  109. err = models.CreateCloudbrain(&models.Cloudbrain{
  110. Status: string(models.JobWaiting),
  111. UserID: ctx.User.ID,
  112. RepoID: ctx.Repo.Repository.ID,
  113. JobID: jobResult.ID,
  114. JobName: jobName,
  115. JobType: string(models.JobTypeDebug),
  116. Type: models.TypeCloudBrainNotebook,
  117. })
  118. if err != nil {
  119. return err
  120. }
  121. return nil
  122. }
  123. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error {
  124. jobResult, err := createTrainJob(models.CreateTrainJobParams{
  125. JobName: req.JobName,
  126. Description: req.Description,
  127. Config: models.Config{
  128. WorkServerNum: req.WorkServerNumber,
  129. AppUrl: req.CodeObsPath,
  130. BootFileUrl: req.BootFile,
  131. DataUrl: req.DataUrl,
  132. EngineID: req.EngineID,
  133. TrainUrl: req.TrainUrl,
  134. LogUrl: req.LogUrl,
  135. PoolID: req.PoolID,
  136. Flavor: models.Flavor{
  137. Code: req.FlavorCode,
  138. },
  139. },
  140. })
  141. if err != nil {
  142. log.Error("CreateJob failed: %v", err.Error())
  143. return err
  144. }
  145. err = models.CreateCloudbrain(&models.Cloudbrain{
  146. Status: strconv.Itoa(jobResult.Status),
  147. UserID: ctx.User.ID,
  148. RepoID: ctx.Repo.Repository.ID,
  149. JobID: strconv.FormatInt(jobResult.JobID, 10),
  150. JobName: req.JobName,
  151. JobType: string(models.JobTypeDebug),
  152. Type: models.TypeCloudBrainTrainJob,
  153. })
  154. if err != nil {
  155. return err
  156. }
  157. return nil
  158. }