You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 5.6 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. package modelarts_cd
  2. import (
  3. "errors"
  4. "strconv"
  5. "strings"
  6. "code.gitea.io/gitea/models"
  7. "code.gitea.io/gitea/modules/context"
  8. "code.gitea.io/gitea/modules/log"
  9. "code.gitea.io/gitea/modules/notification"
  10. "code.gitea.io/gitea/modules/setting"
  11. "code.gitea.io/gitea/modules/timeutil"
  12. )
  // Notebook (debug job) settings.
  const (
  	storageTypeOBS = "obs"

  	// autoStopDuration is 4*60*60 (presumably seconds, i.e. four hours).
  	autoStopDuration = 4 * 60 * 60
  	// autoStopDurationMs is autoStopDuration scaled by 1000; it is the value
  	// GenerateNotebook sends as the notebook Duration.
  	autoStopDurationMs = autoStopDuration * 1000

  	// MORDELART_USER_IMAGE_ENGINE_ID keeps its original (misspelled,
  	// non-MixedCaps) name because it is exported and may be referenced
  	// outside this file.
  	MORDELART_USER_IMAGE_ENGINE_ID = -1

  	DataSetMountPath = "/home/ma-user/work"
  	NotebookEnv      = "Python3"
  	NotebookType     = "Ascend"
  	FlavorInfo       = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
  )

  // Train-job settings.
  const (
  	// Path segments used for train-job storage.
  	CodePath   = "/code/"
  	OutputPath = "/output/"
  	ResultPath = "/result/"
  	LogPath    = "/log/"
  	JobPath    = "/job/"

  	OrderDesc = "desc" // query downward
  	OrderAsc  = "asc"  // query upward
  	Lines     = 500

  	// Parameter names.
  	TrainUrl     = "train_url"
  	DataUrl      = "data_url"
  	MultiDataUrl = "multi_data_url"
  	ResultUrl    = "result_url"
  	CkptUrl      = "ckpt_url"
  	DeviceTarget = "device_target"
  	Ascend       = "Ascend"

  	PerPage = 10

  	// Version bookkeeping.
  	IsLatestVersion   = "1"
  	NotLatestVersion  = "0"
  	VersionCountOne   = 1
  	SortByCreateTime  = "create_time"
  	ConfigTypeCustom  = "custom"
  	TotalVersionCount = 1
  )
  // VersionInfo is a list of version entries, carried under the JSON key
  // "version". Each entry has an integer id, a value and a url.
  type VersionInfo struct {
  	Version []struct {
  		ID    int    `json:"id"`
  		Value string `json:"value"`
  		Url   string `json:"url"`
  	} `json:"version"`
  }
  // Flavor is a list of flavor entries, carried under the JSON key "flavor".
  // Each entry pairs a code with a value.
  type Flavor struct {
  	Info []struct {
  		Code  string `json:"code"`
  		Value string `json:"value"`
  	} `json:"flavor"`
  }
  // Engine is a list of engine entries, carried under the JSON key "engine".
  // Each entry pairs an integer id with a value.
  type Engine struct {
  	Info []struct {
  		ID    int    `json:"id"`
  		Value string `json:"value"`
  	} `json:"engine"`
  }
  // ResourcePool is a list of resource-pool entries, carried under the JSON key
  // "resource_pool". Each entry pairs a string id with a value.
  type ResourcePool struct {
  	Info []struct {
  		ID    string `json:"id"`
  		Value string `json:"value"`
  	} `json:"resource_pool"`
  }
  // Parameters is a list of label/value pairs, carried under the JSON key
  // "parameter".
  type Parameters struct {
  	Parameter []struct {
  		Label string `json:"label"`
  		Value string `json:"value"`
  	} `json:"parameter"`
  }
  79. func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
  80. imageName, err := GetNotebookImageName(imageId)
  81. if err != nil {
  82. log.Error("GetNotebookImageName failed: %v", err.Error())
  83. return err
  84. }
  85. createTime := timeutil.TimeStampNow()
  86. jobResult, err := createNotebook(models.CreateNotebookWithoutPoolParams{
  87. JobName: jobName,
  88. Description: description,
  89. Flavor: spec.SourceSpecId,
  90. Duration: autoStopDurationMs,
  91. ImageID: imageId,
  92. Feature: models.NotebookFeature,
  93. Volume: models.VolumeReq{
  94. Capacity: setting.Capacity,
  95. Category: models.EVSCategory,
  96. Ownership: models.ManagedOwnership,
  97. },
  98. WorkspaceID: "0",
  99. })
  100. if err != nil {
  101. log.Error("createNotebook failed: %v", err.Error())
  102. if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
  103. log.Info("(%s)unknown error, set temp status", displayJobName)
  104. errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
  105. JobID: models.TempJobId,
  106. VersionID: models.TempVersionId,
  107. Status: models.TempJobStatus,
  108. Type: models.TypeCDCenter,
  109. JobName: jobName,
  110. JobType: string(models.JobTypeDebug),
  111. })
  112. if errTemp != nil {
  113. log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
  114. return errTemp
  115. }
  116. }
  117. return err
  118. }
  119. task := &models.Cloudbrain{
  120. Status: jobResult.Status,
  121. UserID: ctx.User.ID,
  122. RepoID: ctx.Repo.Repository.ID,
  123. JobID: jobResult.ID,
  124. JobName: jobName,
  125. FlavorCode: spec.SourceSpecId,
  126. DisplayJobName: displayJobName,
  127. JobType: string(models.JobTypeDebug),
  128. Type: models.TypeCDCenter,
  129. Uuid: uuid,
  130. ComputeResource: models.NPUResource,
  131. Image: imageName,
  132. Description: description,
  133. CreatedUnix: createTime,
  134. UpdatedUnix: createTime,
  135. Spec: spec,
  136. }
  137. err = models.CreateCloudbrain(task)
  138. if err != nil {
  139. return err
  140. }
  141. stringId := strconv.FormatInt(task.ID, 10)
  142. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
  143. return nil
  144. }
  145. func GetNotebookImageName(imageId string) (string, error) {
  146. var validImage = false
  147. var imageName = ""
  148. for _, imageInfo := range setting.StImageInfos.ImageInfo {
  149. if imageInfo.Id == imageId {
  150. validImage = true
  151. imageName = imageInfo.Value
  152. }
  153. }
  154. if !validImage {
  155. log.Error("the image id(%s) is invalid", imageId)
  156. return imageName, errors.New("the image id is invalid")
  157. }
  158. return imageName, nil
  159. }
  160. /*
  161. func HandleNotebookInfo(task *models.Cloudbrain) error {
  162. result, err := GetNotebook(task.JobID)
  163. if err != nil {
  164. log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
  165. return err
  166. }
  167. if result != nil {
  168. oldStatus := task.Status
  169. task.Status = result.Status
  170. if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
  171. task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
  172. }
  173. if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
  174. task.EndTime = timeutil.TimeStampNow()
  175. }
  176. task.CorrectCreateUnix()
  177. task.ComputeAndSetDuration()
  178. if oldStatus != task.Status {
  179. notification.NotifyChangeCloudbrainStatus(task, oldStatus)
  180. }
  181. if task.FlavorCode == "" {
  182. task.FlavorCode = result.Flavor
  183. }
  184. err = models.UpdateJob(task)
  185. if err != nil {
  186. log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
  187. return err
  188. }
  189. }
  190. return nil
  191. }
  192. */