You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

notebook.go 12 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. package cloudbrainTask
  2. import (
  3. "fmt"
  4. "net/http"
  5. "path"
  6. "code.gitea.io/gitea/modules/modelarts"
  7. "code.gitea.io/gitea/modules/modelarts_cd"
  8. "code.gitea.io/gitea/modules/git"
  9. "code.gitea.io/gitea/modules/cloudbrain"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/redis/redis_key"
  12. "code.gitea.io/gitea/modules/redis/redis_lock"
  13. "code.gitea.io/gitea/modules/storage"
  14. "code.gitea.io/gitea/services/cloudbrain/resource"
  15. "code.gitea.io/gitea/services/reward/point/account"
  16. "code.gitea.io/gitea/modules/setting"
  17. cloudbrainService "code.gitea.io/gitea/services/cloudbrain"
  18. repo_service "code.gitea.io/gitea/services/repository"
  19. "code.gitea.io/gitea/models"
  20. "code.gitea.io/gitea/modules/context"
  21. api "code.gitea.io/gitea/modules/structs"
  22. "code.gitea.io/gitea/modules/util"
  23. )
// NoteBookExtension is the file extension that FileNotebookCreate requires
// option.File to have before it will start a notebook task.
const NoteBookExtension = ".ipynb"
  25. func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) {
  26. if ctx.Written() {
  27. return
  28. }
  29. if path.Ext(option.File) != NoteBookExtension {
  30. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong")))
  31. return
  32. }
  33. isNotebookFileExist, _ := isNoteBookFileExist(ctx, option)
  34. if !isNotebookFileExist {
  35. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  36. return
  37. }
  38. //create repo if not exist
  39. repo, err := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName)
  40. if repo == nil {
  41. repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{
  42. Name: setting.FileNoteBook.ProjectName,
  43. Alias: "",
  44. Description: "",
  45. IssueLabels: "",
  46. Gitignores: "",
  47. License: "",
  48. Readme: "Default",
  49. IsPrivate: false,
  50. AutoInit: true,
  51. DefaultBranch: "master",
  52. })
  53. }
  54. if err != nil {
  55. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.failed_to_create_repo"))
  56. return
  57. }
  58. if option.Type <= 1 {
  59. cloudBrainFileNoteBookCreate(ctx, option, repo)
  60. } else {
  61. modelartsFileNoteBookCreate(ctx, option, repo)
  62. }
  63. }
  64. func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository) {
  65. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  66. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  67. jobType := string(models.JobTypeDebug)
  68. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName))
  69. defer lock.UnLock()
  70. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  71. if !isOk {
  72. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  73. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  74. return
  75. }
  76. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
  77. if err == nil {
  78. if len(tasks) != 0 {
  79. log.Error("the job name did already exist", ctx.Data["MsgID"])
  80. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  81. return
  82. }
  83. } else {
  84. if !models.IsErrJobNotExist(err) {
  85. log.Error("system error, %v", err, ctx.Data["MsgID"])
  86. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  87. return
  88. }
  89. }
  90. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType)
  91. if err != nil {
  92. log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
  93. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  94. return
  95. } else {
  96. if count >= 1 {
  97. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  98. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob")))
  99. return
  100. }
  101. }
  102. sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName)
  103. if err != nil {
  104. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  105. return
  106. }
  107. errStr := uploadCodeFile(sourceRepo, getCodePath(jobName), option.BranchName, option.File, jobName)
  108. if errStr != "" {
  109. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  110. return
  111. }
  112. command := cloudbrain.GetCloudbrainDebugCommand()
  113. commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(option.BranchName)
  114. specId := setting.FileNoteBook.SpecIdGPU
  115. if option.Type == 0 {
  116. specId = setting.FileNoteBook.SpecIdCPU
  117. }
  118. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  119. JobType: models.JobType(jobType),
  120. ComputeResource: models.GPU,
  121. Cluster: models.OpenICluster,
  122. AiCenterCode: models.AICenterOfCloudBrainOne})
  123. if err != nil || spec == nil {
  124. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  125. return
  126. }
  127. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  128. log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
  129. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  130. return
  131. }
  132. req := cloudbrain.GenerateCloudBrainTaskReq{
  133. Ctx: ctx,
  134. DisplayJobName: displayJobName,
  135. JobName: jobName,
  136. Image: setting.FileNoteBook.ImageGPU,
  137. Command: command,
  138. Uuids: "",
  139. DatasetNames: "",
  140. DatasetInfos: nil,
  141. CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
  142. ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
  143. BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
  144. Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
  145. BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
  146. JobType: jobType,
  147. Description: getDescription(option),
  148. BranchName: option.BranchName,
  149. BootFile: option.File,
  150. Params: "{\"parameter\":[]}",
  151. CommitID: commitID,
  152. BenchmarkTypeID: 0,
  153. BenchmarkChildTypeID: 0,
  154. ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
  155. Spec: spec,
  156. }
  157. jobId, err := cloudbrain.GenerateTask(req)
  158. if err != nil {
  159. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  160. return
  161. }
  162. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  163. Code: 0,
  164. Message: jobId,
  165. })
  166. }
  167. func getCodePath(jobName string) string {
  168. return setting.JobPath + jobName + cloudbrain.CodeMountPath
  169. }
  170. func getDescription(option api.CreateFileNotebookJobOption) string {
  171. return option.OwnerName + "/" + option.ProjectName + "/" + option.File
  172. }
  173. func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository) {
  174. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  175. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  176. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
  177. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  178. if !isOk {
  179. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  180. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  181. return
  182. }
  183. defer lock.UnLock()
  184. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug))
  185. if err != nil {
  186. log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
  187. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  188. return
  189. } else {
  190. if count >= 1 {
  191. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  192. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob")))
  193. return
  194. }
  195. }
  196. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
  197. if err == nil {
  198. if len(tasks) != 0 {
  199. log.Error("the job name did already exist", ctx.Data["MsgID"])
  200. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  201. return
  202. }
  203. } else {
  204. if !models.IsErrJobNotExist(err) {
  205. log.Error("system error, %v", err, ctx.Data["MsgID"])
  206. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  207. return
  208. }
  209. }
  210. sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName)
  211. if err != nil {
  212. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  213. return
  214. }
  215. err = downloadCode(sourceRepo, getCodePath(jobName), option.BranchName)
  216. if err != nil {
  217. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  218. return
  219. }
  220. var aiCenterCode = models.AICenterOfCloudBrainTwo
  221. var specId = setting.FileNoteBook.SpecIdNPU
  222. if setting.ModelartsCD.Enabled {
  223. aiCenterCode = models.AICenterOfChengdu
  224. specId = setting.FileNoteBook.SpecIdNPUCD
  225. }
  226. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  227. JobType: models.JobTypeDebug,
  228. ComputeResource: models.NPU,
  229. Cluster: models.OpenICluster,
  230. AiCenterCode: aiCenterCode})
  231. if err != nil || spec == nil {
  232. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  233. return
  234. }
  235. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  236. log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
  237. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  238. return
  239. }
  240. ctx.Repo = &context.Repository{
  241. Repository: repo,
  242. }
  243. var jobId string
  244. if setting.ModelartsCD.Enabled {
  245. jobId, err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPUCD, spec, option.File)
  246. } else {
  247. jobId, err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPU, spec, option.File)
  248. }
  249. if err != nil {
  250. log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
  251. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  252. return
  253. }
  254. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  255. Code: 0,
  256. Message: jobId,
  257. })
  258. }
  259. func isNoteBookFileExist(ctx *context.Context, option api.CreateFileNotebookJobOption) (bool, error) {
  260. repoPathOfNoteBook := models.RepoPath(option.OwnerName, option.ProjectName)
  261. gitRepoOfNoteBook, err := git.OpenRepository(repoPathOfNoteBook)
  262. if err != nil {
  263. log.Error("RepoRef Invalid repo "+repoPathOfNoteBook, err.Error())
  264. return false, err
  265. }
  266. // We opened it, we should close it
  267. defer func() {
  268. // If it's been set to nil then assume someone else has closed it.
  269. if gitRepoOfNoteBook != nil {
  270. gitRepoOfNoteBook.Close()
  271. }
  272. }()
  273. fileExist, err := fileExists(gitRepoOfNoteBook, option.File, option.BranchName)
  274. if err != nil || !fileExist {
  275. log.Error("Get file error:", err, ctx.Data["MsgID"])
  276. return false, err
  277. }
  278. return true, nil
  279. }
  280. func uploadCodeFile(repo *models.Repository, codePath string, branchName string, filePath string, jobName string) string {
  281. err := downloadCode(repo, codePath, branchName)
  282. if err != nil {
  283. return "cloudbrain.load_code_failed"
  284. }
  285. err = uploadOneFileToMinio(codePath, filePath, jobName, cloudbrain.CodeMountPath+"/")
  286. if err != nil {
  287. return "cloudbrain.load_code_failed"
  288. }
  289. return ""
  290. }
  291. func fileExists(gitRepo *git.Repository, path string, branch string) (bool, error) {
  292. commit, err := gitRepo.GetBranchCommit(branch)
  293. if err != nil {
  294. return false, err
  295. }
  296. if _, err := commit.GetTreeEntryByPath(path); err != nil {
  297. return false, err
  298. }
  299. return true, nil
  300. }