You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

notebook.go 12 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. package cloudbrainTask
  2. import (
  3. "fmt"
  4. "net/http"
  5. "path"
  6. "code.gitea.io/gitea/modules/modelarts"
  7. "code.gitea.io/gitea/modules/modelarts_cd"
  8. "code.gitea.io/gitea/modules/git"
  9. "code.gitea.io/gitea/modules/cloudbrain"
  10. "code.gitea.io/gitea/modules/log"
  11. "code.gitea.io/gitea/modules/redis/redis_key"
  12. "code.gitea.io/gitea/modules/redis/redis_lock"
  13. "code.gitea.io/gitea/modules/storage"
  14. "code.gitea.io/gitea/services/cloudbrain/resource"
  15. "code.gitea.io/gitea/services/reward/point/account"
  16. "code.gitea.io/gitea/modules/setting"
  17. cloudbrainService "code.gitea.io/gitea/services/cloudbrain"
  18. repo_service "code.gitea.io/gitea/services/repository"
  19. "code.gitea.io/gitea/models"
  20. "code.gitea.io/gitea/modules/context"
  21. api "code.gitea.io/gitea/modules/structs"
  22. "code.gitea.io/gitea/modules/util"
  23. )
  24. const NoteBookExtension = ".ipynb"
  25. func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) {
  26. if ctx.Written() {
  27. return
  28. }
  29. if path.Ext(option.File) != NoteBookExtension {
  30. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong")))
  31. return
  32. }
  33. isNotebookFileExist, _ := isNoteBookFileExist(ctx, option)
  34. if !isNotebookFileExist {
  35. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  36. return
  37. }
  38. sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName)
  39. if err != nil {
  40. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  41. return
  42. }
  43. permission, err := models.GetUserRepoPermission(sourceRepo, ctx.User)
  44. if err != nil {
  45. log.Error("Get permission failed", err)
  46. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  47. return
  48. }
  49. if !permission.CanRead(models.UnitTypeCode) {
  50. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  51. return
  52. }
  53. //create repo if not exist
  54. repo, err := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName)
  55. if repo == nil {
  56. repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{
  57. Name: setting.FileNoteBook.ProjectName,
  58. Alias: "",
  59. Description: "",
  60. IssueLabels: "",
  61. Gitignores: "",
  62. License: "",
  63. Readme: "Default",
  64. IsPrivate: false,
  65. AutoInit: true,
  66. DefaultBranch: "master",
  67. })
  68. }
  69. if err != nil {
  70. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("repo.failed_to_create_repo"))
  71. return
  72. }
  73. if option.Type <= 1 {
  74. cloudBrainFileNoteBookCreate(ctx, option, repo, sourceRepo)
  75. } else {
  76. modelartsFileNoteBookCreate(ctx, option, repo, sourceRepo)
  77. }
  78. }
  79. func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  80. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  81. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  82. jobType := string(models.JobTypeDebug)
  83. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName))
  84. defer lock.UnLock()
  85. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  86. if !isOk {
  87. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  88. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  89. return
  90. }
  91. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
  92. if err == nil {
  93. if len(tasks) != 0 {
  94. log.Error("the job name did already exist", ctx.Data["MsgID"])
  95. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  96. return
  97. }
  98. } else {
  99. if !models.IsErrJobNotExist(err) {
  100. log.Error("system error, %v", err, ctx.Data["MsgID"])
  101. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  102. return
  103. }
  104. }
  105. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType)
  106. if err != nil {
  107. log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
  108. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  109. return
  110. } else {
  111. if count >= 1 {
  112. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  113. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob")))
  114. return
  115. }
  116. }
  117. errStr := uploadCodeFile(sourceRepo, getCodePath(jobName), option.BranchName, option.File, jobName)
  118. if errStr != "" {
  119. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  120. return
  121. }
  122. command := cloudbrain.GetCloudbrainDebugCommand()
  123. specId := setting.FileNoteBook.SpecIdGPU
  124. if option.Type == 0 {
  125. specId = setting.FileNoteBook.SpecIdCPU
  126. }
  127. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  128. JobType: models.JobType(jobType),
  129. ComputeResource: models.GPU,
  130. Cluster: models.OpenICluster,
  131. AiCenterCode: models.AICenterOfCloudBrainOne})
  132. if err != nil || spec == nil {
  133. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  134. return
  135. }
  136. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  137. log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
  138. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  139. return
  140. }
  141. ctx.Repo = &context.Repository{
  142. Repository: repo,
  143. }
  144. req := cloudbrain.GenerateCloudBrainTaskReq{
  145. Ctx: ctx,
  146. DisplayJobName: displayJobName,
  147. JobName: jobName,
  148. Image: setting.FileNoteBook.ImageGPU,
  149. Command: command,
  150. Uuids: "",
  151. DatasetNames: "",
  152. DatasetInfos: nil,
  153. CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
  154. ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
  155. BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
  156. Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
  157. BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
  158. JobType: jobType,
  159. Description: getDescription(option),
  160. BranchName: option.BranchName,
  161. BootFile: option.File,
  162. Params: "{\"parameter\":[]}",
  163. CommitID: "",
  164. BenchmarkTypeID: 0,
  165. BenchmarkChildTypeID: 0,
  166. ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
  167. Spec: spec,
  168. }
  169. jobId, err := cloudbrain.GenerateTask(req)
  170. if err != nil {
  171. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  172. return
  173. }
  174. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  175. Code: 0,
  176. Message: jobId,
  177. })
  178. }
  179. func getCodePath(jobName string) string {
  180. return setting.JobPath + jobName + cloudbrain.CodeMountPath
  181. }
  182. func getDescription(option api.CreateFileNotebookJobOption) string {
  183. return option.OwnerName + "/" + option.ProjectName + "/" + option.File
  184. }
  185. func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  186. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  187. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  188. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
  189. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  190. if !isOk {
  191. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  192. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  193. return
  194. }
  195. defer lock.UnLock()
  196. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug))
  197. if err != nil {
  198. log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
  199. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  200. return
  201. } else {
  202. if count >= 1 {
  203. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  204. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob")))
  205. return
  206. }
  207. }
  208. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
  209. if err == nil {
  210. if len(tasks) != 0 {
  211. log.Error("the job name did already exist", ctx.Data["MsgID"])
  212. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  213. return
  214. }
  215. } else {
  216. if !models.IsErrJobNotExist(err) {
  217. log.Error("system error, %v", err, ctx.Data["MsgID"])
  218. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  219. return
  220. }
  221. }
  222. err = downloadCode(sourceRepo, getCodePath(jobName), option.BranchName)
  223. if err != nil {
  224. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  225. return
  226. }
  227. var aiCenterCode = models.AICenterOfCloudBrainTwo
  228. var specId = setting.FileNoteBook.SpecIdNPU
  229. if setting.ModelartsCD.Enabled {
  230. aiCenterCode = models.AICenterOfChengdu
  231. specId = setting.FileNoteBook.SpecIdNPUCD
  232. }
  233. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  234. JobType: models.JobTypeDebug,
  235. ComputeResource: models.NPU,
  236. Cluster: models.OpenICluster,
  237. AiCenterCode: aiCenterCode})
  238. if err != nil || spec == nil {
  239. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  240. return
  241. }
  242. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  243. log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
  244. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  245. return
  246. }
  247. ctx.Repo = &context.Repository{
  248. Repository: repo,
  249. }
  250. var jobId string
  251. if setting.ModelartsCD.Enabled {
  252. jobId, err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPUCD, spec, option.File)
  253. } else {
  254. jobId, err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, "", getDescription(option), setting.FileNoteBook.ImageIdNPU, spec, option.File)
  255. }
  256. if err != nil {
  257. log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
  258. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  259. return
  260. }
  261. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  262. Code: 0,
  263. Message: jobId,
  264. })
  265. }
  266. func isNoteBookFileExist(ctx *context.Context, option api.CreateFileNotebookJobOption) (bool, error) {
  267. repoPathOfNoteBook := models.RepoPath(option.OwnerName, option.ProjectName)
  268. gitRepoOfNoteBook, err := git.OpenRepository(repoPathOfNoteBook)
  269. if err != nil {
  270. log.Error("RepoRef Invalid repo "+repoPathOfNoteBook, err.Error())
  271. return false, err
  272. }
  273. // We opened it, we should close it
  274. defer func() {
  275. // If it's been set to nil then assume someone else has closed it.
  276. if gitRepoOfNoteBook != nil {
  277. gitRepoOfNoteBook.Close()
  278. }
  279. }()
  280. fileExist, err := fileExists(gitRepoOfNoteBook, option.File, option.BranchName)
  281. if err != nil || !fileExist {
  282. log.Error("Get file error:", err, ctx.Data["MsgID"])
  283. return false, err
  284. }
  285. return true, nil
  286. }
  287. func uploadCodeFile(repo *models.Repository, codePath string, branchName string, filePath string, jobName string) string {
  288. err := downloadCode(repo, codePath, branchName)
  289. if err != nil {
  290. return "cloudbrain.load_code_failed"
  291. }
  292. err = uploadOneFileToMinio(codePath, filePath, jobName, cloudbrain.CodeMountPath+"/")
  293. if err != nil {
  294. return "cloudbrain.load_code_failed"
  295. }
  296. return ""
  297. }
  298. func fileExists(gitRepo *git.Repository, path string, branch string) (bool, error) {
  299. commit, err := gitRepo.GetBranchCommit(branch)
  300. if err != nil {
  301. return false, err
  302. }
  303. if _, err := commit.GetTreeEntryByPath(path); err != nil {
  304. return false, err
  305. }
  306. return true, nil
  307. }