You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

notebook.go 18 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago

  1. package cloudbrainTask
  2. import (
  3. "fmt"
  4. "net/http"
  5. "path"
  6. "strings"
  7. "code.gitea.io/gitea/modules/notebook"
  8. "code.gitea.io/gitea/modules/modelarts"
  9. "code.gitea.io/gitea/modules/modelarts_cd"
  10. "code.gitea.io/gitea/modules/git"
  11. "code.gitea.io/gitea/modules/cloudbrain"
  12. "code.gitea.io/gitea/modules/log"
  13. "code.gitea.io/gitea/modules/redis/redis_key"
  14. "code.gitea.io/gitea/modules/redis/redis_lock"
  15. "code.gitea.io/gitea/modules/storage"
  16. "code.gitea.io/gitea/services/cloudbrain/resource"
  17. "code.gitea.io/gitea/services/reward/point/account"
  18. "code.gitea.io/gitea/modules/setting"
  19. cloudbrainService "code.gitea.io/gitea/services/cloudbrain"
  20. repo_service "code.gitea.io/gitea/services/repository"
  21. "code.gitea.io/gitea/models"
  22. "code.gitea.io/gitea/modules/context"
  23. api "code.gitea.io/gitea/modules/structs"
  24. "code.gitea.io/gitea/modules/util"
  25. )
  // File-notebook constants: the accepted notebook file extension and the
  // numeric resource types compared against CreateFileNotebookJobOption.Type.
  const (
  	// NoteBookExtension is the extension a file must have to be opened as a notebook.
  	NoteBookExtension = ".ipynb"

  	// CPUType is the option.Type value for a CPU notebook.
  	CPUType = 0
  	// GPUType is the option.Type value for a GPU notebook.
  	GPUType = 1
  	// NPUType is the option.Type value for an NPU notebook.
  	NPUType = 2
  )
  30. func FileNotebookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption) {
  31. if ctx.Written() {
  32. return
  33. }
  34. if path.Ext(option.File) != NoteBookExtension {
  35. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong")))
  36. return
  37. }
  38. isNotebookFileExist, _ := isNoteBookFileExist(ctx, option)
  39. if !isNotebookFileExist {
  40. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  41. return
  42. }
  43. sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName)
  44. if err != nil {
  45. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  46. return
  47. }
  48. permission, err := models.GetUserRepoPermission(sourceRepo, ctx.User)
  49. if err != nil {
  50. log.Error("Get permission failed", err)
  51. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  52. return
  53. }
  54. if !permission.CanRead(models.UnitTypeCode) {
  55. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_no_right")))
  56. return
  57. }
  58. //create repo if not exist
  59. repo, _ := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName)
  60. if repo == nil {
  61. repo, err = repo_service.CreateRepository(ctx.User, ctx.User, models.CreateRepoOptions{
  62. Name: setting.FileNoteBook.ProjectName,
  63. Alias: "",
  64. Description: "",
  65. IssueLabels: "",
  66. Gitignores: "",
  67. License: "",
  68. Readme: "Default",
  69. IsPrivate: false,
  70. AutoInit: true,
  71. DefaultBranch: "master",
  72. })
  73. if err != nil {
  74. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.failed_to_create_notebook_repo", setting.FileNoteBook.ProjectName)))
  75. return
  76. }
  77. } else {
  78. noteBook, _ := models.GetWaitOrRunFileNotebookByRepo(repo.ID, getCloudbrainType(option.Type))
  79. if noteBook != nil {
  80. if isRepoConfilcts(option, noteBook) {
  81. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_repo_conflict")))
  82. return
  83. }
  84. if isNotebookSpecMath(option, noteBook) {
  85. err = downloadCode(sourceRepo, getCodePath(noteBook.JobName, sourceRepo), option.BranchName)
  86. if err != nil {
  87. log.Error("download code failed", err)
  88. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  89. return
  90. }
  91. noteBook.BootFile += ";" + getBootFile(option.File, option.OwnerName, option.ProjectName)
  92. noteBook.BranchName += ";" + option.BranchName
  93. err := models.UpdateJob(noteBook)
  94. if err != nil {
  95. log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
  96. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  97. return
  98. }
  99. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  100. Code: 0,
  101. Message: noteBook.JobID,
  102. })
  103. return
  104. }
  105. }
  106. }
  107. if option.Type <= GPUType {
  108. cloudBrainFileNoteBookCreate(ctx, option, repo, sourceRepo)
  109. } else {
  110. modelartsFileNoteBookCreate(ctx, option, repo, sourceRepo)
  111. }
  112. }
  113. func FileNotebookStatus(ctx *context.Context, option api.CreateFileNotebookJobOption) {
  114. if ctx.Written() {
  115. return
  116. }
  117. if path.Ext(option.File) != NoteBookExtension {
  118. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_select_wrong")))
  119. return
  120. }
  121. isNotebookFileExist, _ := isNoteBookFileExist(ctx, option)
  122. if !isNotebookFileExist {
  123. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist")))
  124. return
  125. }
  126. task, err := models.GetCloudbrainByJobID(option.JobId)
  127. if err != nil {
  128. log.Error("job not found:"+option.JobId, err)
  129. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Job id may not be right. can not find job."))
  130. return
  131. }
  132. if task.BootFile == "" || task.Status != string(models.ModelArtsRunning) {
  133. log.Warn("Boot file is empty or status is running. ")
  134. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Boot file is empty or status is running."))
  135. return
  136. }
  137. if !isRepoFileMatch(option, task) {
  138. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("can not math repo file."))
  139. return
  140. }
  141. debugBaseUrl, token, err := getBaseUrlAndToken(task)
  142. if err != nil {
  143. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  144. return
  145. }
  146. if uploadNotebookFileIfCannotBroswer(debugBaseUrl, getBootFile(option.File, option.OwnerName, option.ProjectName), task, token) {
  147. ctx.JSON(http.StatusOK, models.BaseOKMessageApi)
  148. } else {
  149. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("upload failed."))
  150. }
  151. }
  152. func getBaseUrlAndToken(task *models.Cloudbrain) (string, string, error) {
  153. var debugBaseUrl string
  154. var token string
  155. if task.Type == models.TypeCloudBrainOne {
  156. debugBaseUrl = setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName + "/lab"
  157. } else {
  158. var result *models.GetNotebook2Result
  159. var err error
  160. if task.Type == models.TypeCloudBrainTwo {
  161. result, err = modelarts.GetNotebook2(task.JobID)
  162. } else if task.Type == models.TypeCDCenter {
  163. result, err = modelarts_cd.GetNotebook(task.JobID)
  164. }
  165. if err != nil || result == nil || result.Status != string(models.ModelArtsRunning) || result.Url == "" {
  166. log.Error("notebook job not found:"+task.JobID, err)
  167. return "", "", fmt.Errorf("can not get job or job is invalid.")
  168. }
  169. debugBaseUrl = result.Url
  170. token = result.Token
  171. }
  172. return debugBaseUrl, token, nil
  173. }
  174. func uploadNotebookFileIfCannotBroswer(debugBaseUrl string, bootFile string, task *models.Cloudbrain, token string) bool {
  175. c := &notebook.NotebookContent{
  176. Url: debugBaseUrl,
  177. Path: bootFile,
  178. PathType: "file",
  179. Token: token,
  180. }
  181. if c.IsNotebookFileCanBrowser() {
  182. return true
  183. } else {
  184. c.SetCookiesAndCsrf()
  185. c.UploadNoteBookFile(task)
  186. return c.IsNotebookFileCanBrowser()
  187. }
  188. }
  189. func isNotebookSpecMath(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool {
  190. if option.Type == NPUType || option.Type == CPUType {
  191. return true
  192. }
  193. spec, err := models.GetCloudbrainSpecByID(book.ID)
  194. if err != nil {
  195. log.Warn("can not get spec ", err)
  196. return false
  197. }
  198. return spec.AccCardsNum > 0
  199. }
  200. func isRepoConfilcts(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool {
  201. bootFiles := strings.Split(book.BootFile, ";")
  202. branches := strings.Split(book.BranchName, ";")
  203. for i, bootFile := range bootFiles {
  204. splits := strings.Split(bootFile, "/")
  205. if len(splits) >= 3 {
  206. if splits[0] == option.OwnerName && splits[1] == option.ProjectName && branches[i] != option.BranchName {
  207. return true
  208. }
  209. }
  210. }
  211. return false
  212. }
  213. func isRepoFileMatch(option api.CreateFileNotebookJobOption, book *models.Cloudbrain) bool {
  214. bootFiles := strings.Split(book.BootFile, ";")
  215. branches := strings.Split(book.BranchName, ";")
  216. for i, bootFile := range bootFiles {
  217. if branches[i] == option.BranchName && getBootFile(option.File, option.OwnerName, option.ProjectName) == bootFile {
  218. return true
  219. }
  220. }
  221. return false
  222. }
  223. func UploadNotebookFiles(task *models.Cloudbrain) {
  224. if task.Status == string(models.JobRunning) && task.BootFile != "" {
  225. debugBaseUrl, token, err := getBaseUrlAndToken(task)
  226. if err != nil {
  227. log.Error("can not get base url:", err)
  228. return
  229. }
  230. bootFiles := strings.Split(task.BootFile, ";")
  231. for _, bootFile := range bootFiles {
  232. uploadNotebookFileIfCannotBroswer(debugBaseUrl, bootFile, task, token)
  233. }
  234. }
  235. }
  236. func cloudBrainFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  237. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  238. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  239. jobType := string(models.JobTypeDebug)
  240. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), jobType, displayJobName))
  241. defer lock.UnLock()
  242. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  243. if !isOk {
  244. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  245. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  246. return
  247. }
  248. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName)
  249. if err == nil {
  250. if len(tasks) != 0 {
  251. log.Error("the job name did already exist", ctx.Data["MsgID"])
  252. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  253. return
  254. }
  255. } else {
  256. if !models.IsErrJobNotExist(err) {
  257. log.Error("system error, %v", err, ctx.Data["MsgID"])
  258. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  259. return
  260. }
  261. }
  262. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType)
  263. if err != nil {
  264. log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
  265. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  266. return
  267. } else {
  268. if count >= 1 {
  269. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  270. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  271. Code: 2,
  272. Message: ctx.Tr("repo.cloudbrain.morethanonejob"),
  273. })
  274. return
  275. }
  276. }
  277. err = downloadCode(sourceRepo, getCodePath(jobName, sourceRepo), option.BranchName)
  278. if err != nil {
  279. log.Error("download code failed", err)
  280. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  281. return
  282. }
  283. command := cloudbrain.GetCloudbrainDebugCommand()
  284. specId := setting.FileNoteBook.SpecIdGPU
  285. if option.Type == 0 {
  286. specId = setting.FileNoteBook.SpecIdCPU
  287. }
  288. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  289. JobType: models.JobType(jobType),
  290. ComputeResource: models.GPU,
  291. Cluster: models.OpenICluster,
  292. AiCenterCode: models.AICenterOfCloudBrainOne})
  293. if err != nil || spec == nil {
  294. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  295. return
  296. }
  297. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  298. log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
  299. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  300. return
  301. }
  302. ctx.Repo = &context.Repository{
  303. Repository: repo,
  304. }
  305. req := cloudbrain.GenerateCloudBrainTaskReq{
  306. Ctx: ctx,
  307. DisplayJobName: displayJobName,
  308. JobName: jobName,
  309. Image: setting.FileNoteBook.ImageGPU,
  310. Command: command,
  311. Uuids: "",
  312. DatasetNames: "",
  313. DatasetInfos: nil,
  314. CodePath: storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"),
  315. ModelPath: storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"),
  316. BenchmarkPath: storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"),
  317. Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
  318. BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
  319. JobType: jobType,
  320. Description: getDescription(option),
  321. BranchName: option.BranchName,
  322. BootFile: getBootFile(option.File, option.OwnerName, option.ProjectName),
  323. Params: "{\"parameter\":[]}",
  324. CommitID: "",
  325. BenchmarkTypeID: 0,
  326. BenchmarkChildTypeID: 0,
  327. ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
  328. Spec: spec,
  329. }
  330. jobId, err := cloudbrain.GenerateTask(req)
  331. if err != nil {
  332. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  333. return
  334. }
  335. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  336. Code: 0,
  337. Message: jobId,
  338. })
  339. }
  340. func getCloudbrainType(optionType int) int {
  341. if optionType < 1 {
  342. return models.TypeCloudBrainOne
  343. }
  344. return models.TypeCloudBrainTwo
  345. }
  346. func getCodePath(jobName string, repo *models.Repository) string {
  347. return setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" + repo.OwnerName + "/" + repo.Name
  348. }
  349. func getDescription(option api.CreateFileNotebookJobOption) string {
  350. return option.OwnerName + "/" + option.ProjectName + "/" + option.File
  351. }
  352. func modelartsFileNoteBookCreate(ctx *context.Context, option api.CreateFileNotebookJobOption, repo *models.Repository, sourceRepo *models.Repository) {
  353. displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name)
  354. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  355. lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName))
  356. isOk, err := lock.Lock(models.CloudbrainKeyDuration)
  357. if !isOk {
  358. log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])
  359. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  360. return
  361. }
  362. defer lock.UnLock()
  363. count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainTwo, string(models.JobTypeDebug))
  364. if err != nil {
  365. log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
  366. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  367. return
  368. } else {
  369. if count >= 1 {
  370. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  371. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  372. Code: 2,
  373. Message: ctx.Tr("repo.cloudbrain.morethanonejob"),
  374. })
  375. return
  376. }
  377. }
  378. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeDebug), displayJobName)
  379. if err == nil {
  380. if len(tasks) != 0 {
  381. log.Error("the job name did already exist", ctx.Data["MsgID"])
  382. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
  383. return
  384. }
  385. } else {
  386. if !models.IsErrJobNotExist(err) {
  387. log.Error("system error, %v", err, ctx.Data["MsgID"])
  388. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error."))
  389. return
  390. }
  391. }
  392. err = downloadCode(sourceRepo, getCodePath(jobName, sourceRepo), option.BranchName)
  393. if err != nil {
  394. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
  395. return
  396. }
  397. var aiCenterCode = models.AICenterOfCloudBrainTwo
  398. var specId = setting.FileNoteBook.SpecIdNPU
  399. if setting.ModelartsCD.Enabled {
  400. aiCenterCode = models.AICenterOfChengdu
  401. specId = setting.FileNoteBook.SpecIdNPUCD
  402. }
  403. spec, err := resource.GetAndCheckSpec(ctx.User.ID, specId, models.FindSpecsOptions{
  404. JobType: models.JobTypeDebug,
  405. ComputeResource: models.NPU,
  406. Cluster: models.OpenICluster,
  407. AiCenterCode: aiCenterCode})
  408. if err != nil || spec == nil {
  409. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.wrong_specification")))
  410. return
  411. }
  412. if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
  413. log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
  414. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
  415. return
  416. }
  417. ctx.Repo = &context.Repository{
  418. Repository: repo,
  419. }
  420. var jobId string
  421. req := cloudbrain.GenerateModelArtsNotebookReq{
  422. DisplayJobName: displayJobName,
  423. JobName: jobName,
  424. Description: getDescription(option),
  425. ImageId: setting.FileNoteBook.ImageIdNPU,
  426. Spec: spec,
  427. BootFile: "",
  428. AutoStopDurationMs: modelarts.AutoStopDurationMs / 4,
  429. BranchName: option.BranchName,
  430. }
  431. if setting.ModelartsCD.Enabled {
  432. req.ImageId = setting.FileNoteBook.ImageIdNPUCD
  433. jobId, err = modelarts_cd.GenerateNotebook(ctx, req)
  434. } else {
  435. jobId, err = modelarts.GenerateNotebook2(ctx, req)
  436. }
  437. if err != nil {
  438. log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
  439. ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
  440. return
  441. }
  442. ctx.JSON(http.StatusOK, models.BaseMessageApi{
  443. Code: 0,
  444. Message: jobId,
  445. })
  446. }
  447. func isNoteBookFileExist(ctx *context.Context, option api.CreateFileNotebookJobOption) (bool, error) {
  448. repoPathOfNoteBook := models.RepoPath(option.OwnerName, option.ProjectName)
  449. gitRepoOfNoteBook, err := git.OpenRepository(repoPathOfNoteBook)
  450. if err != nil {
  451. log.Error("RepoRef Invalid repo "+repoPathOfNoteBook, err.Error())
  452. return false, err
  453. }
  454. // We opened it, we should close it
  455. defer func() {
  456. // If it's been set to nil then assume someone else has closed it.
  457. if gitRepoOfNoteBook != nil {
  458. gitRepoOfNoteBook.Close()
  459. }
  460. }()
  461. fileExist, err := fileExists(gitRepoOfNoteBook, option.File, option.BranchName)
  462. if err != nil || !fileExist {
  463. log.Error("Get file error:", err, ctx.Data["MsgID"])
  464. return false, err
  465. }
  466. return true, nil
  467. }
  // getBootFile builds the boot file identifier of a notebook file in the form
  // "<ownerName>/<projectName>/<filePath>". The parts are joined verbatim; no
  // path cleaning is applied.
  func getBootFile(filePath string, ownerName string, projectName string) string {
  	repoPrefix := ownerName + "/" + projectName
  	return repoPrefix + "/" + filePath
  }
  471. func fileExists(gitRepo *git.Repository, path string, branch string) (bool, error) {
  472. commit, err := gitRepo.GetBranchCommit(branch)
  473. if err != nil {
  474. return false, err
  475. }
  476. if _, err := commit.GetTreeEntryByPath(path); err != nil {
  477. return false, err
  478. }
  479. return true, nil
  480. }