You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cloudbrain.go 27 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
5 years ago
5 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001
  1. // Copyright 2016 The Gogs Authors. All rights reserved.
  2. // Copyright 2018 The Gitea Authors. All rights reserved.
  3. // Use of this source code is governed by a MIT-style
  4. // license that can be found in the LICENSE file.
  5. package repo
import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"net/http"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"

	"code.gitea.io/gitea/models"
	"code.gitea.io/gitea/modules/cloudbrain"
	"code.gitea.io/gitea/modules/context"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/modelarts"
	"code.gitea.io/gitea/modules/notification"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/storage"
	api "code.gitea.io/gitea/modules/structs"
	routerRepo "code.gitea.io/gitea/routers/repo"
	"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask"
)
  28. func CreateCloudBrain(ctx *context.APIContext, option api.CreateTrainJobOption) {
  29. if option.Type == 2 {
  30. cloudbrainTask.GrampusTrainJobGpuCreate(ctx.Context, option)
  31. }
  32. if option.Type == 3 {
  33. cloudbrainTask.GrampusTrainJobNpuCreate(ctx.Context, option)
  34. }
  35. }
  36. func CreateCloudBrainInferenceTask(ctx *context.APIContext, option api.CreateTrainJobOption) {
  37. if option.Type == 0 {
  38. cloudbrainTask.GrampusTrainJobGpuCreate(ctx.Context, option)
  39. }
  40. if option.Type == 1 {
  41. cloudbrainTask.GrampusTrainJobNpuCreate(ctx.Context, option)
  42. }
  43. }
  44. // cloudbrain get job task by jobid
  45. func GetCloudbrainTask(ctx *context.APIContext) {
  46. // swagger:operation GET /repos/{owner}/{repo}/cloudbrain/{jobid} cloudbrain jobTask
  47. // ---
  48. // summary: Get a single task
  49. // produces:
  50. // - application/json
  51. // parameters:
  52. // - name: owner
  53. // in: path
  54. // description: owner of the repo
  55. // type: string
  56. // required: true
  57. // - name: repo
  58. // in: path
  59. // description: name of the repo
  60. // type: string
  61. // required: true
  62. // - name: jobid
  63. // in: path
  64. // description: id of cloudbrain jobid
  65. // type: string
  66. // required: true
  67. // responses:
  68. // "200":
  69. // "$ref": "#/responses/Label"
  70. var (
  71. err error
  72. )
  73. ID := ctx.Params(":id")
  74. job, err := models.GetCloudbrainByID(ID)
  75. if err != nil {
  76. ctx.NotFound(err)
  77. log.Error("GetCloudbrainByID failed:", err)
  78. return
  79. }
  80. if job.JobType == string(models.JobTypeModelSafety) {
  81. routerRepo.GetAiSafetyTaskByJob(job)
  82. job, err = models.GetCloudbrainByID(ID)
  83. ctx.JSON(http.StatusOK, map[string]interface{}{
  84. "ID": ID,
  85. "JobName": job.JobName,
  86. "JobStatus": job.Status,
  87. "SubState": "",
  88. "CreatedTime": job.CreatedUnix.Format("2006-01-02 15:04:05"),
  89. "CompletedTime": job.UpdatedUnix.Format("2006-01-02 15:04:05"),
  90. "JobDuration": job.TrainJobDuration,
  91. })
  92. } else {
  93. jobResult, err := cloudbrain.GetJob(job.JobID)
  94. if err != nil {
  95. ctx.NotFound(err)
  96. log.Error("GetJob failed:", err)
  97. return
  98. }
  99. result, _ := models.ConvertToJobResultPayload(jobResult.Payload)
  100. if err != nil {
  101. ctx.NotFound(err)
  102. log.Error("ConvertToJobResultPayload failed:", err)
  103. return
  104. }
  105. oldStatus := job.Status
  106. job.Status = result.JobStatus.State
  107. taskRoles := result.TaskRoles
  108. taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
  109. if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
  110. job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
  111. job.ContainerID = taskRes.TaskStatuses[0].ContainerID
  112. job.Status = taskRes.TaskStatuses[0].State
  113. }
  114. if result.JobStatus.State != string(models.JobWaiting) {
  115. models.ParseAndSetDurationFromCloudBrainOne(result, job)
  116. if oldStatus != job.Status {
  117. notification.NotifyChangeCloudbrainStatus(job, oldStatus)
  118. }
  119. err = models.UpdateJob(job)
  120. if err != nil {
  121. log.Error("UpdateJob failed:", err)
  122. }
  123. }
  124. ctx.JSON(http.StatusOK, map[string]interface{}{
  125. "ID": ID,
  126. "JobName": result.Config.JobName,
  127. "JobStatus": result.JobStatus.State,
  128. "SubState": result.JobStatus.SubState,
  129. "CreatedTime": time.Unix(result.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05"),
  130. "CompletedTime": time.Unix(result.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05"),
  131. "JobDuration": job.TrainJobDuration,
  132. })
  133. }
  134. }
  135. func GetCloudBrainInferenceJob(ctx *context.APIContext) {
  136. jobID := ctx.Params(":jobid")
  137. job, err := models.GetCloudbrainByJobID(jobID)
  138. if err != nil {
  139. ctx.NotFound(err)
  140. return
  141. }
  142. jobResult, err := cloudbrain.GetJob(job.JobID)
  143. if err != nil {
  144. ctx.NotFound(err)
  145. log.Error("GetJob failed:", err)
  146. return
  147. }
  148. result, err := models.ConvertToJobResultPayload(jobResult.Payload)
  149. if err != nil {
  150. ctx.NotFound(err)
  151. log.Error("ConvertToJobResultPayload failed:", err)
  152. return
  153. }
  154. oldStatus := job.Status
  155. job.Status = result.JobStatus.State
  156. if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
  157. taskRoles := result.TaskRoles
  158. taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
  159. job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
  160. job.ContainerID = taskRes.TaskStatuses[0].ContainerID
  161. job.Status = taskRes.TaskStatuses[0].State
  162. }
  163. if result.JobStatus.State != string(models.JobWaiting) {
  164. models.ParseAndSetDurationFromCloudBrainOne(result, job)
  165. if oldStatus != job.Status {
  166. notification.NotifyChangeCloudbrainStatus(job, oldStatus)
  167. }
  168. err = models.UpdateJob(job)
  169. if err != nil {
  170. log.Error("UpdateJob failed:", err)
  171. }
  172. }
  173. ctx.JSON(http.StatusOK, map[string]interface{}{
  174. "JobID": jobID,
  175. "JobStatus": job.Status,
  176. "JobDuration": job.TrainJobDuration,
  177. })
  178. }
  179. func DelCloudBrainJob(ctx *context.APIContext) {
  180. jobID := ctx.Params(":jobid")
  181. errStr := cloudbrain.DelCloudBrainJob(jobID)
  182. if errStr != "" {
  183. ctx.JSON(http.StatusOK, map[string]interface{}{
  184. "Message": ctx.Tr(errStr),
  185. "VersionName": "1",
  186. "Code": 1,
  187. })
  188. } else {
  189. ctx.JSON(http.StatusOK, map[string]interface{}{
  190. "Message": "",
  191. "VersionName": "1",
  192. "Code": 0,
  193. })
  194. }
  195. }
  196. func InferencJobResultList(ctx *context.APIContext) {
  197. jobID := ctx.Params(":jobid")
  198. parentDir := ctx.Query("parentDir")
  199. dirArray := strings.Split(parentDir, "/")
  200. task, err := models.GetCloudbrainByJobID(jobID)
  201. if err != nil {
  202. log.Error("get cloud brain err:", err)
  203. ctx.ServerError("get cloud brain information failed:", err)
  204. }
  205. //get dirs
  206. dirs, err := routerRepo.GetResultDirs(task.JobName, parentDir)
  207. if err != nil {
  208. log.Error("GetModelDirs failed:%v", err.Error(), ctx.Data["msgID"])
  209. ctx.ServerError("GetModelDirs failed:", err)
  210. return
  211. }
  212. var fileInfos []storage.FileInfo
  213. err = json.Unmarshal([]byte(dirs), &fileInfos)
  214. if err != nil {
  215. log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
  216. ctx.ServerError("json.Unmarshal failed:", err)
  217. return
  218. }
  219. for i, fileInfo := range fileInfos {
  220. temp, _ := time.Parse("2006-01-02 15:04:05", fileInfo.ModTime)
  221. fileInfos[i].ModTime = temp.Local().Format("2006-01-02 15:04:05")
  222. }
  223. sort.Slice(fileInfos, func(i, j int) bool {
  224. return fileInfos[i].ModTime > fileInfos[j].ModTime
  225. })
  226. ctx.JSON(http.StatusOK, map[string]interface{}{
  227. "JobID": jobID,
  228. "StatusOK": 0,
  229. "Path": dirArray,
  230. "Dirs": fileInfos,
  231. "task": task,
  232. "PageIsCloudBrain": true,
  233. })
  234. }
  235. func GetCloudbrainModelConvertTask(ctx *context.APIContext) {
  236. var (
  237. err error
  238. )
  239. ID := ctx.Params(":id")
  240. job, err := models.QueryModelConvertById(ID)
  241. if err != nil {
  242. ctx.NotFound(err)
  243. log.Error("GetCloudbrainByID failed:", err)
  244. return
  245. }
  246. if job.IsGpuTrainTask() {
  247. jobResult, err := cloudbrain.GetJob(job.CloudBrainTaskId)
  248. if err != nil {
  249. ctx.NotFound(err)
  250. log.Error("GetJob failed:", err)
  251. return
  252. }
  253. result, _ := models.ConvertToJobResultPayload(jobResult.Payload)
  254. if err != nil {
  255. ctx.NotFound(err)
  256. log.Error("ConvertToJobResultPayload failed:", err)
  257. return
  258. }
  259. job.Status = result.JobStatus.State
  260. taskRoles := result.TaskRoles
  261. taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
  262. if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
  263. job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
  264. job.ContainerID = taskRes.TaskStatuses[0].ContainerID
  265. job.Status = taskRes.TaskStatuses[0].State
  266. }
  267. if result.JobStatus.State != string(models.JobWaiting) {
  268. models.ModelComputeAndSetDuration(job, result)
  269. err = models.UpdateModelConvert(job)
  270. if err != nil {
  271. log.Error("UpdateJob failed:", err)
  272. }
  273. }
  274. ctx.JSON(http.StatusOK, map[string]interface{}{
  275. "ID": ID,
  276. "JobName": result.Config.JobName,
  277. "JobStatus": result.JobStatus.State,
  278. "SubState": result.JobStatus.SubState,
  279. "CreatedTime": time.Unix(result.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05"),
  280. "CompletedTime": time.Unix(result.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05"),
  281. })
  282. } else {
  283. result, err := modelarts.GetTrainJob(job.CloudBrainTaskId, job.ModelArtsVersionId)
  284. if err != nil {
  285. log.Error("get modelart job failed:", err)
  286. ctx.NotFound(err)
  287. return
  288. }
  289. job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  290. job.RunTime = result.Duration / 1000
  291. job.TrainJobDuration = models.ConvertDurationToStr(job.RunTime)
  292. err = models.UpdateModelConvert(job)
  293. if err != nil {
  294. log.Error("UpdateJob failed:", err)
  295. }
  296. ctx.JSON(http.StatusOK, map[string]interface{}{
  297. "ID": ID,
  298. "JobStatus": job.Status,
  299. })
  300. }
  301. }
  302. func CloudbrainGetLogByJobId(jobId string, jobName string) map[string]interface{} {
  303. var hits []models.Hits
  304. result, err := cloudbrain.GetJobLog(jobId)
  305. if err != nil {
  306. log.Error("GetJobLog failed: %v", err)
  307. return nil
  308. }
  309. hits = result.Hits.Hits
  310. //if the size equal page_size, then take the scroll_id to get all log and delete the scroll_id(the num of scroll_id is limited)
  311. if len(result.Hits.Hits) >= cloudbrain.LogPageSize {
  312. for {
  313. resultNext, err := cloudbrain.GetJobAllLog(result.ScrollID)
  314. if err != nil {
  315. log.Error("GetJobAllLog failed: %v", err)
  316. } else {
  317. for _, hit := range resultNext.Hits.Hits {
  318. hits = append(hits, hit)
  319. }
  320. }
  321. if len(resultNext.Hits.Hits) < cloudbrain.LogPageSize {
  322. log.Info("get all log already")
  323. break
  324. }
  325. }
  326. }
  327. cloudbrain.DeleteJobLogToken(result.ScrollID)
  328. sort.Slice(hits, func(i, j int) bool {
  329. return hits[i].Sort[0] < hits[j].Sort[0]
  330. })
  331. var content string
  332. for _, log := range hits {
  333. content += log.Source.Message + "\n"
  334. }
  335. return map[string]interface{}{
  336. "JobName": jobName,
  337. "Content": content,
  338. }
  339. }
  340. func CloudbrainForModelConvertGetLog(ctx *context.Context) {
  341. ID := ctx.Params(":id")
  342. job, err := models.QueryModelConvertById(ID)
  343. if err != nil {
  344. log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
  345. ctx.ServerError(err.Error(), err)
  346. return
  347. }
  348. result := CloudbrainGetLogByJobId(job.CloudBrainTaskId, job.Name)
  349. if result == nil {
  350. log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
  351. ctx.ServerError(err.Error(), err)
  352. return
  353. }
  354. ctx.JSON(http.StatusOK, result)
  355. }
  356. func ModelSafetyGetLog(ctx *context.APIContext) {
  357. ID := ctx.Params(":id")
  358. job, err := models.GetCloudbrainByID(ID)
  359. if err != nil {
  360. log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
  361. ctx.ServerError(err.Error(), err)
  362. return
  363. }
  364. if job.JobType == string(models.JobTypeModelSafety) {
  365. if job.Type == models.TypeCloudBrainTwo {
  366. //TrainJobForModelConvertGetLog(ctx)
  367. var baseLine = ctx.Query("base_line")
  368. var order = ctx.Query("order")
  369. var lines = ctx.Query("lines")
  370. lines_int, err := strconv.Atoi(lines)
  371. if err != nil {
  372. log.Error("change lines(%d) string to int failed", lines_int)
  373. }
  374. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  375. log.Error("order(%s) check failed", order)
  376. ctx.JSON(http.StatusBadRequest, map[string]interface{}{
  377. "err_msg": "order check failed",
  378. })
  379. return
  380. }
  381. resultLogFile, err := modelarts.GetTrainJobLogFileNames(job.JobID, strconv.FormatInt(job.VersionID, 10))
  382. if err != nil {
  383. log.Error("GetTrainJobLogFileNames(%s) failed:%v", job.JobID, err.Error())
  384. }
  385. result, err := modelarts.GetTrainJobLog(job.JobID, strconv.FormatInt(job.VersionID, 10), baseLine, resultLogFile.LogFileList[0], order, lines_int)
  386. if err != nil {
  387. log.Error("GetTrainJobLog(%s) failed:%v", job.JobID, err.Error())
  388. }
  389. if err != nil {
  390. log.Error("trainJobGetLog(%s) failed:%v", job.JobID, err.Error())
  391. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  392. ctx.JSON(http.StatusOK, map[string]interface{}{
  393. "JobID": job.JobID,
  394. "LogFileName": "",
  395. "StartLine": "0",
  396. "EndLine": "0",
  397. "Content": "",
  398. "Lines": 0,
  399. "CanLogDownload": false,
  400. })
  401. return
  402. }
  403. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  404. ctx.JSON(http.StatusOK, map[string]interface{}{
  405. "JobID": job.JobID,
  406. "LogFileName": resultLogFile.LogFileList[0],
  407. "StartLine": result.StartLine,
  408. "EndLine": result.EndLine,
  409. "Content": result.Content,
  410. "Lines": result.Lines,
  411. "CanLogDownload": isCanDownloadLog(ctx, job),
  412. })
  413. }
  414. }
  415. //result := ""
  416. //ctx.JSON(http.StatusOK, result)
  417. }
  418. func isCanDownloadLog(ctx *context.APIContext, job *models.Cloudbrain) bool {
  419. if !ctx.IsSigned {
  420. return false
  421. }
  422. return ctx.IsUserSiteAdmin() || ctx.User.ID == job.UserID
  423. }
  424. func ModelSafetyDownloadLogFile(ctx *context.Context) {
  425. ID := ctx.Params(":id")
  426. job, err := models.GetCloudbrainByID(ID)
  427. if err != nil {
  428. log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
  429. ctx.ServerError(err.Error(), err)
  430. return
  431. }
  432. if job.JobType == string(models.JobTypeModelSafety) {
  433. if job.Type == models.TypeCloudBrainOne {
  434. CloudbrainDownloadLogFile(ctx)
  435. } else if job.Type == models.TypeCloudBrainTwo {
  436. ctx.SetParams("jobid", job.JobID)
  437. ctx.Req.Form.Set("version_name", job.VersionName)
  438. routerRepo.TrainJobDownloadLogFile(ctx)
  439. }
  440. }
  441. }
  442. func CloudbrainDownloadLogFile(ctx *context.Context) {
  443. ID := ctx.Params(":id")
  444. job, err := models.GetCloudbrainByID(ID)
  445. if err != nil {
  446. log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
  447. ctx.ServerError(err.Error(), err)
  448. return
  449. }
  450. if job.JobType == string(models.JobTypeModelSafety) {
  451. if job.Type == models.TypeCloudBrainTwo {
  452. ModelSafetyDownloadLogFile(ctx)
  453. return
  454. }
  455. }
  456. logDir := "/model"
  457. if job.JobType == string(models.JobTypeInference) || job.JobType == string(models.JobTypeModelSafety) {
  458. logDir = cloudbrain.ResultPath
  459. }
  460. files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, setting.CBCodePathPrefix+job.JobName+logDir, "")
  461. if err != nil {
  462. log.Error("query cloudbrain model failed: %v", err)
  463. return
  464. }
  465. fileName := ""
  466. for _, file := range files {
  467. if strings.HasSuffix(file.FileName, "log.txt") {
  468. fileName = file.FileName
  469. break
  470. }
  471. }
  472. if fileName != "" {
  473. prefix := "/" + setting.CBCodePathPrefix + job.JobName + logDir
  474. url, err := storage.Attachments.PresignedGetURL(prefix+"/"+fileName, fileName)
  475. if err != nil {
  476. log.Error("Get minio get SignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
  477. ctx.ServerError("Get minio get SignedUrl failed", err)
  478. return
  479. }
  480. log.Info("fileName=" + fileName)
  481. http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusTemporaryRedirect)
  482. } else {
  483. log.Info("fileName is null.")
  484. }
  485. }
  486. func CloudbrainGetLog(ctx *context.APIContext) {
  487. ID := ctx.Params(":id")
  488. job, err := models.GetCloudbrainByID(ID)
  489. if err != nil {
  490. log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
  491. ctx.ServerError(err.Error(), err)
  492. return
  493. }
  494. if job.JobType == string(models.JobTypeModelSafety) {
  495. if job.Type == models.TypeCloudBrainOne {
  496. result, err := cloudbrain.GetJob(job.JobID)
  497. existStr := ""
  498. if err == nil && result != nil {
  499. jobRes, _ := models.ConvertToJobResultPayload(result.Payload)
  500. taskRoles := jobRes.TaskRoles
  501. taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
  502. existStr = taskRes.TaskStatuses[0].ExitDiagnostics
  503. }
  504. ctx.Data["existStr"] = existStr
  505. log.Info("existStr=" + existStr)
  506. } else {
  507. ModelSafetyGetLog(ctx)
  508. return
  509. }
  510. }
  511. lines := ctx.QueryInt("lines")
  512. baseLine := ctx.Query("base_line")
  513. order := ctx.Query("order")
  514. var result map[string]interface{}
  515. resultPath := "/model"
  516. if job.JobType == string(models.JobTypeInference) || job.JobType == string(models.JobTypeModelSafety) {
  517. resultPath = "/result"
  518. }
  519. if baseLine == "" && order == "desc" {
  520. result = getLastLogFromModelDir(job.JobName, lines, resultPath)
  521. } else {
  522. startLine := ctx.QueryInt("base_line")
  523. endLine := startLine + lines
  524. if order == "asc" {
  525. if baseLine == "" {
  526. startLine = 0
  527. endLine = lines
  528. } else {
  529. endLine = startLine
  530. startLine = endLine - lines
  531. if startLine < 0 {
  532. startLine = 0
  533. }
  534. }
  535. }
  536. result = getLogFromModelDir(job.JobName, startLine, endLine, resultPath)
  537. if result == nil {
  538. log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
  539. ctx.ServerError(err.Error(), err)
  540. return
  541. }
  542. }
  543. content := ""
  544. if result["Content"] != nil {
  545. content = result["Content"].(string)
  546. }
  547. if ctx.Data["existStr"] != nil && result["Lines"].(int) < 50 {
  548. content = content + ctx.Data["existStr"].(string)
  549. }
  550. re := map[string]interface{}{
  551. "JobID": ID,
  552. "LogFileName": result["FileName"],
  553. "StartLine": result["StartLine"],
  554. "EndLine": result["EndLine"],
  555. "Content": content,
  556. "Lines": result["Lines"],
  557. "CanLogDownload": result["FileName"] != "",
  558. }
  559. //result := CloudbrainGetLogByJobId(job.JobID, job.JobName)
  560. ctx.JSON(http.StatusOK, re)
  561. }
  562. func getAllLineFromFile(path string) int {
  563. count := 0
  564. reader, err := os.Open(path)
  565. defer reader.Close()
  566. if err == nil {
  567. r := bufio.NewReader(reader)
  568. for {
  569. _, error := r.ReadString('\n')
  570. if error == io.EOF {
  571. log.Info("read file completed.")
  572. break
  573. }
  574. if error != nil {
  575. log.Info("read file error." + error.Error())
  576. break
  577. }
  578. count = count + 1
  579. }
  580. } else {
  581. log.Info("error:" + err.Error())
  582. }
  583. return count
  584. }
  585. func getLastLogFromModelDir(jobName string, lines int, resultPath string) map[string]interface{} {
  586. prefix := setting.CBCodePathPrefix + jobName + resultPath
  587. files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
  588. if err != nil {
  589. log.Error("query cloudbrain model failed: %v", err)
  590. return nil
  591. }
  592. re := ""
  593. fileName := ""
  594. count := 0
  595. allLines := 0
  596. startLine := 0
  597. for _, file := range files {
  598. if strings.HasSuffix(file.FileName, "log.txt") {
  599. fileName = file.FileName
  600. path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
  601. allLines = getAllLineFromFile(path)
  602. startLine = allLines - lines
  603. if startLine < 0 {
  604. startLine = 0
  605. }
  606. count = allLines - startLine
  607. log.Info("path=" + path)
  608. reader, err := os.Open(path)
  609. defer reader.Close()
  610. if err == nil {
  611. r := bufio.NewReader(reader)
  612. for i := 0; i < allLines; i++ {
  613. line, error := r.ReadString('\n')
  614. if error == io.EOF {
  615. log.Info("read file completed.")
  616. break
  617. }
  618. if error != nil {
  619. log.Info("read file error." + error.Error())
  620. break
  621. }
  622. if error == nil {
  623. if i >= startLine {
  624. re = re + line
  625. }
  626. }
  627. }
  628. } else {
  629. log.Info("error:" + err.Error())
  630. }
  631. break
  632. }
  633. }
  634. return map[string]interface{}{
  635. "JobName": jobName,
  636. "Content": re,
  637. "FileName": fileName,
  638. "Lines": count,
  639. "EndLine": allLines,
  640. "StartLine": startLine,
  641. }
  642. }
  643. func getLogFromModelDir(jobName string, startLine int, endLine int, resultPath string) map[string]interface{} {
  644. prefix := setting.CBCodePathPrefix + jobName + resultPath
  645. files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
  646. if err != nil {
  647. log.Error("query cloudbrain model failed: %v", err)
  648. return nil
  649. }
  650. if startLine == endLine {
  651. return map[string]interface{}{
  652. "JobName": jobName,
  653. "Content": "",
  654. "FileName": "",
  655. "Lines": 0,
  656. "EndLine": startLine,
  657. "StartLine": startLine,
  658. }
  659. }
  660. re := ""
  661. fileName := ""
  662. count := 0
  663. fileEndLine := endLine
  664. for _, file := range files {
  665. if strings.HasSuffix(file.FileName, "log.txt") {
  666. fileName = file.FileName
  667. path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
  668. log.Info("path=" + path)
  669. reader, err := os.Open(path)
  670. defer reader.Close()
  671. if err == nil {
  672. r := bufio.NewReader(reader)
  673. for i := 0; i < endLine; i++ {
  674. line, error := r.ReadString('\n')
  675. if error == io.EOF {
  676. if i >= startLine {
  677. fileEndLine = i
  678. re = re + line
  679. count++
  680. }
  681. log.Info("read file completed.")
  682. break
  683. }
  684. if error != nil {
  685. log.Info("read file error." + error.Error())
  686. break
  687. }
  688. if error == nil {
  689. if i >= startLine {
  690. fileEndLine = i
  691. re = re + line
  692. count++
  693. }
  694. }
  695. }
  696. fileEndLine = fileEndLine + 1
  697. } else {
  698. log.Info("error:" + err.Error())
  699. }
  700. break
  701. }
  702. }
  703. return map[string]interface{}{
  704. "JobName": jobName,
  705. "Content": re,
  706. "FileName": fileName,
  707. "Lines": count,
  708. "EndLine": fileEndLine,
  709. "StartLine": startLine,
  710. }
  711. }
  712. func CloudBrainModelConvertList(ctx *context.APIContext) {
  713. var (
  714. err error
  715. )
  716. ID := ctx.Params(":id")
  717. parentDir := ctx.Query("parentDir")
  718. dirArray := strings.Split(parentDir, "/")
  719. job, err := models.QueryModelConvertById(ID)
  720. if err != nil {
  721. log.Error("GetCloudbrainByJobID(%s) failed:%v", job.Name, err.Error())
  722. return
  723. }
  724. if job.IsGpuTrainTask() {
  725. //get dirs
  726. dirs, err := routerRepo.GetModelDirs(job.ID, parentDir)
  727. if err != nil {
  728. log.Error("GetModelDirs failed:%v", err.Error(), ctx.Data["msgID"])
  729. ctx.ServerError("GetModelDirs failed:", err)
  730. return
  731. }
  732. var fileInfos []storage.FileInfo
  733. err = json.Unmarshal([]byte(dirs), &fileInfos)
  734. if err != nil {
  735. log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
  736. ctx.ServerError("json.Unmarshal failed:", err)
  737. return
  738. }
  739. for i, fileInfo := range fileInfos {
  740. temp, _ := time.Parse("2006-01-02 15:04:05", fileInfo.ModTime)
  741. fileInfos[i].ModTime = temp.Local().Format("2006-01-02 15:04:05")
  742. }
  743. sort.Slice(fileInfos, func(i, j int) bool {
  744. return fileInfos[i].ModTime > fileInfos[j].ModTime
  745. })
  746. ctx.JSON(http.StatusOK, map[string]interface{}{
  747. "JobID": ID,
  748. "VersionName": "",
  749. "StatusOK": 0,
  750. "Path": dirArray,
  751. "Dirs": fileInfos,
  752. "task": job,
  753. "PageIsCloudBrain": true,
  754. })
  755. } else {
  756. var jobID = ctx.Params(":id")
  757. var versionName = "V0001"
  758. parentDir := ctx.Query("parentDir")
  759. dirArray := strings.Split(parentDir, "/")
  760. models, err := storage.GetObsListObject(job.ID, "output/", parentDir, versionName)
  761. if err != nil {
  762. log.Info("get TrainJobListModel failed:", err)
  763. ctx.ServerError("GetObsListObject:", err)
  764. return
  765. }
  766. ctx.JSON(http.StatusOK, map[string]interface{}{
  767. "JobID": jobID,
  768. "VersionName": versionName,
  769. "StatusOK": 0,
  770. "Path": dirArray,
  771. "Dirs": models,
  772. "task": job,
  773. "PageIsCloudBrain": true,
  774. })
  775. }
  776. }
  777. func CloudBrainModelList(ctx *context.APIContext) {
  778. var (
  779. err error
  780. )
  781. var jobID = ctx.Params(":jobid")
  782. var versionName = ctx.Query("version_name")
  783. parentDir := ctx.Query("parentDir")
  784. dirArray := strings.Split(parentDir, "/")
  785. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  786. if err != nil {
  787. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  788. return
  789. }
  790. //get dirs
  791. dirs, err := routerRepo.GetModelDirs(task.JobName, parentDir)
  792. if err != nil {
  793. log.Error("GetModelDirs failed:%v", err.Error(), ctx.Data["msgID"])
  794. ctx.ServerError("GetModelDirs failed:", err)
  795. return
  796. }
  797. var fileInfos []storage.FileInfo
  798. err = json.Unmarshal([]byte(dirs), &fileInfos)
  799. if err != nil {
  800. log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
  801. ctx.ServerError("json.Unmarshal failed:", err)
  802. return
  803. }
  804. for i, fileInfo := range fileInfos {
  805. temp, _ := time.Parse("2006-01-02 15:04:05", fileInfo.ModTime)
  806. fileInfos[i].ModTime = temp.Local().Format("2006-01-02 15:04:05")
  807. }
  808. sort.Slice(fileInfos, func(i, j int) bool {
  809. return fileInfos[i].ModTime > fileInfos[j].ModTime
  810. })
  811. ctx.JSON(http.StatusOK, map[string]interface{}{
  812. "JobID": jobID,
  813. "VersionName": versionName,
  814. "StatusOK": 0,
  815. "Path": dirArray,
  816. "Dirs": fileInfos,
  817. "task": task,
  818. "PageIsCloudBrain": true,
  819. })
  820. }
  821. type JobInfo struct {
  822. JobName string `json:"job_name"`
  823. AiCenterId int `json:"ai_center_id"`
  824. }
  825. func GetNewestJobs(ctx *context.APIContext) {
  826. idsC2Net, err := models.GetNewestJobsByAiCenter()
  827. if err != nil {
  828. log.Error("GetNewestJobsByAiCenter(%s) failed:%v", err.Error())
  829. return
  830. }
  831. idsCloudbrain, err := models.GetNewestJobsByType()
  832. if err != nil {
  833. log.Error("GetNewestJobsByType(%s) failed:%v", err.Error())
  834. return
  835. }
  836. ids := make([]int64, len(idsC2Net), cap(idsC2Net)*2)
  837. copy(ids, idsC2Net)
  838. for _, id := range idsCloudbrain {
  839. ids = append(ids, id)
  840. }
  841. jobs, err := models.GetCloudbrainByIDs(ids)
  842. if err != nil {
  843. log.Error("GetCloudbrainByIDs(%s) failed:%v", err.Error())
  844. return
  845. }
  846. jobInfos := make([]JobInfo, 0)
  847. for _, job := range jobs {
  848. var id int
  849. var content string
  850. switch job.Type {
  851. case models.TypeCloudBrainOne:
  852. id, content = getAICenterID("cloudbrain_one")
  853. if content == "" {
  854. log.Error("job(%s) has no match config info", job.DisplayJobName)
  855. continue
  856. }
  857. case models.TypeCloudBrainTwo:
  858. id, content = getAICenterID("cloudbrain_two")
  859. if content == "" {
  860. log.Error("job(%s) has no match config info", job.DisplayJobName)
  861. continue
  862. }
  863. case models.TypeC2Net:
  864. centerInfo := strings.Split(job.AiCenter, "+")
  865. if len(centerInfo) != 2 {
  866. log.Error("job(%s):ai_center(%s) is wrong", job.DisplayJobName, job.AiCenter)
  867. continue
  868. }
  869. id, content = getAICenterID(centerInfo[0])
  870. if content == "" {
  871. log.Error("job(%s) has no match config info", job.DisplayJobName)
  872. continue
  873. }
  874. default:
  875. log.Error("no match info")
  876. continue
  877. }
  878. jobInfos = append(jobInfos, JobInfo{
  879. JobName: job.DisplayJobName,
  880. AiCenterId: id,
  881. })
  882. }
  883. ctx.JSON(http.StatusOK, jobInfos)
  884. }
  885. func GetAICenterInfo(ctx *context.APIContext) {
  886. if setting.C2NetInfos == nil {
  887. log.Error("C2NET_SEQUENCE is incorrect")
  888. return
  889. }
  890. ctx.JSON(http.StatusOK, setting.C2NetInfos.C2NetSqInfo)
  891. }
  892. func getAICenterID(name string) (int, string) {
  893. for _, info := range setting.C2NetInfos.C2NetSqInfo {
  894. if name == info.Name {
  895. return info.ID, info.Content
  896. }
  897. }
  898. return 0, ""
  899. }