You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 18 kB

3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago

  1. // Copyright 2016 The Gogs Authors. All rights reserved.
  2. // Copyright 2018 The Gitea Authors. All rights reserved.
  3. // Use of this source code is governed by a MIT-style
  4. // license that can be found in the LICENSE file.
  5. package repo
  import (
  	"encoding/json"
  	"fmt"
  	"net/http"
  	"path"
  	"strconv"
  	"strings"

  	"code.gitea.io/gitea/models"
  	"code.gitea.io/gitea/modules/cloudbrain"
  	"code.gitea.io/gitea/modules/context"
  	"code.gitea.io/gitea/modules/grampus"
  	"code.gitea.io/gitea/modules/log"
  	"code.gitea.io/gitea/modules/modelarts"
  	"code.gitea.io/gitea/modules/setting"
  	"code.gitea.io/gitea/modules/storage"
  	"code.gitea.io/gitea/modules/timeutil"
  	routerRepo "code.gitea.io/gitea/routers/repo"
  )
  23. func GetModelArtsNotebook(ctx *context.APIContext) {
  24. var (
  25. err error
  26. )
  27. jobID := ctx.Params(":jobid")
  28. repoID := ctx.Repo.Repository.ID
  29. job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
  30. if err != nil {
  31. ctx.NotFound(err)
  32. return
  33. }
  34. result, err := modelarts.GetJob(jobID)
  35. if err != nil {
  36. ctx.NotFound(err)
  37. return
  38. }
  39. job.Status = result.Status
  40. err = models.UpdateJob(job)
  41. if err != nil {
  42. log.Error("UpdateJob failed:", err)
  43. }
  44. ctx.JSON(http.StatusOK, map[string]interface{}{
  45. "JobID": jobID,
  46. "JobStatus": result.Status,
  47. })
  48. }
  49. func GetModelArtsNotebook2(ctx *context.APIContext) {
  50. var (
  51. err error
  52. )
  53. ID := ctx.Params(":id")
  54. job, err := models.GetCloudbrainByID(ID)
  55. if err != nil {
  56. ctx.NotFound(err)
  57. return
  58. }
  59. result, err := modelarts.GetNotebook2(job.JobID)
  60. if err != nil {
  61. ctx.NotFound(err)
  62. return
  63. }
  64. if job.StartTime == 0 && result.Lease.UpdateTime > 0 {
  65. job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
  66. }
  67. job.Status = result.Status
  68. if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) {
  69. job.EndTime = timeutil.TimeStampNow()
  70. }
  71. job.CorrectCreateUnix()
  72. job.ComputeAndSetDuration()
  73. err = models.UpdateJob(job)
  74. if err != nil {
  75. log.Error("UpdateJob failed:", err)
  76. }
  77. ctx.JSON(http.StatusOK, map[string]interface{}{
  78. "ID": ID,
  79. "JobName": job.JobName,
  80. "JobStatus": result.Status,
  81. })
  82. }
  83. func GetModelArtsTrainJob(ctx *context.APIContext) {
  84. var (
  85. err error
  86. )
  87. jobID := ctx.Params(":jobid")
  88. repoID := ctx.Repo.Repository.ID
  89. job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
  90. if err != nil {
  91. ctx.NotFound(err)
  92. return
  93. }
  94. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
  95. if err != nil {
  96. ctx.NotFound(err)
  97. return
  98. }
  99. job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  100. job.Duration = result.Duration
  101. job.TrainJobDuration = result.TrainJobDuration
  102. err = models.UpdateJob(job)
  103. if err != nil {
  104. log.Error("UpdateJob failed:", err)
  105. }
  106. ctx.JSON(http.StatusOK, map[string]interface{}{
  107. "JobID": jobID,
  108. "JobStatus": job.Status,
  109. "JobDuration": job.Duration,
  110. })
  111. }
  112. func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
  113. var (
  114. err error
  115. aiCenterName string
  116. )
  117. jobID := ctx.Params(":jobid")
  118. versionName := ctx.Query("version_name")
  119. job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  120. if err != nil {
  121. ctx.NotFound(err)
  122. return
  123. }
  124. if job.Type == models.TypeCloudBrainOne {
  125. jobResult, err := cloudbrain.GetJob(job.JobID)
  126. if err != nil {
  127. ctx.NotFound(err)
  128. log.Error("GetJob failed:", err)
  129. return
  130. }
  131. result, err := models.ConvertToJobResultPayload(jobResult.Payload)
  132. if err != nil {
  133. ctx.NotFound(err)
  134. log.Error("ConvertToJobResultPayload failed:", err)
  135. return
  136. }
  137. job.Status = result.JobStatus.State
  138. if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) {
  139. taskRoles := result.TaskRoles
  140. taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{}))
  141. job.ContainerIp = taskRes.TaskStatuses[0].ContainerIP
  142. job.ContainerID = taskRes.TaskStatuses[0].ContainerID
  143. job.Status = taskRes.TaskStatuses[0].State
  144. }
  145. if result.JobStatus.State != string(models.JobWaiting) {
  146. models.ParseAndSetDurationFromCloudBrainOne(result, job)
  147. err = models.UpdateJob(job)
  148. if err != nil {
  149. log.Error("UpdateJob failed:", err)
  150. }
  151. }
  152. } else if job.Type == models.TypeCloudBrainTwo {
  153. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
  154. if err != nil {
  155. ctx.NotFound(err)
  156. return
  157. }
  158. if job.StartTime == 0 && result.StartTime > 0 {
  159. job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
  160. }
  161. job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  162. job.Duration = result.Duration / 1000
  163. job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
  164. if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
  165. job.EndTime = job.StartTime.Add(job.Duration)
  166. }
  167. job.CorrectCreateUnix()
  168. err = models.UpdateTrainJobVersion(job)
  169. if err != nil {
  170. log.Error("UpdateJob failed:", err)
  171. }
  172. } else if job.Type == models.TypeC2Net {
  173. result, err := grampus.GetJob(jobID)
  174. if err != nil {
  175. log.Error("GetJob(%s) failed:%v", job.JobName, err)
  176. ctx.NotFound(err)
  177. return
  178. }
  179. if job.StartTime == 0 && result.JobInfo.StartedAt > 0 {
  180. job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
  181. }
  182. job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
  183. job.Duration = result.JobInfo.RunSec
  184. job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
  185. if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
  186. job.EndTime = job.StartTime.Add(job.Duration)
  187. }
  188. job.CorrectCreateUnix()
  189. if len(job.AiCenter) == 0 {
  190. if len(result.JobInfo.Tasks) > 0 {
  191. if len(result.JobInfo.Tasks[0].CenterID) > 0 && len(result.JobInfo.Tasks[0].CenterName) > 0 {
  192. job.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0]
  193. aiCenterName = result.JobInfo.Tasks[0].CenterName[0]
  194. }
  195. }
  196. } else {
  197. temp := strings.Split(job.AiCenter, "+")
  198. if len(temp) > 1 {
  199. aiCenterName = temp[1]
  200. }
  201. }
  202. err = models.UpdateTrainJobVersion(job)
  203. if err != nil {
  204. log.Error("UpdateJob failed:", err)
  205. }
  206. }
  207. ctx.JSON(http.StatusOK, map[string]interface{}{
  208. "JobID": jobID,
  209. "JobStatus": job.Status,
  210. "JobDuration": job.TrainJobDuration,
  211. "AiCenter": aiCenterName,
  212. })
  213. }
  214. func TrainJobForModelConvertGetLog(ctx *context.APIContext) {
  215. var (
  216. err error
  217. )
  218. var jobID = ctx.Params(":id")
  219. var baseLine = ctx.Query("base_line")
  220. var order = ctx.Query("order")
  221. var lines = ctx.Query("lines")
  222. lines_int, err := strconv.Atoi(lines)
  223. if err != nil {
  224. log.Error("change lines(%d) string to int failed", lines_int)
  225. }
  226. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  227. log.Error("order(%s) check failed", order)
  228. ctx.JSON(http.StatusBadRequest, map[string]interface{}{
  229. "err_msg": "order check failed",
  230. })
  231. return
  232. }
  233. resultLogFile, result, err := trainJobForModelConvertGetLogContent(jobID, baseLine, order, lines_int)
  234. if err != nil {
  235. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  236. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  237. ctx.JSON(http.StatusOK, map[string]interface{}{
  238. "JobID": jobID,
  239. "LogFileName": "",
  240. "StartLine": "0",
  241. "EndLine": "0",
  242. "Content": "",
  243. "Lines": 0,
  244. })
  245. return
  246. }
  247. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  248. ctx.JSON(http.StatusOK, map[string]interface{}{
  249. "JobID": jobID,
  250. "LogFileName": resultLogFile.LogFileList[0],
  251. "StartLine": result.StartLine,
  252. "EndLine": result.EndLine,
  253. "Content": result.Content,
  254. "Lines": result.Lines,
  255. })
  256. }
  257. func trainJobForModelConvertGetLogContent(jobID string, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  258. task, err := models.QueryModelConvertById(jobID)
  259. if err != nil {
  260. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  261. return nil, nil, err
  262. }
  263. resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.CloudBrainTaskId, task.ModelArtsVersionId)
  264. if err != nil {
  265. log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.CloudBrainTaskId, err.Error())
  266. return nil, nil, err
  267. }
  268. result, err := modelarts.GetTrainJobLog(task.CloudBrainTaskId, task.ModelArtsVersionId, baseLine, resultLogFile.LogFileList[0], order, lines)
  269. if err != nil {
  270. log.Error("GetTrainJobLog(%s) failed:%v", task.CloudBrainTaskId, err.Error())
  271. return nil, nil, err
  272. }
  273. return resultLogFile, result, err
  274. }
  275. func TrainJobGetLog(ctx *context.APIContext) {
  276. var (
  277. err error
  278. )
  279. var jobID = ctx.Params(":jobid")
  280. var versionName = ctx.Query("version_name")
  281. var baseLine = ctx.Query("base_line")
  282. var order = ctx.Query("order")
  283. var lines = ctx.Query("lines")
  284. lines_int, err := strconv.Atoi(lines)
  285. if err != nil {
  286. log.Error("change lines(%d) string to int failed", lines_int)
  287. }
  288. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  289. log.Error("order(%s) check failed", order)
  290. ctx.JSON(http.StatusBadRequest, map[string]interface{}{
  291. "err_msg": "order check failed",
  292. })
  293. return
  294. }
  295. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  296. if err != nil {
  297. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  298. return
  299. }
  300. resultLogFile, result, err := trainJobGetLogContent(jobID, task.VersionID, baseLine, order, lines_int)
  301. if err != nil {
  302. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  303. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  304. return
  305. }
  306. prefix := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.LogPath, versionName), "/") + "/job"
  307. _, err = storage.GetObsLogFileName(prefix)
  308. var canLogDownload bool
  309. if err != nil {
  310. canLogDownload = false
  311. } else {
  312. canLogDownload = true
  313. }
  314. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  315. ctx.JSON(http.StatusOK, map[string]interface{}{
  316. "JobID": jobID,
  317. "LogFileName": resultLogFile.LogFileList[0],
  318. "StartLine": result.StartLine,
  319. "EndLine": result.EndLine,
  320. "Content": result.Content,
  321. "Lines": result.Lines,
  322. "CanLogDownload": canLogDownload,
  323. })
  324. }
  325. func trainJobGetLogContent(jobID string, versionID int64, baseLine string, order string, lines int) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  326. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(versionID, 10))
  327. if err != nil {
  328. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  329. return nil, nil, err
  330. }
  331. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(versionID, 10), baseLine, resultLogFile.LogFileList[0], order, lines)
  332. if err != nil {
  333. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  334. return nil, nil, err
  335. }
  336. return resultLogFile, result, err
  337. }
  338. func DelTrainJobVersion(ctx *context.APIContext) {
  339. var (
  340. err error
  341. )
  342. var jobID = ctx.Params(":jobid")
  343. var versionName = ctx.Query("version_name")
  344. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  345. if err != nil {
  346. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  347. ctx.NotFound(err)
  348. return
  349. }
  350. //删除modelarts上的记录
  351. _, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10))
  352. if err != nil {
  353. log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error())
  354. ctx.NotFound(err)
  355. return
  356. }
  357. //删除数据库记录
  358. err = models.DeleteJob(task)
  359. if err != nil {
  360. ctx.ServerError("DeleteJob failed", err)
  361. ctx.NotFound(err)
  362. return
  363. }
  364. //获取删除后的版本数量
  365. var jobTypes []string
  366. jobTypes = append(jobTypes, string(models.JobTypeTrain))
  367. repo := ctx.Repo.Repository
  368. VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
  369. RepoID: repo.ID,
  370. Type: models.TypeCloudBrainTwo,
  371. JobTypes: jobTypes,
  372. JobID: jobID,
  373. })
  374. if err != nil {
  375. ctx.ServerError("get VersionListCount failed", err)
  376. return
  377. }
  378. if VersionListCount > 0 {
  379. // 判断当前删掉的任务是否是最新版本,若是,将排序后的TotalVersionCount置为删掉的最新版本的TotalVersionCount,若不是,按时间排序后的版本列表的第一个版本设置为最新版本,TotalVersionCount不变
  380. if task.IsLatestVersion == modelarts.IsLatestVersion {
  381. err = models.SetVersionCountAndLatestVersion(jobID, VersionTaskList[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, task.TotalVersionCount)
  382. if err != nil {
  383. ctx.ServerError("UpdateJobVersionCount failed", err)
  384. return
  385. }
  386. } else {
  387. err = models.SetVersionCountAndLatestVersion(jobID, VersionTaskList[0].VersionName, VersionListCount, modelarts.IsLatestVersion, VersionTaskList[0].Cloudbrain.TotalVersionCount)
  388. if err != nil {
  389. ctx.ServerError("UpdateJobVersionCount failed", err)
  390. return
  391. }
  392. }
  393. } else { //已删除该任务下的所有版本
  394. routerRepo.DeleteJobStorage(task.JobName)
  395. }
  396. ctx.JSON(http.StatusOK, map[string]interface{}{
  397. "JobID": jobID,
  398. "VersionName": versionName,
  399. "StatusOK": 0,
  400. "VersionListCount": VersionListCount,
  401. })
  402. }
  403. func StopTrainJobVersion(ctx *context.APIContext) {
  404. var (
  405. err error
  406. )
  407. var jobID = ctx.Params(":jobid")
  408. var versionName = ctx.Query("version_name")
  409. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  410. if err != nil {
  411. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  412. return
  413. }
  414. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  415. if err != nil {
  416. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  417. return
  418. }
  419. ctx.JSON(http.StatusOK, map[string]interface{}{
  420. "JobID": jobID,
  421. "VersionName": versionName,
  422. "StatusOK": 0,
  423. })
  424. }
  425. func ModelList(ctx *context.APIContext) {
  426. var (
  427. err error
  428. )
  429. var jobID = ctx.Params(":jobid")
  430. var versionName = ctx.Query("version_name")
  431. parentDir := ctx.Query("parentDir")
  432. dirArray := strings.Split(parentDir, "/")
  433. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  434. if err != nil {
  435. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  436. return
  437. }
  438. var fileInfos []storage.FileInfo
  439. if task.ComputeResource == models.NPUResource {
  440. fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, versionName)
  441. if err != nil {
  442. log.Info("get TrainJobListModel failed:", err)
  443. ctx.ServerError("GetObsListObject:", err)
  444. return
  445. }
  446. } else if task.ComputeResource == models.GPUResource {
  447. files, err := routerRepo.GetModelDirs(task.JobName, parentDir)
  448. if err != nil {
  449. log.Info("GetModelDirs failed:", err)
  450. ctx.ServerError("GetModelDirs:", err)
  451. return
  452. }
  453. err = json.Unmarshal([]byte(files), &fileInfos)
  454. if err != nil {
  455. log.Error("json.Unmarshal failed:%v", err.Error(), ctx.Data["msgID"])
  456. ctx.ServerError("json.Unmarshal failed:", err)
  457. return
  458. }
  459. }
  460. ctx.JSON(http.StatusOK, map[string]interface{}{
  461. "JobID": jobID,
  462. "VersionName": versionName,
  463. "StatusOK": 0,
  464. "Path": dirArray,
  465. "Dirs": fileInfos,
  466. "task": task,
  467. "PageIsCloudBrain": true,
  468. })
  469. }
  470. func GetModelArtsInferenceJob(ctx *context.APIContext) {
  471. var (
  472. err error
  473. )
  474. jobID := ctx.Params(":jobid")
  475. job, err := models.GetCloudbrainByJobID(jobID)
  476. if err != nil {
  477. ctx.NotFound(err)
  478. return
  479. }
  480. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
  481. if err != nil {
  482. ctx.NotFound(err)
  483. return
  484. }
  485. if job.StartTime == 0 && result.StartTime > 0 {
  486. job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
  487. }
  488. job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  489. job.Duration = result.Duration / 1000
  490. job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)
  491. if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
  492. job.EndTime = job.StartTime.Add(job.Duration)
  493. }
  494. job.CorrectCreateUnix()
  495. err = models.UpdateInferenceJob(job)
  496. if err != nil {
  497. log.Error("UpdateJob failed:", err)
  498. }
  499. ctx.JSON(http.StatusOK, map[string]interface{}{
  500. "JobID": jobID,
  501. "JobStatus": job.Status,
  502. "JobDuration": job.TrainJobDuration,
  503. })
  504. }
  505. func ResultList(ctx *context.APIContext) {
  506. var (
  507. err error
  508. )
  509. var jobID = ctx.Params(":jobid")
  510. var versionName = ctx.Query("version_name")
  511. parentDir := ctx.Query("parentDir")
  512. dirArray := strings.Split(parentDir, "/")
  513. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  514. if err != nil {
  515. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  516. return
  517. }
  518. models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, versionName)
  519. if err != nil {
  520. log.Info("get TrainJobListModel failed:", err)
  521. ctx.ServerError("GetObsListObject:", err)
  522. return
  523. }
  524. ctx.JSON(http.StatusOK, map[string]interface{}{
  525. "JobID": jobID,
  526. "VersionName": versionName,
  527. "StatusOK": 0,
  528. "Path": dirArray,
  529. "Dirs": models,
  530. "task": task,
  531. "PageIsCloudBrain": true,
  532. })
  533. }
  534. func TrainJobGetMetricStatistic(ctx *context.APIContext) {
  535. var (
  536. err error
  537. )
  538. var jobID = ctx.Params(":jobid")
  539. var versionName = ctx.Query("version_name")
  540. result, err := trainJobGetMetricStatistic(jobID, versionName)
  541. if err != nil {
  542. log.Error("trainJobGetMetricStatistic(%s) failed:%v", jobID, err.Error())
  543. return
  544. }
  545. ctx.JSON(http.StatusOK, map[string]interface{}{
  546. "JobID": jobID,
  547. "Interval": result.Interval,
  548. "MetricsInfo": result.MetricsInfo,
  549. })
  550. }
  551. func trainJobGetMetricStatistic(jobID string, versionName string) (*models.GetTrainJobMetricStatisticResult, error) {
  552. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  553. if err != nil {
  554. log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
  555. return nil, err
  556. }
  557. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  558. if err != nil {
  559. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  560. return nil, err
  561. }
  562. result, err := modelarts.GetTrainJobMetricStatistic(jobID, strconv.FormatInt(task.VersionID, 10), resultLogFile.LogFileList[0])
  563. if err != nil {
  564. log.Error("GetTrainJobMetricStatistic(%s) failed:%v", jobID, err.Error())
  565. return nil, err
  566. }
  567. return result, err
  568. }