You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

grampus.go 19 kB

3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. package repo
  2. import (
  3. "code.gitea.io/gitea/modules/auth"
  4. "code.gitea.io/gitea/modules/git"
  5. "code.gitea.io/gitea/modules/grampus"
  6. "code.gitea.io/gitea/modules/modelarts"
  7. "code.gitea.io/gitea/modules/timeutil"
  8. "code.gitea.io/gitea/modules/util"
  9. "encoding/json"
  10. "errors"
  11. "io/ioutil"
  12. "net/http"
  13. "os"
  14. "path"
  15. "strconv"
  16. "strings"
  17. "time"
  18. "code.gitea.io/gitea/models"
  19. "code.gitea.io/gitea/modules/base"
  20. "code.gitea.io/gitea/modules/cloudbrain"
  21. "code.gitea.io/gitea/modules/context"
  22. "code.gitea.io/gitea/modules/log"
  23. "code.gitea.io/gitea/modules/setting"
  24. )
  25. const (
  26. tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show"
  27. //GPU
  28. tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new"
  29. //NPU
  30. tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new"
  31. )
  32. func GrampusTrainJobGPUNew(ctx *context.Context) {
  33. err := grampusGpuNewDataPrepare(ctx)
  34. if err != nil {
  35. ctx.ServerError("get new train-job info failed", err)
  36. return
  37. }
  38. ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUNew)
  39. }
  40. func grampusGpuNewDataPrepare(ctx *context.Context) error {
  41. ctx.Data["PageIsCloudBrain"] = true
  42. t := time.Now()
  43. var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  44. ctx.Data["display_job_name"] = displayJobName
  45. //get valid images
  46. result, err := cloudbrain.GetImages()
  47. if err != nil {
  48. ctx.Data["error"] = err.Error()
  49. log.Error("cloudbrain.GetImages failed:", err.Error(), ctx.Data["MsgID"])
  50. }
  51. for i, payload := range result.Payload.ImageInfo {
  52. if strings.HasPrefix(result.Payload.ImageInfo[i].Place, "192.168") {
  53. result.Payload.ImageInfo[i].PlaceView = payload.Place[strings.Index(payload.Place, "/"):len(payload.Place)]
  54. } else {
  55. result.Payload.ImageInfo[i].PlaceView = payload.Place
  56. }
  57. }
  58. ctx.Data["images"] = result.Payload.ImageInfo
  59. resultPublic, err := cloudbrain.GetPublicImages()
  60. if err != nil {
  61. ctx.Data["error"] = err.Error()
  62. log.Error("cloudbrain.GetPublicImages failed:", err.Error(), ctx.Data["MsgID"])
  63. }
  64. for i, payload := range resultPublic.Payload.ImageInfo {
  65. if strings.HasPrefix(resultPublic.Payload.ImageInfo[i].Place, "192.168") {
  66. resultPublic.Payload.ImageInfo[i].PlaceView = payload.Place[strings.Index(payload.Place, "/"):len(payload.Place)]
  67. } else {
  68. resultPublic.Payload.ImageInfo[i].PlaceView = payload.Place
  69. }
  70. }
  71. ctx.Data["public_images"] = resultPublic.Payload.ImageInfo
  72. //get valid dataset
  73. attachs, err := models.GetAllUserAttachments(ctx.User.ID)
  74. if err != nil {
  75. log.Error("GetAllUserAttachments failed: %v", err, ctx.Data["MsgID"])
  76. return err
  77. }
  78. ctx.Data["attachments"] = attachs
  79. ctx.Data["command"] = cloudbrain.Command
  80. ctx.Data["code_path"] = cloudbrain.CodeMountPath
  81. ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath
  82. ctx.Data["model_path"] = cloudbrain.ModelMountPath
  83. ctx.Data["benchmark_path"] = cloudbrain.BenchMarkMountPath
  84. ctx.Data["is_benchmark_enabled"] = setting.IsBenchmarkEnabled
  85. //get valid resource specs
  86. if categories == nil {
  87. json.Unmarshal([]byte(setting.BenchmarkCategory), &categories)
  88. }
  89. ctx.Data["benchmark_categories"] = categories.Category
  90. ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType
  91. if gpuInfos == nil {
  92. json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos)
  93. }
  94. ctx.Data["gpu_types"] = gpuInfos.GpuInfo
  95. if trainGpuInfos == nil {
  96. json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos)
  97. }
  98. ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo
  99. if benchmarkGpuInfos == nil {
  100. json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos)
  101. }
  102. ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo
  103. if benchmarkResourceSpecs == nil {
  104. json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs)
  105. }
  106. ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec
  107. if cloudbrain.ResourceSpecs == nil {
  108. json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs)
  109. }
  110. ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec
  111. if cloudbrain.TrainResourceSpecs == nil {
  112. json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs)
  113. }
  114. ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec
  115. ctx.Data["params"] = ""
  116. ctx.Data["branchName"] = ctx.Repo.BranchName
  117. ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath
  118. ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled
  119. ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath
  120. ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled
  121. ctx.Data["cloudbraintype"] = models.TypeCloudBrainOne
  122. ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode")
  123. return nil
  124. }
  125. func GrampusTrainJobNPUNew(ctx *context.Context) {
  126. err := grampusTrainJobNpuNewDataPrepare(ctx)
  127. if err != nil {
  128. ctx.ServerError("get new train-job info failed", err)
  129. return
  130. }
  131. ctx.HTML(200, tplGrampusTrainJobNPUNew)
  132. }
  133. func grampusTrainJobNpuNewDataPrepare(ctx *context.Context) error {
  134. ctx.Data["PageIsCloudBrain"] = true
  135. t := time.Now()
  136. var displayJobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  137. ctx.Data["display_job_name"] = displayJobName
  138. //get valid dataset
  139. attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID)
  140. if err != nil {
  141. ctx.ServerError("GetAllUserAttachments failed:", err)
  142. return err
  143. }
  144. ctx.Data["attachments"] = attachs
  145. //get valid resource specs
  146. var resourcePools modelarts.ResourcePool
  147. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  148. ctx.ServerError("json.Unmarshal failed:", err)
  149. return err
  150. }
  151. ctx.Data["resource_pools"] = resourcePools.Info
  152. var engines modelarts.Engine
  153. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  154. ctx.ServerError("json.Unmarshal failed:", err)
  155. return err
  156. }
  157. ctx.Data["engines"] = engines.Info
  158. var versionInfos modelarts.VersionInfo
  159. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  160. ctx.ServerError("json.Unmarshal failed:", err)
  161. return err
  162. }
  163. ctx.Data["engine_versions"] = versionInfos.Version
  164. var flavorInfos modelarts.Flavor
  165. if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
  166. ctx.ServerError("json.Unmarshal failed:", err)
  167. return err
  168. }
  169. ctx.Data["flavor_infos"] = flavorInfos.Info
  170. ctx.Data["params"] = ""
  171. ctx.Data["branchName"] = ctx.Repo.BranchName
  172. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  173. if err != nil {
  174. ctx.ServerError("getConfigList failed:", err)
  175. return err
  176. }
  177. ctx.Data["config_list"] = configList.ParaConfigs
  178. ctx.Data["cloudbraintype"] = models.TypeCloudBrainTwo
  179. return nil
  180. }
  181. func grampusParamCheckCreateTrainJob(form auth.CreateGrampusTrainJobForm) error {
  182. if !strings.HasSuffix(form.BootFile, ".py") {
  183. log.Error("the boot file(%s) must be a python file", form.BootFile)
  184. return errors.New("启动文件必须是python文件")
  185. }
  186. if form.BranchName == "" {
  187. log.Error("the branch must not be null!", form.BranchName)
  188. return errors.New("代码分支不能为空!")
  189. }
  190. return nil
  191. }
  192. func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) {
  193. VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount)
  194. displayJobName := form.DisplayJobName
  195. jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
  196. uuid := form.Attachment
  197. description := form.Description
  198. bootFile := form.BootFile
  199. params := form.Params
  200. repo := ctx.Repo.Repository
  201. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  202. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  203. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
  204. branchName := form.BranchName
  205. isLatestVersion := modelarts.IsLatestVersion
  206. FlavorName := form.FlavorName
  207. VersionCount := modelarts.VersionCount
  208. EngineName := form.EngineName
  209. count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource)
  210. if err != nil {
  211. log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
  212. grampusTrainJobNpuNewDataPrepare(ctx)
  213. ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
  214. return
  215. } else {
  216. if count >= 1 {
  217. log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
  218. grampusTrainJobNpuNewDataPrepare(ctx)
  219. ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form)
  220. return
  221. }
  222. }
  223. if err := grampusParamCheckCreateTrainJob(form); err != nil {
  224. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  225. grampusTrainJobNpuNewDataPrepare(ctx)
  226. ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
  227. return
  228. }
  229. //check whether the task name in the project is duplicated
  230. tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, string(models.JobTypeTrain), displayJobName)
  231. if err == nil {
  232. if len(tasks) != 0 {
  233. log.Error("the job name did already exist", ctx.Data["MsgID"])
  234. grampusTrainJobNpuNewDataPrepare(ctx)
  235. ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form)
  236. return
  237. }
  238. } else {
  239. if !models.IsErrJobNotExist(err) {
  240. log.Error("system error, %v", err, ctx.Data["MsgID"])
  241. grampusTrainJobNpuNewDataPrepare(ctx)
  242. ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
  243. return
  244. }
  245. }
  246. //prepare code and out path
  247. _, err = ioutil.ReadDir(codeLocalPath)
  248. if err == nil {
  249. os.RemoveAll(codeLocalPath)
  250. }
  251. gitRepo, _ := git.OpenRepository(repo.RepoPath())
  252. commitID, _ := gitRepo.GetBranchCommitID(branchName)
  253. if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
  254. log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err)
  255. grampusTrainJobNpuNewDataPrepare(ctx)
  256. ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form)
  257. return
  258. }
  259. //todo: upload code (send to file_server todo this work?)
  260. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil {
  261. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  262. grampusTrainJobNpuNewDataPrepare(ctx)
  263. ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form)
  264. return
  265. }
  266. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil {
  267. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  268. grampusTrainJobNpuNewDataPrepare(ctx)
  269. ctx.RenderWithErr("Failed to obsMkdir_log", tplGrampusTrainJobNPUNew, &form)
  270. return
  271. }
  272. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  273. // if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil {
  274. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  275. grampusTrainJobNpuNewDataPrepare(ctx)
  276. ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form)
  277. return
  278. }
  279. //prepare command
  280. //todo: download code, download dataset, unzip dataset, exec code, upload model
  281. var parameters models.Parameters
  282. param := make([]models.Parameter, 0)
  283. existDeviceTarget := false
  284. if len(params) != 0 {
  285. err := json.Unmarshal([]byte(params), &parameters)
  286. if err != nil {
  287. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  288. grampusTrainJobNpuNewDataPrepare(ctx)
  289. ctx.RenderWithErr("运行参数错误", tplGrampusTrainJobNPUNew, &form)
  290. return
  291. }
  292. for _, parameter := range parameters.Parameter {
  293. if parameter.Label == modelarts.DeviceTarget {
  294. existDeviceTarget = true
  295. }
  296. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  297. param = append(param, models.Parameter{
  298. Label: parameter.Label,
  299. Value: parameter.Value,
  300. })
  301. }
  302. }
  303. }
  304. if !existDeviceTarget {
  305. param = append(param, models.Parameter{
  306. Label: modelarts.DeviceTarget,
  307. Value: modelarts.Ascend,
  308. })
  309. }
  310. req := &grampus.GenerateTrainJobReq{
  311. JobName: jobName,
  312. DisplayJobName: displayJobName,
  313. ComputeResource: models.NPUResource,
  314. Command: "echo test",
  315. ResourceSpecId: "f2497d54732b45fb8d887e63be1db4a7",
  316. ImageUrl: "",
  317. ImageId: "e6e85cd78ca24e158f71b6fac9c2fb95",
  318. DataUrl: dataPath,
  319. Description: description,
  320. CodeObsPath: codeObsPath,
  321. BootFileUrl: codeObsPath + bootFile,
  322. BootFile: bootFile,
  323. //TrainUrl: outputObsPath,
  324. //FlavorCode: flavorCode,
  325. WorkServerNumber: 1,
  326. //EngineID: int64(engineID),
  327. //LogUrl: logObsPath,
  328. //PoolID: poolID,
  329. Uuid: uuid,
  330. //Parameters: param,
  331. CommitID: commitID,
  332. IsLatestVersion: isLatestVersion,
  333. BranchName: branchName,
  334. Params: form.Params,
  335. FlavorName: FlavorName,
  336. EngineName: EngineName,
  337. VersionCount: VersionCount,
  338. TotalVersionCount: modelarts.TotalVersionCount,
  339. }
  340. //将params转换Parameters.Parameter,出错时返回给前端
  341. var Parameters modelarts.Parameters
  342. if err := json.Unmarshal([]byte(params), &Parameters); err != nil {
  343. ctx.ServerError("json.Unmarshal failed:", err)
  344. return
  345. }
  346. err = grampus.GenerateTrainJob(ctx, req)
  347. if err != nil {
  348. log.Error("GenerateTrainJob failed:%v", err.Error())
  349. grampusTrainJobNpuNewDataPrepare(ctx)
  350. ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
  351. return
  352. }
  353. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  354. }
  355. func GrampusStopJob(ctx *context.Context) {
  356. var ID = ctx.Params(":jobid")
  357. var resultCode = "0"
  358. var errorMsg = ""
  359. var status = ""
  360. task := ctx.Cloudbrain
  361. for {
  362. if task.Status == string(models.GrampusStatusStopped) || task.Status == string(models.GrampusStatusFailed) || task.Status == string(models.GrampusStatusSucceeded) {
  363. log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"])
  364. resultCode = "-1"
  365. errorMsg = "system error"
  366. break
  367. }
  368. res, err := grampus.StopJob(task.JobID)
  369. if err != nil {
  370. log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
  371. resultCode = strconv.Itoa(res.ErrorCode)
  372. errorMsg = res.ErrorMsg
  373. break
  374. }
  375. task.Status = string(models.GrampusStatusStopped)
  376. if task.EndTime == 0 {
  377. task.EndTime = timeutil.TimeStampNow()
  378. }
  379. task.ComputeAndSetDuration()
  380. err = models.UpdateJob(task)
  381. if err != nil {
  382. log.Error("UpdateJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
  383. resultCode = "-1"
  384. errorMsg = "system error"
  385. break
  386. }
  387. status = task.Status
  388. break
  389. }
  390. ctx.JSON(200, map[string]interface{}{
  391. "result_code": resultCode,
  392. "error_msg": errorMsg,
  393. "status": status,
  394. "id": ID,
  395. "StatusOK": 0,
  396. })
  397. }
  398. func GrampusTrainJobDel(ctx *context.Context) {
  399. var listType = ctx.Query("listType")
  400. if err := deleteGrampusJob(ctx); err != nil {
  401. log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"])
  402. ctx.ServerError(err.Error(), err)
  403. return
  404. }
  405. var isAdminPage = ctx.Query("isadminpage")
  406. var isHomePage = ctx.Query("ishomepage")
  407. if ctx.IsUserSiteAdmin() && isAdminPage == "true" {
  408. ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains")
  409. } else if isHomePage == "true" {
  410. ctx.Redirect(setting.AppSubURL + "/cloudbrains")
  411. } else {
  412. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
  413. }
  414. }
  415. func deleteGrampusJob(ctx *context.Context) error {
  416. task := ctx.Cloudbrain
  417. if task.Status != string(models.GrampusStatusStopped) && task.Status != string(models.GrampusStatusSucceeded) && task.Status != string(models.GrampusStatusFailed) {
  418. log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"])
  419. return errors.New("the job has not been stopped")
  420. }
  421. err := models.DeleteJob(task)
  422. if err != nil {
  423. log.Error("DeleteJob failed: %v", err, ctx.Data["msgID"])
  424. return err
  425. }
  426. storageType := models.TypeCloudBrainOne
  427. if task.ComputeResource == models.NPUResource {
  428. storageType = models.TypeCloudBrainTwo
  429. }
  430. deleteJobStorage(task.JobName, storageType)
  431. return nil
  432. }
  433. func GrampusTrainJobShow(ctx *context.Context) {
  434. ctx.Data["PageIsCloudBrain"] = true
  435. var task *models.Cloudbrain
  436. task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid"))
  437. if err != nil {
  438. log.Error("GetCloudbrainByJobID failed:" + err.Error())
  439. ctx.ServerError("system error", err)
  440. return
  441. }
  442. attachment, err := models.GetAttachmentByUUID(task.Uuid)
  443. if err == nil {
  444. task.DatasetName = attachment.Name
  445. }
  446. if len(task.Parameters) > 0 {
  447. var parameters models.Parameters
  448. err := json.Unmarshal([]byte(task.Parameters), &parameters)
  449. if err != nil {
  450. log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err)
  451. ctx.ServerError("system error", err)
  452. return
  453. }
  454. if len(parameters.Parameter) > 0 {
  455. paramTemp := ""
  456. for _, Parameter := range parameters.Parameter {
  457. param := Parameter.Label + " = " + Parameter.Value + "; "
  458. paramTemp = paramTemp + param
  459. }
  460. task.Parameters = paramTemp[:len(paramTemp)-2]
  461. }
  462. }
  463. if task.DeletedAt.IsZero() { //normal record
  464. result, err := grampus.GetJob(task.JobID)
  465. if err != nil {
  466. log.Error("GetJob failed:" + err.Error())
  467. ctx.ServerError("GetJob failed", err)
  468. return
  469. }
  470. if result != nil {
  471. task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status)
  472. if task.Status != result.JobInfo.Status || result.JobInfo.Status == models.GrampusStatusRunning {
  473. task.Duration = result.JobInfo.RunSec
  474. task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
  475. if task.StartTime == 0 && result.JobInfo.StartedAt > 0 {
  476. task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt)
  477. }
  478. if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
  479. task.EndTime = task.StartTime.Add(task.Duration)
  480. }
  481. task.CorrectCreateUnix()
  482. err = models.UpdateJob(task)
  483. if err != nil {
  484. log.Error("UpdateJob failed:" + err.Error())
  485. }
  486. }
  487. }
  488. }
  489. taskList := make([]*models.Cloudbrain, 0)
  490. taskList = append(taskList, task)
  491. ctx.Data["version_list_task"] = taskList
  492. ctx.HTML(http.StatusOK, tplGrampusTrainJobShow)
  493. }
  494. func GrampusGetLog(ctx *context.Context) {
  495. jobID := ctx.Params(":jobid")
  496. job, err := models.GetCloudbrainByJobID(jobID)
  497. if err != nil {
  498. log.Error("GetCloudbrainByJobID failed: %v", err, ctx.Data["MsgID"])
  499. ctx.ServerError(err.Error(), err)
  500. return
  501. }
  502. content, err := grampus.GetTrainJobLog(job.JobID)
  503. if err != nil {
  504. log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
  505. ctx.ServerError(err.Error(), err)
  506. return
  507. }
  508. ctx.JSON(http.StatusOK, map[string]interface{}{
  509. "JobName": job.JobName,
  510. "Content": content,
  511. })
  512. return
  513. }