You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

modelarts.go 32 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago

  1. package repo
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "io"
  6. "net/http"
  7. "os"
  8. "path"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "code.gitea.io/gitea/models"
  13. "code.gitea.io/gitea/modules/auth"
  14. "code.gitea.io/gitea/modules/base"
  15. "code.gitea.io/gitea/modules/context"
  16. "code.gitea.io/gitea/modules/git"
  17. "code.gitea.io/gitea/modules/log"
  18. "code.gitea.io/gitea/modules/modelarts"
  19. "code.gitea.io/gitea/modules/obs"
  20. "code.gitea.io/gitea/modules/setting"
  21. "code.gitea.io/gitea/modules/storage"
  22. "github.com/unknwon/com"
  23. )
  24. const (
  25. // tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  26. tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  27. tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
  28. tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
  29. tplModelArtsIndex base.TplName = "repo/modelarts/index"
  30. tplModelArtsNew base.TplName = "repo/modelarts/new"
  31. tplModelArtsShow base.TplName = "repo/modelarts/show"
  32. tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
  33. tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
  34. tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
  35. tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
  36. )
  37. // MustEnableDataset check if repository enable internal cb
  38. func MustEnableModelArts(ctx *context.Context) {
  39. if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
  40. ctx.NotFound("MustEnableCloudbrain", nil)
  41. return
  42. }
  43. }
  44. func ModelArtsIndex(ctx *context.Context) {
  45. MustEnableModelArts(ctx)
  46. repo := ctx.Repo.Repository
  47. page := ctx.QueryInt("page")
  48. if page <= 0 {
  49. page = 1
  50. }
  51. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  52. ListOptions: models.ListOptions{
  53. Page: page,
  54. PageSize: setting.UI.IssuePagingNum,
  55. },
  56. RepoID: repo.ID,
  57. Type: models.TypeCloudBrainTwo,
  58. })
  59. if err != nil {
  60. ctx.ServerError("Cloudbrain", err)
  61. return
  62. }
  63. for i, task := range ciTasks {
  64. if task.Status == string(models.JobRunning) {
  65. ciTasks[i].CanDebug = true
  66. } else {
  67. ciTasks[i].CanDebug = false
  68. }
  69. ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
  70. }
  71. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  72. pager.SetDefaultParams(ctx)
  73. ctx.Data["Page"] = pager
  74. ctx.Data["PageIsCloudBrain"] = true
  75. ctx.Data["Tasks"] = ciTasks
  76. ctx.HTML(200, tplModelArtsIndex)
  77. }
  78. func ModelArtsNew(ctx *context.Context) {
  79. ctx.Data["PageIsCloudBrain"] = true
  80. t := time.Now()
  81. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  82. ctx.Data["job_name"] = jobName
  83. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  84. if err != nil {
  85. ctx.ServerError("GetAllUserAttachments failed:", err)
  86. return
  87. }
  88. ctx.Data["attachments"] = attachs
  89. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  90. ctx.Data["env"] = modelarts.NotebookEnv
  91. ctx.Data["notebook_type"] = modelarts.NotebookType
  92. if modelarts.FlavorInfos == nil {
  93. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  94. }
  95. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  96. ctx.HTML(200, tplModelArtsNew)
  97. }
  98. func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
  99. ctx.Data["PageIsCloudBrain"] = true
  100. jobName := form.JobName
  101. uuid := form.Attachment
  102. description := form.Description
  103. //repo := ctx.Repo.Repository
  104. if !jobNamePattern.MatchString(jobName) {
  105. ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplModelArtsNew, &form)
  106. return
  107. }
  108. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  109. if err != nil {
  110. ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
  111. return
  112. }
  113. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  114. }
  115. func ModelArtsShow(ctx *context.Context) {
  116. ctx.Data["PageIsCloudBrain"] = true
  117. var jobID = ctx.Params(":jobid")
  118. task, err := models.GetCloudbrainByJobID(jobID)
  119. if err != nil {
  120. ctx.Data["error"] = err.Error()
  121. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  122. return
  123. }
  124. result, err := modelarts.GetJob(jobID)
  125. if err != nil {
  126. ctx.Data["error"] = err.Error()
  127. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  128. return
  129. }
  130. if result != nil {
  131. task.Status = result.Status
  132. err = models.UpdateJob(task)
  133. if err != nil {
  134. ctx.Data["error"] = err.Error()
  135. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  136. return
  137. }
  138. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  139. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  140. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  141. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  142. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  143. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  144. }
  145. ctx.Data["task"] = task
  146. ctx.Data["jobID"] = jobID
  147. ctx.Data["result"] = result
  148. ctx.HTML(200, tplModelArtsShow)
  149. }
  150. func ModelArtsDebug(ctx *context.Context) {
  151. var jobID = ctx.Params(":jobid")
  152. _, err := models.GetCloudbrainByJobID(jobID)
  153. if err != nil {
  154. ctx.ServerError("GetCloudbrainByJobID failed", err)
  155. return
  156. }
  157. result, err := modelarts.GetJob(jobID)
  158. if err != nil {
  159. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  160. return
  161. }
  162. res, err := modelarts.GetJobToken(jobID)
  163. if err != nil {
  164. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  165. return
  166. }
  167. urls := strings.Split(result.Spec.Annotations.Url, "/")
  168. urlPrefix := result.Spec.Annotations.TargetDomain
  169. for i, url := range urls {
  170. if i > 2 {
  171. urlPrefix += "/" + url
  172. }
  173. }
  174. //urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
  175. log.Info(urlPrefix)
  176. debugUrl := urlPrefix + "?token=" + res.Token
  177. ctx.Redirect(debugUrl)
  178. }
  179. func ModelArtsStop(ctx *context.Context) {
  180. var jobID = ctx.Params(":jobid")
  181. log.Info(jobID)
  182. task, err := models.GetCloudbrainByJobID(jobID)
  183. if err != nil {
  184. ctx.ServerError("GetCloudbrainByJobID failed", err)
  185. return
  186. }
  187. if task.Status != string(models.JobRunning) {
  188. log.Error("the job(%s) is not running", task.JobName)
  189. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  190. return
  191. }
  192. param := models.NotebookAction{
  193. Action: models.ActionStop,
  194. }
  195. res, err := modelarts.StopJob(jobID, param)
  196. if err != nil {
  197. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  198. ctx.ServerError("StopJob failed", err)
  199. return
  200. }
  201. task.Status = res.CurrentStatus
  202. err = models.UpdateJob(task)
  203. if err != nil {
  204. ctx.ServerError("UpdateJob failed", err)
  205. return
  206. }
  207. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  208. }
  209. func ModelArtsDel(ctx *context.Context) {
  210. var jobID = ctx.Params(":jobid")
  211. task, err := models.GetCloudbrainByJobID(jobID)
  212. if err != nil {
  213. ctx.ServerError("GetCloudbrainByJobID failed", err)
  214. return
  215. }
  216. if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
  217. log.Error("the job(%s) has not been stopped", task.JobName)
  218. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  219. return
  220. }
  221. _, err = modelarts.DelJob(jobID)
  222. if err != nil {
  223. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  224. ctx.ServerError("DelJob failed", err)
  225. return
  226. }
  227. err = models.DeleteJob(task)
  228. if err != nil {
  229. ctx.ServerError("DeleteJob failed", err)
  230. return
  231. }
  232. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  233. }
  234. func NotebookIndex(ctx *context.Context) {
  235. MustEnableModelArts(ctx)
  236. repo := ctx.Repo.Repository
  237. page := ctx.QueryInt("page")
  238. if page <= 0 {
  239. page = 1
  240. }
  241. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  242. ListOptions: models.ListOptions{
  243. Page: page,
  244. PageSize: setting.UI.IssuePagingNum,
  245. },
  246. RepoID: repo.ID,
  247. Type: models.TypeCloudBrainNotebook,
  248. })
  249. if err != nil {
  250. ctx.ServerError("Cloudbrain", err)
  251. return
  252. }
  253. for i, task := range ciTasks {
  254. if task.Status == string(models.JobRunning) {
  255. ciTasks[i].CanDebug = true
  256. } else {
  257. ciTasks[i].CanDebug = false
  258. }
  259. }
  260. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  261. pager.SetDefaultParams(ctx)
  262. ctx.Data["Page"] = pager
  263. ctx.Data["PageIsCloudBrain"] = true
  264. ctx.Data["Tasks"] = ciTasks
  265. ctx.HTML(200, tplModelArtsNotebookIndex)
  266. }
  267. func NotebookNew(ctx *context.Context) {
  268. ctx.Data["PageIsCloudBrain"] = true
  269. t := time.Now()
  270. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  271. ctx.Data["job_name"] = jobName
  272. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  273. if err != nil {
  274. ctx.ServerError("GetAllUserAttachments failed:", err)
  275. return
  276. }
  277. ctx.Data["attachments"] = attachs
  278. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  279. ctx.Data["env"] = modelarts.NotebookEnv
  280. ctx.Data["notebook_type"] = modelarts.NotebookType
  281. if modelarts.FlavorInfos == nil {
  282. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  283. }
  284. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  285. ctx.HTML(200, tplModelArtsNotebookNew)
  286. }
  287. func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
  288. ctx.Data["PageIsNotebook"] = true
  289. jobName := form.JobName
  290. uuid := form.Attachment
  291. description := form.Description
  292. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  293. if err != nil {
  294. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
  295. return
  296. }
  297. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  298. }
  299. func NotebookShow(ctx *context.Context) {
  300. ctx.Data["PageIsCloudBrain"] = true
  301. var jobID = ctx.Params(":jobid")
  302. task, err := models.GetCloudbrainByJobID(jobID)
  303. if err != nil {
  304. ctx.Data["error"] = err.Error()
  305. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  306. return
  307. }
  308. result, err := modelarts.GetJob(jobID)
  309. if err != nil {
  310. ctx.Data["error"] = err.Error()
  311. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  312. return
  313. }
  314. if result != nil {
  315. task.Status = result.Status
  316. err = models.UpdateJob(task)
  317. if err != nil {
  318. ctx.Data["error"] = err.Error()
  319. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  320. return
  321. }
  322. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  323. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  324. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  325. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  326. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  327. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  328. }
  329. ctx.Data["task"] = task
  330. ctx.Data["jobID"] = jobID
  331. ctx.Data["result"] = result
  332. ctx.HTML(200, tplModelArtsNotebookShow)
  333. }
  334. func NotebookDebug(ctx *context.Context) {
  335. var jobID = ctx.Params(":jobid")
  336. _, err := models.GetCloudbrainByJobID(jobID)
  337. if err != nil {
  338. ctx.ServerError("GetCloudbrainByJobID failed", err)
  339. return
  340. }
  341. result, err := modelarts.GetJob(jobID)
  342. if err != nil {
  343. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  344. return
  345. }
  346. res, err := modelarts.GetJobToken(jobID)
  347. if err != nil {
  348. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  349. return
  350. }
  351. urls := strings.Split(result.Spec.Annotations.Url, "/")
  352. urlPrefix := result.Spec.Annotations.TargetDomain
  353. for i, url := range urls {
  354. if i > 2 {
  355. urlPrefix += "/" + url
  356. }
  357. }
  358. debugUrl := urlPrefix + "?token=" + res.Token
  359. ctx.Redirect(debugUrl)
  360. }
  361. func NotebookStop(ctx *context.Context) {
  362. var jobID = ctx.Params(":jobid")
  363. log.Info(jobID)
  364. task, err := models.GetCloudbrainByJobID(jobID)
  365. if err != nil {
  366. ctx.ServerError("GetCloudbrainByJobID failed", err)
  367. return
  368. }
  369. if task.Status != string(models.JobRunning) {
  370. log.Error("the job(%s) is not running", task.JobName)
  371. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  372. return
  373. }
  374. param := models.NotebookAction{
  375. Action: models.ActionStop,
  376. }
  377. res, err := modelarts.StopJob(jobID, param)
  378. if err != nil {
  379. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  380. ctx.ServerError("StopJob failed", err)
  381. return
  382. }
  383. task.Status = res.CurrentStatus
  384. err = models.UpdateJob(task)
  385. if err != nil {
  386. ctx.ServerError("UpdateJob failed", err)
  387. return
  388. }
  389. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  390. }
  391. func NotebookDel(ctx *context.Context) {
  392. var jobID = ctx.Params(":jobid")
  393. task, err := models.GetCloudbrainByJobID(jobID)
  394. if err != nil {
  395. ctx.ServerError("GetCloudbrainByJobID failed", err)
  396. return
  397. }
  398. if task.Status != string(models.JobStopped) {
  399. log.Error("the job(%s) has not been stopped", task.JobName)
  400. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  401. return
  402. }
  403. _, err = modelarts.DelNotebook(jobID)
  404. if err != nil {
  405. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  406. ctx.ServerError("DelJob failed", err)
  407. return
  408. }
  409. err = models.DeleteJob(task)
  410. if err != nil {
  411. ctx.ServerError("DeleteJob failed", err)
  412. return
  413. }
  414. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  415. }
  416. func TrainJobIndex(ctx *context.Context) {
  417. MustEnableModelArts(ctx)
  418. //can, err := canUserCreateTrainJob(ctx.User.ID)
  419. //if err != nil {
  420. // ctx.ServerError("canUserCreateTrainJob", err)
  421. // return
  422. //}
  423. //
  424. //ctx.Data["CanCreate"] = can
  425. repo := ctx.Repo.Repository
  426. page := ctx.QueryInt("page")
  427. if page <= 0 {
  428. page = 1
  429. }
  430. tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  431. ListOptions: models.ListOptions{
  432. Page: page,
  433. PageSize: setting.UI.IssuePagingNum,
  434. },
  435. RepoID: repo.ID,
  436. Type: models.TypeCloudBrainTrainJob,
  437. })
  438. if err != nil {
  439. ctx.ServerError("Cloudbrain", err)
  440. return
  441. }
  442. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  443. pager.SetDefaultParams(ctx)
  444. ctx.Data["Page"] = pager
  445. ctx.Data["PageIsCloudBrain"] = true
  446. ctx.Data["Tasks"] = tasks
  447. ctx.HTML(200, tplModelArtsTrainJobIndex)
  448. }
  449. func TrainJobNew(ctx *context.Context) {
  450. ctx.Data["PageIsCloudBrain"] = true
  451. //can, err := canUserCreateTrainJob(ctx.User.ID)
  452. //if err != nil {
  453. // ctx.ServerError("canUserCreateTrainJob", err)
  454. // return
  455. //}
  456. //
  457. //if !can {
  458. // log.Error("the user can not create train-job")
  459. // ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
  460. // return
  461. //}
  462. t := time.Now()
  463. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  464. ctx.Data["job_name"] = jobName
  465. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  466. if err != nil {
  467. ctx.ServerError("GetAllUserAttachments failed:", err)
  468. return
  469. }
  470. ctx.Data["attachments"] = attachs
  471. var resourcePools modelarts.ResourcePool
  472. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  473. ctx.ServerError("json.Unmarshal failed:", err)
  474. return
  475. }
  476. ctx.Data["resource_pools"] = resourcePools.Info
  477. var engines modelarts.Engine
  478. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  479. ctx.ServerError("json.Unmarshal failed:", err)
  480. return
  481. }
  482. ctx.Data["engines"] = engines.Info
  483. var versionInfos modelarts.VersionInfo
  484. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  485. ctx.ServerError("json.Unmarshal failed:", err)
  486. return
  487. }
  488. ctx.Data["engine_versions"] = versionInfos.Version
  489. var flavorInfos modelarts.Flavor
  490. if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
  491. ctx.ServerError("json.Unmarshal failed:", err)
  492. return
  493. }
  494. ctx.Data["flavor_infos"] = flavorInfos.Info
  495. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  496. ctx.Data["train_url"] = outputObsPath
  497. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  498. if err != nil {
  499. ctx.ServerError("getConfigList failed:", err)
  500. return
  501. }
  502. ctx.Data["config_list"] = configList.ParaConfigs
  503. ctx.HTML(200, tplModelArtsTrainJobNew)
  504. }
  505. func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  506. ctx.Data["PageIsTrainJob"] = true
  507. jobName := form.JobName
  508. uuid := form.Attachment
  509. description := form.Description
  510. workServerNumber := form.WorkServerNumber
  511. engineID := form.EngineID
  512. bootFile := form.BootFile
  513. flavorCode := form.Flavor
  514. params := form.Params
  515. poolID := form.PoolID
  516. isSaveParam := form.IsSaveParam
  517. repo := ctx.Repo.Repository
  518. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  519. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  520. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  521. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  522. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  523. can, err := canUserCreateTrainJob(ctx.User.ID)
  524. if err != nil {
  525. ctx.ServerError("canUserCreateTrainJob", err)
  526. return
  527. }
  528. if !can {
  529. log.Error("the user can not create train-job")
  530. ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form)
  531. return
  532. }
  533. //param check
  534. if err := paramCheckCreateTrainJob(form); err != nil {
  535. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  536. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  537. return
  538. }
  539. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
  540. log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
  541. ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  542. return
  543. }
  544. //todo: upload code (send to file_server todo this work?)
  545. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  546. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  547. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
  548. return
  549. }
  550. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  551. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  552. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
  553. return
  554. }
  555. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  556. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  557. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
  558. return
  559. }
  560. //todo: del local code?
  561. var parameters models.Parameters
  562. param := make([]models.Parameter, 0)
  563. param = append(param, models.Parameter{
  564. Label: modelarts.TrainUrl,
  565. Value: outputObsPath,
  566. }, models.Parameter{
  567. Label: modelarts.DataUrl,
  568. Value: dataPath,
  569. })
  570. if len(params) != 0 {
  571. err := json.Unmarshal([]byte(params), &parameters)
  572. if err != nil {
  573. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  574. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
  575. return
  576. }
  577. for _, parameter := range parameters.Parameter {
  578. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  579. param = append(param, models.Parameter{
  580. Label: parameter.Label,
  581. Value: parameter.Value,
  582. })
  583. }
  584. }
  585. }
  586. //save param config
  587. if isSaveParam == "on" {
  588. if form.ParameterTemplateName == "" {
  589. log.Error("ParameterTemplateName is empty")
  590. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
  591. return
  592. }
  593. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  594. ConfigName: form.ParameterTemplateName,
  595. Description: form.PrameterDescription,
  596. DataUrl: dataPath,
  597. AppUrl: codeObsPath,
  598. BootFileUrl: codeObsPath + bootFile,
  599. TrainUrl: outputObsPath,
  600. Flavor: models.Flavor{
  601. Code: flavorCode,
  602. },
  603. WorkServerNum: workServerNumber,
  604. EngineID: int64(engineID),
  605. LogUrl: logObsPath,
  606. PoolID: poolID,
  607. Parameter: param,
  608. })
  609. if err != nil {
  610. log.Error("Failed to CreateTrainJobConfig: %v", err)
  611. ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
  612. return
  613. }
  614. }
  615. req := &modelarts.GenerateTrainJobReq{
  616. JobName: jobName,
  617. DataUrl: dataPath,
  618. Description: description,
  619. CodeObsPath: codeObsPath,
  620. BootFile: codeObsPath + bootFile,
  621. TrainUrl: outputObsPath,
  622. FlavorCode: flavorCode,
  623. WorkServerNumber: workServerNumber,
  624. EngineID: int64(engineID),
  625. LogUrl: logObsPath,
  626. PoolID: poolID,
  627. Uuid: uuid,
  628. Parameters: param,
  629. }
  630. err = modelarts.GenerateTrainJob(ctx, req)
  631. if err != nil {
  632. log.Error("GenerateTrainJob failed:%v", err.Error())
  633. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  634. return
  635. }
  636. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  637. }
  638. // readDir reads the directory named by dirname and returns
  639. // a list of directory entries sorted by filename.
  640. func readDir(dirname string) ([]os.FileInfo, error) {
  641. f, err := os.Open(dirname)
  642. if err != nil {
  643. return nil, err
  644. }
  645. list, err := f.Readdir(100)
  646. f.Close()
  647. if err != nil {
  648. //todo: can not upload empty folder
  649. if err == io.EOF {
  650. return nil, nil
  651. }
  652. return nil, err
  653. }
  654. //sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() })
  655. return list, nil
  656. }
  657. func uploadCodeToObs(codePath, jobName, parentDir string) error {
  658. files, err := readDir(codePath)
  659. if err != nil {
  660. log.Error("readDir(%s) failed: %s", codePath, err.Error())
  661. return err
  662. }
  663. for _, file := range files {
  664. if file.IsDir() {
  665. input := &obs.PutObjectInput{}
  666. input.Bucket = setting.Bucket
  667. input.Key = parentDir + file.Name() + "/"
  668. _, err = storage.ObsCli.PutObject(input)
  669. if err != nil {
  670. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  671. return err
  672. }
  673. if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
  674. log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
  675. return err
  676. }
  677. } else {
  678. input := &obs.PutFileInput{}
  679. input.Bucket = setting.Bucket
  680. input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
  681. input.SourceFile = codePath + file.Name()
  682. _, err = storage.ObsCli.PutFile(input)
  683. if err != nil {
  684. log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
  685. return err
  686. }
  687. }
  688. }
  689. return nil
  690. }
  691. func obsMkdir(dir string) error {
  692. input := &obs.PutObjectInput{}
  693. input.Bucket = setting.Bucket
  694. input.Key = dir
  695. _, err := storage.ObsCli.PutObject(input)
  696. if err != nil {
  697. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  698. return err
  699. }
  700. return nil
  701. }
  702. func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
  703. if !strings.HasSuffix(form.BootFile, ".py") {
  704. log.Error("the boot file(%s) must be a python file", form.BootFile)
  705. return errors.New("启动文件必须是python文件")
  706. }
  707. if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
  708. log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
  709. return errors.New("计算节点数必须在1-25之间")
  710. }
  711. return nil
  712. }
  713. func TrainJobShow(ctx *context.Context) {
  714. ctx.Data["PageIsCloudBrain"] = true
  715. var jobID = ctx.Params(":jobid")
  716. task, err := models.GetCloudbrainByJobID(jobID)
  717. if err != nil {
  718. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  719. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  720. return
  721. }
  722. attach, err := models.GetAttachmentByUUID(task.Uuid)
  723. if err != nil {
  724. log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
  725. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  726. return
  727. }
  728. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  729. if err != nil {
  730. log.Error("GetJob(%s) failed:%v", jobID, err.Error())
  731. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  732. return
  733. }
  734. if result != nil {
  735. result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
  736. if result.Duration != 0 {
  737. result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000)
  738. } else {
  739. result.TrainJobDuration = "00:00:00"
  740. }
  741. result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  742. err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration))
  743. if err != nil {
  744. ctx.ServerError("UpdateJob failed", err)
  745. return
  746. }
  747. result.DatasetName = attach.Name
  748. }
  749. resultLogFile, resultLog, err := trainJobGetLog(jobID)
  750. if err != nil {
  751. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  752. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  753. return
  754. }
  755. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  756. ctx.Data["log"] = resultLog
  757. ctx.Data["task"] = task
  758. ctx.Data["jobID"] = jobID
  759. ctx.Data["result"] = result
  760. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  761. }
  // addZero returns the base-10 text of t, prefixed with a single "0" when
  // t is less than 10 (so 7 becomes "07" while 12 stays "12").
  func addZero(t int64) (m string) {
  	m = strconv.FormatInt(t, 10)
  	if t < 10 {
  		m = "0" + m
  	}
  	return m
  }
  770. func TrainJobGetLog(ctx *context.Context) {
  771. ctx.Data["PageIsTrainJob"] = true
  772. var jobID = ctx.Params(":jobid")
  773. var logFileName = ctx.Query("file_name")
  774. var baseLine = ctx.Query("base_line")
  775. var order = ctx.Query("order")
  776. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  777. log.Error("order(%s) check failed", order)
  778. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  779. return
  780. }
  781. task, err := models.GetCloudbrainByJobID(jobID)
  782. if err != nil {
  783. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  784. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  785. return
  786. }
  787. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  788. if err != nil {
  789. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  790. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  791. return
  792. }
  793. ctx.Data["log"] = result
  794. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  795. }
  796. func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  797. task, err := models.GetCloudbrainByJobID(jobID)
  798. if err != nil {
  799. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  800. return nil, nil, err
  801. }
  802. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  803. if err != nil {
  804. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  805. return nil, nil, err
  806. }
  807. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
  808. if err != nil {
  809. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  810. return nil, nil, err
  811. }
  812. return resultLogFile, result, err
  813. }
  814. func TrainJobDel(ctx *context.Context) {
  815. var jobID = ctx.Params(":jobid")
  816. task, err := models.GetCloudbrainByJobID(jobID)
  817. if err != nil {
  818. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  819. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  820. return
  821. }
  822. _, err = modelarts.DelTrainJob(jobID)
  823. if err != nil {
  824. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  825. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  826. return
  827. }
  828. err = models.DeleteJob(task)
  829. if err != nil {
  830. ctx.ServerError("DeleteJob failed", err)
  831. return
  832. }
  833. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  834. }
  835. func TrainJobStop(ctx *context.Context) {
  836. var jobID = ctx.Params(":jobid")
  837. task, err := models.GetCloudbrainByJobID(jobID)
  838. if err != nil {
  839. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  840. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  841. return
  842. }
  843. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  844. if err != nil {
  845. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  846. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  847. return
  848. }
  849. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  850. }
  851. func canUserCreateTrainJob(uid int64) (bool, error) {
  852. org, err := models.GetOrgByName(setting.AllowedOrg)
  853. if err != nil {
  854. log.Error("get allowed org failed: ", setting.AllowedOrg)
  855. return false, err
  856. }
  857. return org.IsOrgMember(uid)
  858. }
  859. func TrainJobGetConfigList(ctx *context.Context) {
  860. ctx.Data["PageIsTrainJob"] = true
  861. var jobID = ctx.Params(":jobid")
  862. var logFileName = ctx.Query("file_name")
  863. var baseLine = ctx.Query("base_line")
  864. var order = ctx.Query("order")
  865. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  866. log.Error("order(%s) check failed", order)
  867. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  868. return
  869. }
  870. task, err := models.GetCloudbrainByJobID(jobID)
  871. if err != nil {
  872. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  873. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  874. return
  875. }
  876. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  877. if err != nil {
  878. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  879. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  880. return
  881. }
  882. ctx.Data["log"] = result
  883. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  884. }
  885. func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  886. var result models.GetConfigListResult
  887. list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
  888. if err != nil {
  889. log.Error("GetConfigList failed:", err)
  890. return &result, err
  891. }
  892. for _, config := range list.ParaConfigs {
  893. paraConfig, err := modelarts.GetParaConfig(config.ConfigName, configType)
  894. if err != nil {
  895. log.Error("GetParaConfig failed:", err)
  896. return &result, err
  897. }
  898. config.Result = paraConfig
  899. }
  900. return list, nil
  901. }
  902. func TrainJobShowModels(ctx *context.Context) {
  903. ctx.Data["PageIsCloudBrain"] = true
  904. jobID := ctx.Params(":jobid")
  905. parentDir := ctx.Query("parentDir")
  906. dirArray := strings.Split(parentDir, "/")
  907. task, err := models.GetCloudbrainByJobID(jobID)
  908. if err != nil {
  909. log.Error("no such job!", ctx.Data["msgID"])
  910. ctx.ServerError("no such job:", err)
  911. return
  912. }
  913. models, err := storage.GetObsListObject(task.JobName, parentDir)
  914. if err != nil {
  915. log.Info("get TrainJobListModel failed:", err)
  916. ctx.ServerError("GetObsListObject:", err)
  917. return
  918. }
  919. ctx.Data["Path"] = dirArray
  920. ctx.Data["Dirs"] = models
  921. ctx.Data["task"] = task
  922. ctx.Data["JobID"] = jobID
  923. ctx.HTML(200, tplModelArtsTrainJobShowModels)
  924. }
  925. func TrainJobDownloadModel(ctx *context.Context) {
  926. parentDir := ctx.Query("parentDir")
  927. fileName := ctx.Query("fileName")
  928. jobName := ctx.Query("jobName")
  929. url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
  930. if err != nil {
  931. log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
  932. ctx.ServerError("GetObsCreateSignedUrl", err)
  933. return
  934. }
  935. http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
  936. }