You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

modelarts.go 30 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038
  1. package repo
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "os"
  9. "path"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "code.gitea.io/gitea/modules/git"
  14. "code.gitea.io/gitea/modules/modelarts"
  15. "code.gitea.io/gitea/modules/obs"
  16. "code.gitea.io/gitea/modules/storage"
  17. "github.com/unknwon/com"
  18. "code.gitea.io/gitea/models"
  19. "code.gitea.io/gitea/modules/auth"
  20. "code.gitea.io/gitea/modules/base"
  21. "code.gitea.io/gitea/modules/context"
  22. "code.gitea.io/gitea/modules/log"
  23. "code.gitea.io/gitea/modules/setting"
  24. )
  25. const (
  26. // tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  27. tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
  28. tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
  29. tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
  30. tplModelArtsIndex base.TplName = "repo/modelarts/index"
  31. tplModelArtsNew base.TplName = "repo/modelarts/new"
  32. tplModelArtsShow base.TplName = "repo/modelarts/show"
  33. tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
  34. tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
  35. tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
  36. )
  37. // MustEnableDataset check if repository enable internal cb
  38. func MustEnableModelArts(ctx *context.Context) {
  39. if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
  40. ctx.NotFound("MustEnableCloudbrain", nil)
  41. return
  42. }
  43. }
  44. func ModelArtsIndex(ctx *context.Context) {
  45. MustEnableModelArts(ctx)
  46. repo := ctx.Repo.Repository
  47. page := ctx.QueryInt("page")
  48. if page <= 0 {
  49. page = 1
  50. }
  51. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  52. ListOptions: models.ListOptions{
  53. Page: page,
  54. PageSize: setting.UI.IssuePagingNum,
  55. },
  56. RepoID: repo.ID,
  57. Type: models.TypeCloudBrainTwo,
  58. })
  59. if err != nil {
  60. ctx.ServerError("Cloudbrain", err)
  61. return
  62. }
  63. for i, task := range ciTasks {
  64. if task.Status == string(models.JobRunning) {
  65. ciTasks[i].CanDebug = true
  66. } else {
  67. ciTasks[i].CanDebug = false
  68. }
  69. ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
  70. }
  71. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  72. pager.SetDefaultParams(ctx)
  73. ctx.Data["Page"] = pager
  74. ctx.Data["PageIsCloudBrain"] = true
  75. ctx.Data["Tasks"] = ciTasks
  76. ctx.HTML(200, tplModelArtsIndex)
  77. }
  78. func ModelArtsNew(ctx *context.Context) {
  79. ctx.Data["PageIsCloudBrain"] = true
  80. t := time.Now()
  81. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  82. ctx.Data["job_name"] = jobName
  83. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  84. if err != nil {
  85. ctx.ServerError("GetAllUserAttachments failed:", err)
  86. return
  87. }
  88. ctx.Data["attachments"] = attachs
  89. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  90. ctx.Data["env"] = modelarts.NotebookEnv
  91. ctx.Data["notebook_type"] = modelarts.NotebookType
  92. if modelarts.FlavorInfos == nil {
  93. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  94. }
  95. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  96. ctx.HTML(200, tplModelArtsNew)
  97. }
  98. func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
  99. ctx.Data["PageIsCloudBrain"] = true
  100. jobName := form.JobName
  101. uuid := form.Attachment
  102. description := form.Description
  103. //repo := ctx.Repo.Repository
  104. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  105. if err != nil {
  106. ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
  107. return
  108. }
  109. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  110. }
  111. func ModelArtsShow(ctx *context.Context) {
  112. ctx.Data["PageIsCloudBrain"] = true
  113. var jobID = ctx.Params(":jobid")
  114. task, err := models.GetCloudbrainByJobID(jobID)
  115. if err != nil {
  116. ctx.Data["error"] = err.Error()
  117. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  118. return
  119. }
  120. result, err := modelarts.GetJob(jobID)
  121. if err != nil {
  122. ctx.Data["error"] = err.Error()
  123. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  124. return
  125. }
  126. if result != nil {
  127. task.Status = result.Status
  128. err = models.UpdateJob(task)
  129. if err != nil {
  130. ctx.Data["error"] = err.Error()
  131. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  132. return
  133. }
  134. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  135. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  136. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  137. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  138. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  139. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  140. }
  141. ctx.Data["task"] = task
  142. ctx.Data["jobID"] = jobID
  143. ctx.Data["result"] = result
  144. ctx.HTML(200, tplModelArtsShow)
  145. }
  146. func ModelArtsDebug(ctx *context.Context) {
  147. var jobID = ctx.Params(":jobid")
  148. _, err := models.GetCloudbrainByJobID(jobID)
  149. if err != nil {
  150. ctx.ServerError("GetCloudbrainByJobID failed", err)
  151. return
  152. }
  153. result, err := modelarts.GetJob(jobID)
  154. if err != nil {
  155. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  156. return
  157. }
  158. res, err := modelarts.GetJobToken(jobID)
  159. if err != nil {
  160. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  161. return
  162. }
  163. urls := strings.Split(result.Spec.Annotations.Url, "/")
  164. urlPrefix := result.Spec.Annotations.TargetDomain
  165. for i, url := range urls {
  166. if i > 2 {
  167. urlPrefix += "/" + url
  168. }
  169. }
  170. //urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
  171. log.Info(urlPrefix)
  172. debugUrl := urlPrefix + "?token=" + res.Token
  173. ctx.Redirect(debugUrl)
  174. }
  175. func ModelArtsStop(ctx *context.Context) {
  176. var jobID = ctx.Params(":jobid")
  177. log.Info(jobID)
  178. task, err := models.GetCloudbrainByJobID(jobID)
  179. if err != nil {
  180. ctx.ServerError("GetCloudbrainByJobID failed", err)
  181. return
  182. }
  183. if task.Status != string(models.JobRunning) {
  184. log.Error("the job(%s) is not running", task.JobName)
  185. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  186. return
  187. }
  188. param := models.NotebookAction{
  189. Action: models.ActionStop,
  190. }
  191. res, err := modelarts.StopJob(jobID, param)
  192. if err != nil {
  193. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  194. ctx.ServerError("StopJob failed", err)
  195. return
  196. }
  197. task.Status = res.CurrentStatus
  198. err = models.UpdateJob(task)
  199. if err != nil {
  200. ctx.ServerError("UpdateJob failed", err)
  201. return
  202. }
  203. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  204. }
  205. func ModelArtsDel(ctx *context.Context) {
  206. var jobID = ctx.Params(":jobid")
  207. task, err := models.GetCloudbrainByJobID(jobID)
  208. if err != nil {
  209. ctx.ServerError("GetCloudbrainByJobID failed", err)
  210. return
  211. }
  212. if task.Status != string(models.JobStopped) {
  213. log.Error("the job(%s) has not been stopped", task.JobName)
  214. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  215. return
  216. }
  217. _, err = modelarts.DelJob(jobID)
  218. if err != nil {
  219. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  220. ctx.ServerError("DelJob failed", err)
  221. return
  222. }
  223. err = models.DeleteJob(task)
  224. if err != nil {
  225. ctx.ServerError("DeleteJob failed", err)
  226. return
  227. }
  228. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  229. }
  230. func NotebookIndex(ctx *context.Context) {
  231. MustEnableModelArts(ctx)
  232. repo := ctx.Repo.Repository
  233. page := ctx.QueryInt("page")
  234. if page <= 0 {
  235. page = 1
  236. }
  237. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  238. ListOptions: models.ListOptions{
  239. Page: page,
  240. PageSize: setting.UI.IssuePagingNum,
  241. },
  242. RepoID: repo.ID,
  243. Type: models.TypeCloudBrainNotebook,
  244. })
  245. if err != nil {
  246. ctx.ServerError("Cloudbrain", err)
  247. return
  248. }
  249. for i, task := range ciTasks {
  250. if task.Status == string(models.JobRunning) {
  251. ciTasks[i].CanDebug = true
  252. } else {
  253. ciTasks[i].CanDebug = false
  254. }
  255. }
  256. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  257. pager.SetDefaultParams(ctx)
  258. ctx.Data["Page"] = pager
  259. ctx.Data["PageIsNotebook"] = true
  260. ctx.Data["Tasks"] = ciTasks
  261. ctx.HTML(200, tplModelArtsNotebookIndex)
  262. }
  263. func NotebookNew(ctx *context.Context) {
  264. ctx.Data["PageIsNotebook"] = true
  265. t := time.Now()
  266. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  267. ctx.Data["job_name"] = jobName
  268. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  269. if err != nil {
  270. ctx.ServerError("GetAllUserAttachments failed:", err)
  271. return
  272. }
  273. ctx.Data["attachments"] = attachs
  274. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  275. ctx.Data["env"] = modelarts.NotebookEnv
  276. ctx.Data["notebook_type"] = modelarts.NotebookType
  277. if modelarts.FlavorInfos == nil {
  278. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  279. }
  280. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  281. ctx.HTML(200, tplModelArtsNew)
  282. }
  283. func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
  284. ctx.Data["PageIsNotebook"] = true
  285. jobName := form.JobName
  286. uuid := form.Attachment
  287. description := form.Description
  288. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  289. if err != nil {
  290. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
  291. return
  292. }
  293. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  294. }
  295. func NotebookShow(ctx *context.Context) {
  296. ctx.Data["PageIsNotebook"] = true
  297. var jobID = ctx.Params(":jobid")
  298. task, err := models.GetCloudbrainByJobID(jobID)
  299. if err != nil {
  300. ctx.Data["error"] = err.Error()
  301. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  302. return
  303. }
  304. result, err := modelarts.GetJob(jobID)
  305. if err != nil {
  306. ctx.Data["error"] = err.Error()
  307. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  308. return
  309. }
  310. if result != nil {
  311. task.Status = result.Status
  312. err = models.UpdateJob(task)
  313. if err != nil {
  314. ctx.Data["error"] = err.Error()
  315. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  316. return
  317. }
  318. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  319. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  320. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  321. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  322. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  323. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  324. }
  325. ctx.Data["task"] = task
  326. ctx.Data["jobID"] = jobID
  327. ctx.Data["result"] = result
  328. ctx.HTML(200, tplModelArtsNotebookShow)
  329. }
  330. func NotebookDebug(ctx *context.Context) {
  331. var jobID = ctx.Params(":jobid")
  332. _, err := models.GetCloudbrainByJobID(jobID)
  333. if err != nil {
  334. ctx.ServerError("GetCloudbrainByJobID failed", err)
  335. return
  336. }
  337. result, err := modelarts.GetJob(jobID)
  338. if err != nil {
  339. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  340. return
  341. }
  342. res, err := modelarts.GetJobToken(jobID)
  343. if err != nil {
  344. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  345. return
  346. }
  347. urls := strings.Split(result.Spec.Annotations.Url, "/")
  348. urlPrefix := result.Spec.Annotations.TargetDomain
  349. for i, url := range urls {
  350. if i > 2 {
  351. urlPrefix += "/" + url
  352. }
  353. }
  354. debugUrl := urlPrefix + "?token=" + res.Token
  355. ctx.Redirect(debugUrl)
  356. }
  357. func NotebookStop(ctx *context.Context) {
  358. var jobID = ctx.Params(":jobid")
  359. log.Info(jobID)
  360. task, err := models.GetCloudbrainByJobID(jobID)
  361. if err != nil {
  362. ctx.ServerError("GetCloudbrainByJobID failed", err)
  363. return
  364. }
  365. if task.Status != string(models.JobRunning) {
  366. log.Error("the job(%s) is not running", task.JobName)
  367. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  368. return
  369. }
  370. param := models.NotebookAction{
  371. Action: models.ActionStop,
  372. }
  373. res, err := modelarts.StopJob(jobID, param)
  374. if err != nil {
  375. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  376. ctx.ServerError("StopJob failed", err)
  377. return
  378. }
  379. task.Status = res.CurrentStatus
  380. err = models.UpdateJob(task)
  381. if err != nil {
  382. ctx.ServerError("UpdateJob failed", err)
  383. return
  384. }
  385. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  386. }
  387. func NotebookDel(ctx *context.Context) {
  388. var jobID = ctx.Params(":jobid")
  389. task, err := models.GetCloudbrainByJobID(jobID)
  390. if err != nil {
  391. ctx.ServerError("GetCloudbrainByJobID failed", err)
  392. return
  393. }
  394. if task.Status != string(models.JobStopped) {
  395. log.Error("the job(%s) has not been stopped", task.JobName)
  396. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  397. return
  398. }
  399. _, err = modelarts.DelNotebook(jobID)
  400. if err != nil {
  401. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  402. ctx.ServerError("DelJob failed", err)
  403. return
  404. }
  405. err = models.DeleteJob(task)
  406. if err != nil {
  407. ctx.ServerError("DeleteJob failed", err)
  408. return
  409. }
  410. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  411. }
  412. func TrainJobIndex(ctx *context.Context) {
  413. MustEnableModelArts(ctx)
  414. can, err := canUserCreateTrainJob(ctx.User.ID)
  415. if err != nil {
  416. ctx.ServerError("canUserCreateTrainJob", err)
  417. return
  418. }
  419. ctx.Data["CanCreate"] = can
  420. repo := ctx.Repo.Repository
  421. page := ctx.QueryInt("page")
  422. if page <= 0 {
  423. page = 1
  424. }
  425. tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  426. ListOptions: models.ListOptions{
  427. Page: page,
  428. PageSize: setting.UI.IssuePagingNum,
  429. },
  430. RepoID: repo.ID,
  431. Type: models.TypeCloudBrainTrainJob,
  432. })
  433. if err != nil {
  434. ctx.ServerError("Cloudbrain", err)
  435. return
  436. }
  437. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  438. pager.SetDefaultParams(ctx)
  439. ctx.Data["Page"] = pager
  440. ctx.Data["PageIsTrainJob"] = true
  441. ctx.Data["Tasks"] = tasks
  442. ctx.HTML(200, tplModelArtsTrainJobIndex)
  443. }
  444. func TrainJobNew(ctx *context.Context) {
  445. ctx.Data["PageIsTrainJob"] = true
  446. can, err := canUserCreateTrainJob(ctx.User.ID)
  447. if err != nil {
  448. ctx.ServerError("canUserCreateTrainJob", err)
  449. return
  450. }
  451. if !can {
  452. log.Error("the user can not create train-job")
  453. ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
  454. return
  455. }
  456. t := time.Now()
  457. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  458. ctx.Data["job_name"] = jobName
  459. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  460. if err != nil {
  461. ctx.ServerError("GetAllUserAttachments failed:", err)
  462. return
  463. }
  464. ctx.Data["attachments"] = attachs
  465. var resourcePools modelarts.ResourcePool
  466. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  467. ctx.ServerError("json.Unmarshal failed:", err)
  468. return
  469. }
  470. ctx.Data["resource_pools"] = resourcePools.Info
  471. var engines modelarts.Engine
  472. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  473. ctx.ServerError("json.Unmarshal failed:", err)
  474. return
  475. }
  476. ctx.Data["engines"] = engines.Info
  477. var versionInfos modelarts.VersionInfo
  478. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  479. ctx.ServerError("json.Unmarshal failed:", err)
  480. return
  481. }
  482. ctx.Data["engine_versions"] = versionInfos.Version
  483. var flavorInfos modelarts.Flavor
  484. if err = json.Unmarshal([]byte(setting.FlavorInfos), &flavorInfos); err != nil {
  485. ctx.ServerError("json.Unmarshal failed:", err)
  486. return
  487. }
  488. ctx.Data["flavor_infos"] = flavorInfos.Info
  489. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  490. ctx.Data["train_url"] = outputObsPath
  491. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  492. if err != nil {
  493. ctx.ServerError("getConfigList failed:", err)
  494. return
  495. }
  496. ctx.Data["config_list"] = configList.ParaConfigs
  497. ctx.HTML(200, tplModelArtsTrainJobNew)
  498. }
  499. func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  500. ctx.Data["PageIsTrainJob"] = true
  501. jobName := form.JobName
  502. uuid := form.Attachment
  503. description := form.Description
  504. workServerNumber := form.WorkServerNumber
  505. engineID := form.EngineID
  506. bootFile := form.BootFile
  507. flavorCode := form.Flavor
  508. params := form.Params
  509. poolID := form.PoolID
  510. isSaveParam := form.IsSaveParam
  511. repo := ctx.Repo.Repository
  512. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  513. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  514. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  515. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  516. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  517. can, err := canUserCreateTrainJob(ctx.User.ID)
  518. if err != nil {
  519. ctx.ServerError("canUserCreateTrainJob", err)
  520. return
  521. }
  522. if !can {
  523. log.Error("the user can not create train-job")
  524. ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form)
  525. return
  526. }
  527. //param check
  528. if err := paramCheckCreateTrainJob(form); err != nil {
  529. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  530. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  531. return
  532. }
  533. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
  534. log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err)
  535. ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  536. return
  537. }
  538. //todo: upload code (send to file_server todo this work?)
  539. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  540. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  541. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
  542. return
  543. }
  544. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  545. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  546. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
  547. return
  548. }
  549. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  550. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  551. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
  552. return
  553. }
  554. //todo: del local code?
  555. var parameters models.Parameters
  556. param := make([]models.Parameter, 0)
  557. param = append(param, models.Parameter{
  558. Label: modelarts.TrainUrl,
  559. Value: outputObsPath,
  560. }, models.Parameter{
  561. Label: modelarts.DataUrl,
  562. Value: dataPath,
  563. })
  564. if len(params) != 0 {
  565. err := json.Unmarshal([]byte(params), &parameters)
  566. if err != nil {
  567. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  568. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
  569. return
  570. }
  571. for _, parameter := range parameters.Parameter {
  572. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  573. param = append(param, models.Parameter{
  574. Label: parameter.Label,
  575. Value: parameter.Value,
  576. })
  577. }
  578. }
  579. }
  580. //save param config
  581. if isSaveParam == "on" {
  582. if form.ParameterTemplateName == "" {
  583. log.Error("ParameterTemplateName is empty")
  584. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
  585. return
  586. }
  587. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  588. ConfigName: form.ParameterTemplateName,
  589. Description: form.PrameterDescription,
  590. DataUrl: dataPath,
  591. AppUrl: codeObsPath,
  592. BootFileUrl: codeObsPath + bootFile,
  593. TrainUrl: outputObsPath,
  594. Flavor: models.Flavor{
  595. Code: flavorCode,
  596. },
  597. WorkServerNum: workServerNumber,
  598. EngineID: int64(engineID),
  599. LogUrl: logObsPath,
  600. PoolID: poolID,
  601. Parameter: param,
  602. })
  603. if err != nil {
  604. log.Error("Failed to CreateTrainJobConfig: %v", err)
  605. ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
  606. return
  607. }
  608. }
  609. req := &modelarts.GenerateTrainJobReq{
  610. JobName: jobName,
  611. DataUrl: dataPath,
  612. Description: description,
  613. CodeObsPath: codeObsPath,
  614. BootFile: codeObsPath + bootFile,
  615. TrainUrl: outputObsPath,
  616. FlavorCode: flavorCode,
  617. WorkServerNumber: workServerNumber,
  618. EngineID: int64(engineID),
  619. LogUrl: logObsPath,
  620. PoolID: poolID,
  621. Uuid: uuid,
  622. Parameters: param,
  623. }
  624. err = modelarts.GenerateTrainJob(ctx, req)
  625. if err != nil {
  626. log.Error("GenerateTrainJob failed:%v", err.Error())
  627. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  628. return
  629. }
  630. // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")\
  631. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  632. }
  633. // readDir reads the directory named by dirname and returns
  634. // a list of directory entries sorted by filename.
  635. func readDir(dirname string) ([]os.FileInfo, error) {
  636. f, err := os.Open(dirname)
  637. if err != nil {
  638. return nil, err
  639. }
  640. list, err := f.Readdir(100)
  641. f.Close()
  642. if err != nil {
  643. //todo: can not upload empty folder
  644. if err == io.EOF {
  645. return nil, nil
  646. }
  647. return nil, err
  648. }
  649. //sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() })
  650. return list, nil
  651. }
  652. func uploadCodeToObs(codePath, jobName, parentDir string) error {
  653. files, err := readDir(codePath)
  654. if err != nil {
  655. log.Error("readDir(%s) failed: %s", codePath, err.Error())
  656. return err
  657. }
  658. for _, file := range files {
  659. if file.IsDir() {
  660. input := &obs.PutObjectInput{}
  661. input.Bucket = setting.Bucket
  662. input.Key = parentDir + file.Name() + "/"
  663. _, err = storage.ObsCli.PutObject(input)
  664. if err != nil {
  665. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  666. return err
  667. }
  668. if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
  669. log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
  670. return err
  671. }
  672. } else {
  673. input := &obs.PutFileInput{}
  674. input.Bucket = setting.Bucket
  675. input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
  676. input.SourceFile = codePath + file.Name()
  677. _, err = storage.ObsCli.PutFile(input)
  678. if err != nil {
  679. log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
  680. return err
  681. }
  682. }
  683. }
  684. return nil
  685. }
  686. func obsMkdir(dir string) error {
  687. input := &obs.PutObjectInput{}
  688. input.Bucket = setting.Bucket
  689. input.Key = dir
  690. _, err := storage.ObsCli.PutObject(input)
  691. if err != nil {
  692. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  693. return err
  694. }
  695. return nil
  696. }
  697. func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
  698. if !strings.HasSuffix(form.BootFile, ".py") {
  699. log.Error("the boot file(%s) must be a python file", form.BootFile)
  700. return errors.New("启动文件必须是python文件")
  701. }
  702. if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
  703. log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
  704. return errors.New("计算节点数必须在1-25之间")
  705. }
  706. return nil
  707. }
  708. func TrainJobShow(ctx *context.Context) {
  709. ctx.Data["PageIsTrainJob"] = true
  710. var jobID = ctx.Params(":jobid")
  711. task, err := models.GetCloudbrainByJobID(jobID)
  712. if err != nil {
  713. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  714. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  715. return
  716. }
  717. attach, err := models.GetAttachmentByUUID(task.Uuid)
  718. if err != nil {
  719. log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
  720. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  721. return
  722. }
  723. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  724. if err != nil {
  725. log.Error("GetJob(%s) failed:%v", jobID, err.Error())
  726. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  727. return
  728. }
  729. if result != nil {
  730. result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
  731. result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  732. result.DatasetName = attach.Name
  733. }
  734. resultLogFile, resultLog, err := trainJobGetLog(jobID)
  735. if err != nil {
  736. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  737. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  738. return
  739. }
  740. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  741. ctx.Data["log"] = resultLog
  742. ctx.Data["task"] = task
  743. ctx.Data["jobID"] = jobID
  744. ctx.Data["result"] = result
  745. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  746. }
  747. func TrainJobGetLog(ctx *context.Context) {
  748. ctx.Data["PageIsTrainJob"] = true
  749. var jobID = ctx.Params(":jobid")
  750. var logFileName = ctx.Query("file_name")
  751. var baseLine = ctx.Query("base_line")
  752. var order = ctx.Query("order")
  753. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  754. log.Error("order(%s) check failed", order)
  755. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  756. return
  757. }
  758. task, err := models.GetCloudbrainByJobID(jobID)
  759. if err != nil {
  760. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  761. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  762. return
  763. }
  764. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  765. if err != nil {
  766. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  767. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  768. return
  769. }
  770. ctx.Data["log"] = result
  771. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  772. }
  773. func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  774. task, err := models.GetCloudbrainByJobID(jobID)
  775. if err != nil {
  776. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  777. return nil, nil, err
  778. }
  779. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  780. if err != nil {
  781. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  782. return nil, nil, err
  783. }
  784. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
  785. if err != nil {
  786. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  787. return nil, nil, err
  788. }
  789. return resultLogFile, result, err
  790. }
  791. func TrainJobDel(ctx *context.Context) {
  792. var jobID = ctx.Params(":jobid")
  793. task, err := models.GetCloudbrainByJobID(jobID)
  794. if err != nil {
  795. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  796. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  797. return
  798. }
  799. _, err = modelarts.DelTrainJob(jobID)
  800. if err != nil {
  801. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  802. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  803. return
  804. }
  805. err = models.DeleteJob(task)
  806. if err != nil {
  807. ctx.ServerError("DeleteJob failed", err)
  808. return
  809. }
  810. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  811. }
  812. func TrainJobStop(ctx *context.Context) {
  813. var jobID = ctx.Params(":jobid")
  814. task, err := models.GetCloudbrainByJobID(jobID)
  815. if err != nil {
  816. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  817. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  818. return
  819. }
  820. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  821. if err != nil {
  822. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  823. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  824. return
  825. }
  826. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  827. }
  828. func canUserCreateTrainJob(uid int64) (bool, error) {
  829. org, err := models.GetOrgByName(setting.AllowedOrg)
  830. if err != nil {
  831. log.Error("get allowed org failed: ", setting.AllowedOrg)
  832. return false, err
  833. }
  834. return org.IsOrgMember(uid)
  835. }
  836. func TrainJobGetConfigList(ctx *context.Context) {
  837. ctx.Data["PageIsTrainJob"] = true
  838. var jobID = ctx.Params(":jobid")
  839. var logFileName = ctx.Query("file_name")
  840. var baseLine = ctx.Query("base_line")
  841. var order = ctx.Query("order")
  842. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  843. log.Error("order(%s) check failed", order)
  844. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  845. return
  846. }
  847. task, err := models.GetCloudbrainByJobID(jobID)
  848. if err != nil {
  849. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  850. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  851. return
  852. }
  853. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  854. if err != nil {
  855. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  856. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  857. return
  858. }
  859. ctx.Data["log"] = result
  860. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  861. }
  862. func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  863. var result models.GetConfigListResult
  864. list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
  865. if err != nil {
  866. log.Error("GetConfigList failed:", err)
  867. return &result, err
  868. }
  869. for _, config := range list.ParaConfigs {
  870. paraConfig, err := modelarts.GetParaConfig(config.ConfigName, configType)
  871. if err != nil {
  872. log.Error("GetParaConfig failed:", err)
  873. return &result, err
  874. }
  875. config.Result = paraConfig
  876. }
  877. return list, nil
  878. }