You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 19 kB

4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
4 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
4 years ago
4 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
3 years ago
4 years ago
3 years ago
4 years ago
3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. package modelarts
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "path"
  7. "strconv"
  8. "code.gitea.io/gitea/modules/timeutil"
  9. "code.gitea.io/gitea/models"
  10. "code.gitea.io/gitea/modules/context"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/notification"
  13. "code.gitea.io/gitea/modules/setting"
  14. "code.gitea.io/gitea/modules/storage"
  15. )
  // Notebook defaults.
  const (
  	storageTypeOBS = "obs"
  	// autoStopDuration (4*60*60 = 14400) is used as AutoStop.Duration by
  	// GenerateTask; presumably seconds - confirm against the ModelArts API.
  	autoStopDuration = 4 * 60 * 60
  	// autoStopDurationMs is the same span multiplied by 1000 and is used as
  	// Duration by GenerateNotebook2.
  	autoStopDurationMs = 4 * 60 * 60 * 1000
  	DataSetMountPath   = "/home/ma-user/work"
  	NotebookEnv        = "Python3"
  	NotebookType       = "Ascend"
  	FlavorInfo         = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
  )

  // Train-job settings.
  //
  // The commented-out JSON defaults below (resource pools, engines, engine
  // versions and flavors) are disabled code retained exactly as found.
  //
  // ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
  // Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}"
  // EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," +
  // "{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," +
  // "{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," +
  // "{\"id\":117,\"value\":\"TF-1.15-c75-python3.7-euleros2.8-aarch64\"}" +
  // "]}"
  // TrainJobFlavorInfo = "{\"flavor\":[{\"code\":\"modelarts.bm.910.arm.public.2\",\"value\":\"Ascend : 2 * Ascend 910 CPU:48 核 512GiB\"}," +
  // "{\"code\":\"modelarts.bm.910.arm.public.8\",\"value\":\"Ascend : 8 * Ascend 910 CPU:192 核 2048GiB\"}," +
  // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," +
  // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" +
  // "]}"

  // Path segments.
  const (
  	CodePath   = "/code/"
  	OutputPath = "/output/"
  	ResultPath = "/result/"
  	LogPath    = "/log/"
  	JobPath    = "/job/"
  )

  // Ordering and sizing values.
  const (
  	OrderDesc = "desc" // query downwards
  	OrderAsc  = "asc"  // query upwards
  	Lines     = 500
  	PerPage   = 10
  )

  // Key names and fixed values; presumably run-parameter labels - confirm
  // against the callers.
  const (
  	TrainUrl     = "train_url"
  	DataUrl      = "data_url"
  	MultiDataUrl = "multi_data_url"
  	ResultUrl    = "result_url"
  	CkptUrl      = "ckpt_url"
  	DeviceTarget = "device_target"
  	Ascend       = "Ascend"
  )

  // Version bookkeeping values.
  const (
  	IsLatestVersion   = "1"
  	NotLatestVersion  = "0"
  	VersionCount      = 1
  	SortByCreateTime  = "create_time"
  	ConfigTypeCustom  = "custom"
  	TotalVersionCount = 1
  )
  61. var (
  62. poolInfos *models.PoolInfos
  63. FlavorInfos *models.FlavorInfos
  64. ImageInfos *models.ImageInfosModelArts
  65. )
  66. type GenerateTrainJobReq struct {
  67. JobName string
  68. DisplayJobName string
  69. Uuid string
  70. Description string
  71. CodeObsPath string
  72. BootFile string
  73. BootFileUrl string
  74. DataUrl string
  75. TrainUrl string
  76. FlavorCode string
  77. LogUrl string
  78. PoolID string
  79. WorkServerNumber int
  80. EngineID int64
  81. Parameters []models.Parameter
  82. CommitID string
  83. IsLatestVersion string
  84. Params string
  85. BranchName string
  86. PreVersionId int64
  87. PreVersionName string
  88. FlavorName string
  89. VersionCount int
  90. EngineName string
  91. TotalVersionCount int
  92. UserImageUrl string
  93. UserCommand string
  94. DatasetName string
  95. }
  96. type GenerateInferenceJobReq struct {
  97. JobName string
  98. DisplayJobName string
  99. Uuid string
  100. Description string
  101. CodeObsPath string
  102. BootFile string
  103. BootFileUrl string
  104. DataUrl string
  105. TrainUrl string
  106. FlavorCode string
  107. LogUrl string
  108. PoolID string
  109. WorkServerNumber int
  110. EngineID int64
  111. Parameters []models.Parameter
  112. CommitID string
  113. Params string
  114. BranchName string
  115. FlavorName string
  116. EngineName string
  117. LabelName string
  118. IsLatestVersion string
  119. VersionCount int
  120. TotalVersionCount int
  121. ModelName string
  122. ModelVersion string
  123. CkptName string
  124. ResultUrl string
  125. }
  // VersionInfo is the JSON shape {"version":[{"id":...,"value":...}]}.
  type VersionInfo struct {
  	// Version lists the entries found under the "version" key.
  	Version []struct {
  		ID    int    `json:"id"`
  		Value string `json:"value"`
  	} `json:"version"`
  }
  // Flavor is the JSON shape {"flavor":[{"code":...,"value":...}]}.
  type Flavor struct {
  	// Info lists the entries found under the "flavor" key.
  	Info []struct {
  		Code  string `json:"code"`
  		Value string `json:"value"`
  	} `json:"flavor"`
  }
  // Engine is the JSON shape {"engine":[{"id":...,"value":...}]}.
  type Engine struct {
  	// Info lists the entries found under the "engine" key.
  	Info []struct {
  		ID    int    `json:"id"`
  		Value string `json:"value"`
  	} `json:"engine"`
  }
  // ResourcePool is the JSON shape {"resource_pool":[{"id":...,"value":...}]}.
  type ResourcePool struct {
  	// Info lists the entries found under the "resource_pool" key.
  	Info []struct {
  		ID    string `json:"id"`
  		Value string `json:"value"`
  	} `json:"resource_pool"`
  }
  150. // type Parameter struct {
  151. // Label string `json:"label"`
  152. // Value string `json:"value"`
  153. // }
  154. // type Parameters struct {
  155. // Parameter []Parameter `json:"parameter"`
  156. // }
  // Parameters is the JSON shape {"parameter":[{"label":...,"value":...}]}.
  type Parameters struct {
  	// Parameter lists the label/value pairs found under the "parameter" key.
  	Parameter []struct {
  		Label string `json:"label"`
  		Value string `json:"value"`
  	} `json:"parameter"`
  }
  163. func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor string) error {
  164. var dataActualPath string
  165. if uuid != "" {
  166. dataActualPath = setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
  167. } else {
  168. userPath := setting.UserBasePath + ctx.User.Name + "/"
  169. isExist, err := storage.ObsHasObject(userPath)
  170. if err != nil {
  171. log.Error("ObsHasObject failed:%v", err.Error(), ctx.Data["MsgID"])
  172. return err
  173. }
  174. if !isExist {
  175. if err = storage.ObsCreateObject(userPath); err != nil {
  176. log.Error("ObsCreateObject failed:%v", err.Error(), ctx.Data["MsgID"])
  177. return err
  178. }
  179. }
  180. dataActualPath = setting.Bucket + "/" + userPath
  181. }
  182. if poolInfos == nil {
  183. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  184. }
  185. createTime := timeutil.TimeStampNow()
  186. jobResult, err := CreateJob(models.CreateNotebookParams{
  187. JobName: jobName,
  188. Description: description,
  189. ProfileID: setting.ProfileID,
  190. Flavor: flavor,
  191. Pool: models.Pool{
  192. ID: poolInfos.PoolInfo[0].PoolId,
  193. Name: poolInfos.PoolInfo[0].PoolName,
  194. Type: poolInfos.PoolInfo[0].PoolType,
  195. },
  196. Spec: models.Spec{
  197. Storage: models.Storage{
  198. Type: storageTypeOBS,
  199. Location: models.Location{
  200. Path: dataActualPath,
  201. },
  202. },
  203. AutoStop: models.AutoStop{
  204. Enable: true,
  205. Duration: autoStopDuration,
  206. },
  207. },
  208. })
  209. if err != nil {
  210. log.Error("CreateJob failed: %v", err.Error())
  211. return err
  212. }
  213. err = models.CreateCloudbrain(&models.Cloudbrain{
  214. Status: string(models.JobWaiting),
  215. UserID: ctx.User.ID,
  216. RepoID: ctx.Repo.Repository.ID,
  217. JobID: jobResult.ID,
  218. JobName: jobName,
  219. JobType: string(models.JobTypeDebug),
  220. Type: models.TypeCloudBrainTwo,
  221. Uuid: uuid,
  222. ComputeResource: models.NPUResource,
  223. CreatedUnix: createTime,
  224. UpdatedUnix: createTime,
  225. })
  226. if err != nil {
  227. return err
  228. }
  229. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobResult.ID, jobName, models.ActionCreateDebugNPUTask)
  230. return nil
  231. }
  232. func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
  233. if poolInfos == nil {
  234. json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
  235. }
  236. imageName, err := GetNotebookImageName(imageId)
  237. if err != nil {
  238. log.Error("GetNotebookImageName failed: %v", err.Error())
  239. return err
  240. }
  241. createTime := timeutil.TimeStampNow()
  242. jobResult, err := createNotebook2(models.CreateNotebook2Params{
  243. JobName: jobName,
  244. Description: description,
  245. Flavor: flavor,
  246. Duration: autoStopDurationMs,
  247. ImageID: imageId,
  248. PoolID: poolInfos.PoolInfo[0].PoolId,
  249. Feature: models.NotebookFeature,
  250. Volume: models.VolumeReq{
  251. Capacity: setting.Capacity,
  252. Category: models.EVSCategory,
  253. Ownership: models.ManagedOwnership,
  254. },
  255. WorkspaceID: "0",
  256. })
  257. if err != nil {
  258. log.Error("createNotebook2 failed: %v", err.Error())
  259. return err
  260. }
  261. err = models.CreateCloudbrain(&models.Cloudbrain{
  262. Status: jobResult.Status,
  263. UserID: ctx.User.ID,
  264. RepoID: ctx.Repo.Repository.ID,
  265. JobID: jobResult.ID,
  266. JobName: jobName,
  267. FlavorCode: flavor,
  268. DisplayJobName: displayJobName,
  269. JobType: string(models.JobTypeDebug),
  270. Type: models.TypeCloudBrainTwo,
  271. Uuid: uuid,
  272. ComputeResource: models.NPUResource,
  273. Image: imageName,
  274. Description: description,
  275. CreatedUnix: createTime,
  276. UpdatedUnix: createTime,
  277. })
  278. if err != nil {
  279. return err
  280. }
  281. task, err := models.GetCloudbrainByName(jobName)
  282. if err != nil {
  283. log.Error("GetCloudbrainByName failed: %v", err.Error())
  284. return err
  285. }
  286. stringId := strconv.FormatInt(task.ID, 10)
  287. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
  288. return nil
  289. }
  290. func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
  291. createTime := timeutil.TimeStampNow()
  292. jobResult, err := createTrainJob(models.CreateTrainJobParams{
  293. JobName: req.JobName,
  294. Description: req.Description,
  295. Config: models.Config{
  296. WorkServerNum: req.WorkServerNumber,
  297. AppUrl: req.CodeObsPath,
  298. BootFileUrl: req.BootFileUrl,
  299. DataUrl: req.DataUrl,
  300. EngineID: req.EngineID,
  301. TrainUrl: req.TrainUrl,
  302. LogUrl: req.LogUrl,
  303. PoolID: req.PoolID,
  304. CreateVersion: true,
  305. Flavor: models.Flavor{
  306. Code: req.FlavorCode,
  307. },
  308. Parameter: req.Parameters,
  309. },
  310. })
  311. if err != nil {
  312. log.Error("CreateJob failed: %v", err.Error())
  313. return err
  314. }
  315. jobId := strconv.FormatInt(jobResult.JobID, 10)
  316. err = models.CreateCloudbrain(&models.Cloudbrain{
  317. Status: TransTrainJobStatus(jobResult.Status),
  318. UserID: ctx.User.ID,
  319. RepoID: ctx.Repo.Repository.ID,
  320. JobID: jobId,
  321. JobName: req.JobName,
  322. DisplayJobName: req.DisplayJobName,
  323. JobType: string(models.JobTypeTrain),
  324. Type: models.TypeCloudBrainTwo,
  325. VersionID: jobResult.VersionID,
  326. VersionName: jobResult.VersionName,
  327. Uuid: req.Uuid,
  328. DatasetName: req.DatasetName,
  329. CommitID: req.CommitID,
  330. IsLatestVersion: req.IsLatestVersion,
  331. ComputeResource: models.NPUResource,
  332. EngineID: req.EngineID,
  333. TrainUrl: req.TrainUrl,
  334. BranchName: req.BranchName,
  335. Parameters: req.Params,
  336. BootFile: req.BootFile,
  337. DataUrl: req.DataUrl,
  338. LogUrl: req.LogUrl,
  339. FlavorCode: req.FlavorCode,
  340. Description: req.Description,
  341. WorkServerNumber: req.WorkServerNumber,
  342. FlavorName: req.FlavorName,
  343. EngineName: req.EngineName,
  344. VersionCount: req.VersionCount,
  345. TotalVersionCount: req.TotalVersionCount,
  346. CreatedUnix: createTime,
  347. UpdatedUnix: createTime,
  348. })
  349. if err != nil {
  350. log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
  351. return err
  352. }
  353. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask)
  354. return nil
  355. }
  356. func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) {
  357. return createTrainJobUserImage(models.CreateUserImageTrainJobParams{
  358. JobName: req.JobName,
  359. Description: req.Description,
  360. Config: models.UserImageConfig{
  361. WorkServerNum: req.WorkServerNumber,
  362. AppUrl: req.CodeObsPath,
  363. BootFileUrl: req.BootFileUrl,
  364. DataUrl: req.DataUrl,
  365. TrainUrl: req.TrainUrl,
  366. LogUrl: req.LogUrl,
  367. PoolID: req.PoolID,
  368. CreateVersion: true,
  369. Flavor: models.Flavor{
  370. Code: req.FlavorCode,
  371. },
  372. Parameter: req.Parameters,
  373. UserImageUrl: req.UserImageUrl,
  374. UserCommand: req.UserCommand,
  375. },
  376. })
  377. }
  378. func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
  379. createTime := timeutil.TimeStampNow()
  380. jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
  381. Description: req.Description,
  382. Config: models.TrainJobVersionConfig{
  383. WorkServerNum: req.WorkServerNumber,
  384. AppUrl: req.CodeObsPath,
  385. BootFileUrl: req.BootFileUrl,
  386. DataUrl: req.DataUrl,
  387. EngineID: req.EngineID,
  388. TrainUrl: req.TrainUrl,
  389. LogUrl: req.LogUrl,
  390. PoolID: req.PoolID,
  391. Flavor: models.Flavor{
  392. Code: req.FlavorCode,
  393. },
  394. Parameter: req.Parameters,
  395. PreVersionId: req.PreVersionId,
  396. },
  397. }, jobId)
  398. if err != nil {
  399. log.Error("CreateJob failed: %v", err.Error())
  400. return err
  401. }
  402. var jobTypes []string
  403. jobTypes = append(jobTypes, string(models.JobTypeTrain))
  404. repo := ctx.Repo.Repository
  405. VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
  406. RepoID: repo.ID,
  407. Type: models.TypeCloudBrainTwo,
  408. JobTypes: jobTypes,
  409. JobID: strconv.FormatInt(jobResult.JobID, 10),
  410. })
  411. if err != nil {
  412. ctx.ServerError("Cloudbrain", err)
  413. return err
  414. }
  415. //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount
  416. err = models.CreateCloudbrain(&models.Cloudbrain{
  417. Status: TransTrainJobStatus(jobResult.Status),
  418. UserID: ctx.User.ID,
  419. RepoID: ctx.Repo.Repository.ID,
  420. JobID: strconv.FormatInt(jobResult.JobID, 10),
  421. JobName: req.JobName,
  422. DisplayJobName: req.DisplayJobName,
  423. JobType: string(models.JobTypeTrain),
  424. Type: models.TypeCloudBrainTwo,
  425. VersionID: jobResult.VersionID,
  426. VersionName: jobResult.VersionName,
  427. Uuid: req.Uuid,
  428. DatasetName: req.DatasetName,
  429. CommitID: req.CommitID,
  430. IsLatestVersion: req.IsLatestVersion,
  431. PreVersionName: req.PreVersionName,
  432. ComputeResource: models.NPUResource,
  433. EngineID: req.EngineID,
  434. TrainUrl: req.TrainUrl,
  435. BranchName: req.BranchName,
  436. Parameters: req.Params,
  437. BootFile: req.BootFile,
  438. DataUrl: req.DataUrl,
  439. LogUrl: req.LogUrl,
  440. PreVersionId: req.PreVersionId,
  441. FlavorCode: req.FlavorCode,
  442. Description: req.Description,
  443. WorkServerNumber: req.WorkServerNumber,
  444. FlavorName: req.FlavorName,
  445. EngineName: req.EngineName,
  446. TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
  447. VersionCount: VersionListCount + 1,
  448. CreatedUnix: createTime,
  449. UpdatedUnix: createTime,
  450. })
  451. if err != nil {
  452. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  453. return err
  454. }
  455. //将训练任务的上一版本的isLatestVersion设置为"0"
  456. err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
  457. if err != nil {
  458. ctx.ServerError("Update IsLatestVersion failed", err)
  459. return err
  460. }
  461. return err
  462. }
  // trainJobStatusNames holds the status name for each known numeric train-job
  // status code; the array index is the code.
  var trainJobStatusNames = [...]string{
  	0:  "UNKNOWN",
  	1:  "INIT",
  	2:  "IMAGE_CREATING",
  	3:  "IMAGE_FAILED",
  	4:  "SUBMIT_TRYING",
  	5:  "SUBMIT_FAILED",
  	6:  "DELETE_FAILED",
  	7:  "WAITING",
  	8:  "RUNNING",
  	9:  "KILLING",
  	10: "COMPLETED",
  	11: "FAILED",
  	12: "KILLED",
  	13: "CANCELED",
  	14: "LOST",
  	15: "SCALING",
  	16: "SUBMIT_MODEL_FAILED",
  	17: "DEPLOY_SERVICE_FAILED",
  	18: "CHECK_INIT",
  	19: "CHECK_RUNNING",
  	20: "CHECK_RUNNING_COMPLETED",
  	21: "CHECK_FAILED",
  }

  // TransTrainJobStatus converts a numeric train-job status code into its
  // name. Codes outside 0-21 are returned as their decimal string.
  func TransTrainJobStatus(status int) string {
  	if status < 0 || status >= len(trainJobStatusNames) {
  		return strconv.Itoa(status)
  	}
  	return trainJobStatusNames[status]
  }
  // GetOutputPathByCount returns "V" followed by TotalVersionCount formatted
  // with the %04d verb (zero-padded to a width of four), e.g. 1 -> "V0001".
  func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) {
  	return fmt.Sprintf("V%04d", TotalVersionCount)
  }
  518. func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) {
  519. createTime := timeutil.TimeStampNow()
  520. jobResult, err := createInferenceJob(models.CreateInferenceJobParams{
  521. JobName: req.JobName,
  522. Description: req.Description,
  523. InfConfig: models.InfConfig{
  524. WorkServerNum: req.WorkServerNumber,
  525. AppUrl: req.CodeObsPath,
  526. BootFileUrl: req.BootFileUrl,
  527. DataUrl: req.DataUrl,
  528. EngineID: req.EngineID,
  529. // TrainUrl: req.TrainUrl,
  530. LogUrl: req.LogUrl,
  531. PoolID: req.PoolID,
  532. CreateVersion: true,
  533. Flavor: models.Flavor{
  534. Code: req.FlavorCode,
  535. },
  536. Parameter: req.Parameters,
  537. },
  538. })
  539. if err != nil {
  540. log.Error("CreateJob failed: %v", err.Error())
  541. return err
  542. }
  543. attach, err := models.GetAttachmentByUUID(req.Uuid)
  544. if err != nil {
  545. log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
  546. return err
  547. }
  548. jobID := strconv.FormatInt(jobResult.JobID, 10)
  549. err = models.CreateCloudbrain(&models.Cloudbrain{
  550. Status: TransTrainJobStatus(jobResult.Status),
  551. UserID: ctx.User.ID,
  552. RepoID: ctx.Repo.Repository.ID,
  553. JobID: jobID,
  554. JobName: req.JobName,
  555. DisplayJobName: req.DisplayJobName,
  556. JobType: string(models.JobTypeInference),
  557. Type: models.TypeCloudBrainTwo,
  558. VersionID: jobResult.VersionID,
  559. VersionName: jobResult.VersionName,
  560. Uuid: req.Uuid,
  561. DatasetName: attach.Name,
  562. CommitID: req.CommitID,
  563. EngineID: req.EngineID,
  564. TrainUrl: req.TrainUrl,
  565. BranchName: req.BranchName,
  566. Parameters: req.Params,
  567. BootFile: req.BootFile,
  568. DataUrl: req.DataUrl,
  569. LogUrl: req.LogUrl,
  570. FlavorCode: req.FlavorCode,
  571. Description: req.Description,
  572. WorkServerNumber: req.WorkServerNumber,
  573. FlavorName: req.FlavorName,
  574. EngineName: req.EngineName,
  575. LabelName: req.LabelName,
  576. IsLatestVersion: req.IsLatestVersion,
  577. ComputeResource: models.NPUResource,
  578. VersionCount: req.VersionCount,
  579. TotalVersionCount: req.TotalVersionCount,
  580. ModelName: req.ModelName,
  581. ModelVersion: req.ModelVersion,
  582. CkptName: req.CkptName,
  583. ResultUrl: req.ResultUrl,
  584. CreatedUnix: createTime,
  585. UpdatedUnix: createTime,
  586. })
  587. if err != nil {
  588. log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
  589. return err
  590. }
  591. notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateInferenceTask)
  592. return nil
  593. }
  594. func GetNotebookImageName(imageId string) (string, error) {
  595. var validImage = false
  596. var imageName = ""
  597. if ImageInfos == nil {
  598. json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
  599. }
  600. for _, imageInfo := range ImageInfos.ImageInfo {
  601. if imageInfo.Id == imageId {
  602. validImage = true
  603. imageName = imageInfo.Value
  604. }
  605. }
  606. if !validImage {
  607. log.Error("the image id(%s) is invalid", imageId)
  608. return imageName, errors.New("the image id is invalid")
  609. }
  610. return imageName, nil
  611. }