diff --git a/.gitignore b/.gitignore
index 9f34fea2a..ceeeaa92c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ coverage.all
 !/custom/conf/templates
 /custom/conf/app.ini
 !/custom/conf/app.ini.sample
+/custom/public/kanban
 /data
 /indexers
 /log
diff --git a/README.md b/README.md
index 99f6a6e8c..1d9ab8d06 100644
--- a/README.md
+++ b/README.md
@@ -54,4 +54,7 @@
 ## 平台引用
 如果本平台对您的科研工作提供了帮助,可在论文致谢中加入:
 英文版:```Thanks for the support provided by OpenI Community (https://git.openi.org.cn).```
-中文版:```感谢启智社区提供的技术支持(https://git.openi.org.cn)。```
\ No newline at end of file
+中文版:```感谢启智社区提供的技术支持(https://git.openi.org.cn)。```
+
+如果您的成果中引用了本平台,也欢迎在下述开源项目中提交您的成果信息:
+https://git.openi.org.cn/OpenIOSSG/references
diff --git a/custom/public/rotation3D/rotation3D.css b/custom/public/rotation3D/rotation3D.css
index 032096b48..4fd12283c 100755
--- a/custom/public/rotation3D/rotation3D.css
+++ b/custom/public/rotation3D/rotation3D.css
@@ -29,51 +29,22 @@
 }
 .rotation3D__item .scale{ position: absolute; top: 0; width: 100%; height: 100%; }
 .rotation3D__item .cont{ position: relative; z-index: 2; }
-.rotation3D__item .cont .iconfont { font-size: 28px; margin-top: 30px; margin-bottom: 96px; display: block; }
+.rotation3D__item .cont .iconfont { font-size: 28px; margin-top: 30px; margin-bottom: 96px; display: block; height: 35px;}
 .rotation3D__item .cont p{ color: #101010; }
-.itemList .rotation3D__item .cont p::after{
-    font-size: 12px;
-    content: '';
-    position: absolute;
-    left: 0;
-    right: 0;
-    margin-top: 60px;
-    color: #101010;
-}
-.itemList .rotation3D__item:nth-child(1) .cont p::after{
-    content: "鹏城云脑一号";
-}
-.itemList .rotation3D__item:nth-child(2) .cont p::after{
-    content: "鹏城云脑二号";
-}
-.itemList .rotation3D__item:nth-child(3) .cont p::after{
-    content: "北大人工智能集群系统";
-}
-.itemList .rotation3D__item:nth-child(4) .cont p::after{
-    content: "合肥类脑智能开放平台";
+.lineList .rotation3D__line:nth-child(5n+0) .dot{
 }
-.itemList .rotation3D__item:nth-child(5) .cont p::after{
-    content: "武汉人工智能计算中心";
+.lineList .rotation3D__line:nth-child(5n+1) .dot{
+    animation-delay: 1s;
 }
-.itemList .rotation3D__item:nth-child(6) .cont p::after{
-    content: "西安未来人工智能计算中心";
+.lineList .rotation3D__line:nth-child(5n+2) .dot{
+    animation-delay: 3s;
 }
-.itemList .rotation3D__item:nth-child(7) .cont p::after{
-    content: "更多接入中…";
+.lineList .rotation3D__line:nth-child(5n+3) .dot{
+    animation-delay: 2s;
 }
-.itemList .rotation3D__item:nth-child(8) .cont p::after{
-    content: "中原人工智能计算中心";
+.lineList .rotation3D__line:nth-child(5n+3) .dot{
+    animation-delay: 4s;
 }
-.itemList .rotation3D__item:nth-child(9) .cont p::after{
-    content: "成都人工智能计算中心";
-}
-.itemList .rotation3D__item:nth-child(10) .cont p::after{
-    content: "横琴先进智能计算中心";
-}
-.itemList .rotation3D__item:nth-child(11) .cont p::after{
-    content: "国家超级计算济南中心";
-}
-
 .rotation3D__item.blue{ color: #01e9fc; }
 .rotation3D__item.green{ color: #b4b3ca; }
 .rotation3D__item.yellow{ color: #ffd200; }
@@ -90,14 +61,17 @@
 ---------------------------*/
 .rotation3D__line{
     position: absolute; left: 50%; top: 50%;
-    display: block; width: 1px; height: 50%;
+    display: block;
+    width: 30px;
+    height: 50%;
     padding-top: 60px;
     color: #fff; font-size: 50px;
     /*background: #fff;*/
     /*原点设置在中间*/
     transform-origin: 50% 0;
     transform-style: preserve-3d;
-}
-.rotation3D__line .pos{ position: absolute; top: 0; }
+    overflow: hidden;
+    }
+.rotation3D__line .pos{ position: absolute; top: 0; left: 15px;}
 .rotation3D__line svg { position: absolute; top: 0; }
 .rotation3D__line svg path {
     stroke: #fff;
     fill: none;
@@ -139,8 +113,10 @@
     position: absolute;
     font-size: 12px;
     color: #888;
-    transform: rotate(180deg)scale(0.80);
-}
+    transform:scale(0.80);
+    transform-origin:left;
+    white-space: nowrap;
+    }
 /*颜色*/
 .rotation3D__line.blue { color: #07b2f9; }
diff --git a/go.mod b/go.mod
index 387a34520..3b83aced9 100755
--- a/go.mod
+++ b/go.mod
@@ -22,6 +22,7 @@ require (
 	github.com/PuerkitoBio/goquery v1.5.0
 	github.com/RichardKnop/machinery v1.6.9
 	github.com/RoaringBitmap/roaring v0.4.23 // indirect
+	github.com/alecthomas/chroma v0.10.0
 	github.com/alibabacloud-go/darabonba-openapi v0.1.18
 	github.com/alibabacloud-go/dysmsapi-20170525/v2 v2.0.9
 	github.com/alibabacloud-go/tea v1.1.17
@@ -120,8 +121,9 @@ require (
 	github.com/urfave/cli v1.22.1
 	github.com/xanzy/go-gitlab v0.31.0
 	github.com/yohcop/openid-go v1.0.0
-	github.com/yuin/goldmark v1.1.30
-	github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60
+	github.com/yuin/goldmark v1.4.13
+	github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594
+	github.com/yuin/goldmark-meta v1.1.0
 	golang.org/x/crypto v0.0.0-20200510223506-06a226fb4e37
 	golang.org/x/mod v0.3.0 // indirect
 	golang.org/x/net v0.0.0-20200513185701-a91f0712d120
@@ -138,7 +140,7 @@ require (
 	gopkg.in/ldap.v3 v3.0.2
 	gopkg.in/macaron.v1 v1.3.9 // indirect
 	gopkg.in/testfixtures.v2 v2.5.0
-	gopkg.in/yaml.v2 v2.2.8
+	gopkg.in/yaml.v2 v2.3.0
 	mvdan.cc/xurls/v2 v2.1.0
 	strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251
 	xorm.io/builder v0.3.7
diff --git a/go.sum b/go.sum
index d55d7af48..e0c11f261 100755
--- a/go.sum
+++ b/go.sum
@@ -76,6 +76,8 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMx
 github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68=
 github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs=
 github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs=
+github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
+github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alibabacloud-go/alibabacloud-gateway-spi v0.0.2/go.mod h1:sCavSAvdzOjul4cEqeVtvlSaSScfNsTQ+46HwlTL1hc=
@@ -203,6 +205,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
 github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
 github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
 github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
+github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
+github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
@@ -709,12 +713,14 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1
 github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
 github.com/smartystreets/assertions v1.0.1 h1:voD4ITNjPL5jjBfgR/r8fPIIBrliWrWHeiJApdr3r4w=
 github.com/smartystreets/assertions v1.0.1/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM=
+github.com/smartystreets/assertions v1.1.0 h1:MkTeG1DMwsrdH7QtLXy5W+fUxWq+vmb6cLmyJ7aRtF0=
 github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
 github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
 github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s=
 github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 h1:WN9BUFbdyOsSH/XohnWpXOlq9NBD5sGAB2FciQMUEe8=
 github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
+github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
 github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
 github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE=
@@ -804,8 +810,16 @@ github.com/yuin/goldmark v1.1.27 h1:nqDD4MMMQA0lmWq03Z2/myGPYLQoXtmi0rGVs95ntbo=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.1.30 h1:j4d4Lw3zqZelDhBksEo3BnWg9xhXRQGJPPSL6OApZjI=
 github.com/yuin/goldmark v1.1.30/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.5/go.mod h1:rmuwmfZ0+bvzB24eSC//bk1R1Zp3hM0OXYv/G2LIilg=
+github.com/yuin/goldmark v1.4.6/go.mod h1:rmuwmfZ0+bvzB24eSC//bk1R1Zp3hM0OXYv/G2LIilg=
+github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594 h1:yHfZyN55+5dp1wG7wDKv8HQ044moxkyGq12KFFMFDxg=
+github.com/yuin/goldmark-highlighting v0.0.0-20220208100518-594be1970594/go.mod h1:U9ihbh+1ZN7fR5Se3daSPoz1CGF9IYtSvWwVQtnzGHU=
 github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60 h1:gZucqLjL1eDzVWrXj4uiWeMbAopJlBR2mKQAsTGdPwo=
 github.com/yuin/goldmark-meta v0.0.0-20191126180153-f0638e958b60/go.mod h1:i9VhcIHN2PxXMbQrKqXNueok6QNONoPjNMoj9MygVL0=
+github.com/yuin/goldmark-meta v1.1.0 h1:pWw+JLHGZe8Rk0EGsMVssiNb/AaPMHfSRszZeUeiOUc=
+github.com/yuin/goldmark-meta v1.1.0/go.mod h1:U4spWENafuA7Zyg+Lj5RqK/MF+ovMYtBvXi1lBb2VP0=
 github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs=
 github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0=
 go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
@@ -1086,6 +1100,8 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
+gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o=
diff --git a/index.html b/index.html
new file mode 100644
index 000000000..643c31b06
--- /dev/null
+++ b/index.html
@@ -0,0 +1,688 @@
+
+
+ + + +* Only show the dynamics of open source projects
+These excellent organizations are using the OpenI AI Collaboration Platform for collaborative development of projects. To show your organization here, Click here to submit.
+ More Organizations +The community has prepared a wealth of activities, waiting for you to participate!
+Excellent AI projects recommendation. To show your project here, Click here to submit. Click here to explore more projects.
+The AI Computing Power Network Promotion Alliance has connected 10 intelligent computing centers, with a total computing capacity of 1542P
+Provides a collaborative development environment for AI development, which is the biggest highlight that distinguishes the OpenI AI Collaboration Platform from other traditional Git platforms.
+
+
+
+
+
+ The platform is connected to Pengcheng Cloudbrain and can use its rich computing resources to complete AI development tasks.
+ Pengcheng Cloudbrain currently provides 100P FLOPS@FP16 of AI computing power (10^17 half-precision floating-point operations per second); its main hardware infrastructure consists of GPU servers equipped with NVIDIA Tesla V100 and an Atlas 900 AI cluster equipped with Kunpeng and Ascend processors.
+ Developers can freely choose the computing resources that match their needs, and can test the adaptability, performance, and stability of their models in different hardware environments.
+ If your model requires more computing resources, you can also apply for them separately.
+
`)
+ if err != nil {
+ return
+ }
+
+ // include language-x class as part of commonmark spec
+ _, err = w.WriteString(``)
+ if err != nil {
+ return
+ }
+ } else {
+ _, err := w.WriteString("")
+ if err != nil {
+ return
+ }
+ }
}),
),
meta.Meta,
diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go
index 36d9ec3dd..f5b90a277 100755
--- a/modules/modelarts/modelarts.go
+++ b/modules/modelarts/modelarts.go
@@ -6,8 +6,7 @@ import (
"fmt"
"path"
"strconv"
-
- "code.gitea.io/gitea/modules/timeutil"
+ "strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
@@ -15,18 +14,19 @@ import (
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
+ "code.gitea.io/gitea/modules/timeutil"
)
const (
//notebook
- storageTypeOBS = "obs"
- autoStopDuration = 4 * 60 * 60
- autoStopDurationMs = 4 * 60 * 60 * 1000
-
- DataSetMountPath = "/home/ma-user/work"
- NotebookEnv = "Python3"
- NotebookType = "Ascend"
- FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
+ storageTypeOBS = "obs"
+ autoStopDuration = 4 * 60 * 60
+ autoStopDurationMs = 4 * 60 * 60 * 1000
+ MORDELART_USER_IMAGE_ENGINE_ID = -1
+ DataSetMountPath = "/home/ma-user/work"
+ NotebookEnv = "Python3"
+ NotebookType = "Ascend"
+ FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)"
//train-job
// ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}"
@@ -59,7 +59,7 @@ const (
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
- VersionCount = 1
+ VersionCountOne = 1
SortByCreateTime = "create_time"
ConfigTypeCustom = "custom"
@@ -67,9 +67,11 @@ const (
)
var (
- poolInfos *models.PoolInfos
- FlavorInfos *models.FlavorInfos
- ImageInfos *models.ImageInfosModelArts
+ poolInfos *models.PoolInfos
+ FlavorInfos *models.FlavorInfos
+ ImageInfos *models.ImageInfosModelArts
+ TrainFlavorInfos *Flavor
+ SpecialPools *models.SpecialPools
)
type GenerateTrainJobReq struct {
@@ -82,7 +84,6 @@ type GenerateTrainJobReq struct {
BootFileUrl string
DataUrl string
TrainUrl string
- FlavorCode string
LogUrl string
PoolID string
WorkServerNumber int
@@ -94,11 +95,15 @@ type GenerateTrainJobReq struct {
BranchName string
PreVersionId int64
PreVersionName string
+ FlavorCode string
FlavorName string
VersionCount int
EngineName string
TotalVersionCount int
+ UserImageUrl string
+ UserCommand string
DatasetName string
+ Spec *models.Specification
}
type GenerateInferenceJobReq struct {
@@ -111,7 +116,6 @@ type GenerateInferenceJobReq struct {
BootFileUrl string
DataUrl string
TrainUrl string
- FlavorCode string
LogUrl string
PoolID string
WorkServerNumber int
@@ -130,12 +134,15 @@ type GenerateInferenceJobReq struct {
ModelVersion string
CkptName string
ResultUrl string
+ Spec *models.Specification
+ DatasetName string
}
type VersionInfo struct {
Version []struct {
ID int `json:"id"`
Value string `json:"value"`
+ Url string `json:"url"`
} `json:"version"`
}
@@ -252,7 +259,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin
return nil
}
-func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
+func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error {
if poolInfos == nil {
json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
}
@@ -266,7 +273,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
jobResult, err := createNotebook2(models.CreateNotebook2Params{
JobName: jobName,
Description: description,
- Flavor: flavor,
+ Flavor: spec.SourceSpecId,
Duration: autoStopDurationMs,
ImageID: imageId,
PoolID: poolInfos.PoolInfo[0].PoolId,
@@ -280,15 +287,30 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
})
if err != nil {
log.Error("createNotebook2 failed: %v", err.Error())
+ if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", displayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: models.TempJobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: jobName,
+ JobType: string(models.JobTypeDebug),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
return err
}
- err = models.CreateCloudbrain(&models.Cloudbrain{
+ task := &models.Cloudbrain{
Status: jobResult.Status,
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: jobResult.ID,
JobName: jobName,
- FlavorCode: flavor,
+ FlavorCode: spec.SourceSpecId,
DisplayJobName: displayJobName,
JobType: string(models.JobTypeDebug),
Type: models.TypeCloudBrainTwo,
@@ -298,16 +320,14 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
Description: description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
- })
-
- if err != nil {
- return err
+ Spec: spec,
}
- task, err := models.GetCloudbrainByName(jobName)
+
+ err = models.CreateCloudbrain(task)
if err != nil {
- log.Error("GetCloudbrainByName failed: %v", err.Error())
return err
}
+
stringId := strconv.FormatInt(task.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
return nil
@@ -315,32 +335,71 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
createTime := timeutil.TimeStampNow()
- jobResult, err := createTrainJob(models.CreateTrainJobParams{
- JobName: req.JobName,
- Description: req.Description,
- Config: models.Config{
- WorkServerNum: req.WorkServerNumber,
- AppUrl: req.CodeObsPath,
- BootFileUrl: req.BootFileUrl,
- DataUrl: req.DataUrl,
- EngineID: req.EngineID,
- TrainUrl: req.TrainUrl,
- LogUrl: req.LogUrl,
- PoolID: req.PoolID,
- CreateVersion: true,
- Flavor: models.Flavor{
- Code: req.FlavorCode,
+ var jobResult *models.CreateTrainJobResult
+ var createErr error
+ if req.EngineID < 0 {
+ jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{
+ JobName: req.JobName,
+ Description: req.Description,
+ Config: models.UserImageConfig{
+ WorkServerNum: req.WorkServerNumber,
+ AppUrl: req.CodeObsPath,
+ BootFileUrl: req.BootFileUrl,
+ DataUrl: req.DataUrl,
+ TrainUrl: req.TrainUrl,
+ LogUrl: req.LogUrl,
+ PoolID: req.PoolID,
+ CreateVersion: true,
+ Flavor: models.Flavor{
+ Code: req.Spec.SourceSpecId,
+ },
+ Parameter: req.Parameters,
+ UserImageUrl: req.UserImageUrl,
+ UserCommand: req.UserCommand,
},
- Parameter: req.Parameters,
- },
- })
- if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
- return err
+ })
+ } else {
+ jobResult, createErr = createTrainJob(models.CreateTrainJobParams{
+ JobName: req.JobName,
+ Description: req.Description,
+ Config: models.Config{
+ WorkServerNum: req.WorkServerNumber,
+ AppUrl: req.CodeObsPath,
+ BootFileUrl: req.BootFileUrl,
+ DataUrl: req.DataUrl,
+ EngineID: req.EngineID,
+ TrainUrl: req.TrainUrl,
+ LogUrl: req.LogUrl,
+ PoolID: req.PoolID,
+ CreateVersion: true,
+ Flavor: models.Flavor{
+ Code: req.Spec.SourceSpecId,
+ },
+ Parameter: req.Parameters,
+ },
+ })
+ }
+ if createErr != nil {
+ log.Error("createTrainJob failed: %v", createErr.Error())
+ if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: models.TempJobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: req.JobName,
+ JobType: string(models.JobTypeTrain),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
+ return createErr
}
-
jobId := strconv.FormatInt(jobResult.JobID, 10)
- err = models.CreateCloudbrain(&models.Cloudbrain{
+ createErr = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
@@ -363,7 +422,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
BootFile: req.BootFile,
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
- FlavorCode: req.FlavorCode,
+ FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -372,57 +431,122 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
TotalVersionCount: req.TotalVersionCount,
CreatedUnix: createTime,
UpdatedUnix: createTime,
+ Spec: req.Spec,
})
- if err != nil {
- log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
- return err
+ if createErr != nil {
+ log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, createErr.Error())
+ return createErr
}
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask)
return nil
}
-func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
- createTime := timeutil.TimeStampNow()
- jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
+func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) {
+
+ return createTrainJobUserImage(models.CreateUserImageTrainJobParams{
+ JobName: req.JobName,
Description: req.Description,
- Config: models.TrainJobVersionConfig{
+ Config: models.UserImageConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
- EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
+ CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
- PreVersionId: req.PreVersionId,
+ UserImageUrl: req.UserImageUrl,
+ UserCommand: req.UserCommand,
},
- }, jobId)
- if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
- return err
+ })
+}
+
+func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
+ createTime := timeutil.TimeStampNow()
+ var jobResult *models.CreateTrainJobResult
+ var createErr error
+
+ if req.EngineID < 0 {
+ jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{
+ Description: req.Description,
+ Config: models.TrainJobVersionUserImageConfig{
+ WorkServerNum: req.WorkServerNumber,
+ AppUrl: req.CodeObsPath,
+ BootFileUrl: req.BootFileUrl,
+ DataUrl: req.DataUrl,
+ TrainUrl: req.TrainUrl,
+ LogUrl: req.LogUrl,
+ PoolID: req.PoolID,
+ Flavor: models.Flavor{
+ Code: req.Spec.SourceSpecId,
+ },
+ Parameter: req.Parameters,
+ PreVersionId: req.PreVersionId,
+ UserImageUrl: req.UserImageUrl,
+ UserCommand: req.UserCommand,
+ },
+ }, jobId)
+ } else {
+ jobResult, createErr = createTrainJobVersion(models.CreateTrainJobVersionParams{
+ Description: req.Description,
+ Config: models.TrainJobVersionConfig{
+ WorkServerNum: req.WorkServerNumber,
+ AppUrl: req.CodeObsPath,
+ BootFileUrl: req.BootFileUrl,
+ DataUrl: req.DataUrl,
+ EngineID: req.EngineID,
+ TrainUrl: req.TrainUrl,
+ LogUrl: req.LogUrl,
+ PoolID: req.PoolID,
+ Flavor: models.Flavor{
+ Code: req.Spec.SourceSpecId,
+ },
+ Parameter: req.Parameters,
+ PreVersionId: req.PreVersionId,
+ },
+ }, jobId)
+ }
+ if createErr != nil {
+ log.Error("createTrainJobVersion failed: %v", createErr.Error())
+ if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+ errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: jobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: req.JobName,
+ JobType: string(models.JobTypeTrain),
+ })
+ if errTemp != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
+ return errTemp
+ }
+ }
+ return createErr
}
var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
repo := ctx.Repo.Repository
- VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
+ VersionTaskList, VersionListCount, createErr := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypes: jobTypes,
JobID: strconv.FormatInt(jobResult.JobID, 10),
})
- if err != nil {
- ctx.ServerError("Cloudbrain", err)
- return err
+ if createErr != nil {
+ ctx.ServerError("Cloudbrain", createErr)
+ return createErr
}
//Set the current version's isLatestVersion to "1" and update the task counts, which include the current VersionCount and the total TotalVersionCount of historically created versions
- err = models.CreateCloudbrain(&models.Cloudbrain{
+ createErr = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
@@ -447,7 +571,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
PreVersionId: req.PreVersionId,
- FlavorCode: req.FlavorCode,
+ FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -456,20 +580,21 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
VersionCount: VersionListCount + 1,
CreatedUnix: createTime,
UpdatedUnix: createTime,
+ Spec: req.Spec,
})
- if err != nil {
- log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
- return err
+ if createErr != nil {
+ log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error())
+ return createErr
}
//Set isLatestVersion of the training job's previous version to "0"
- err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
- if err != nil {
- ctx.ServerError("Update IsLatestVersion failed", err)
- return err
+ createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCountOne, NotLatestVersion, TotalVersionCount)
+ if createErr != nil {
+ ctx.ServerError("Update IsLatestVersion failed", createErr)
+ return createErr
}
- return err
+ return createErr
}
func TransTrainJobStatus(status int) string {
@@ -546,21 +671,36 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
- Code: req.FlavorCode,
+ Code: req.Spec.SourceSpecId,
},
Parameter: req.Parameters,
},
})
if err != nil {
- log.Error("CreateJob failed: %v", err.Error())
+ log.Error("createInferenceJob failed: %v", err.Error())
+ if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
+ log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
+ err = models.InsertCloudbrainTemp(&models.CloudbrainTemp{
+ JobID: models.TempJobId,
+ VersionID: models.TempVersionId,
+ Status: models.TempJobStatus,
+ Type: models.TypeCloudBrainTwo,
+ JobName: req.JobName,
+ JobType: string(models.JobTypeInference),
+ })
+ if err != nil {
+ log.Error("InsertCloudbrainTemp failed: %v", err.Error())
+ return err
+ }
+ }
return err
}
- attach, err := models.GetAttachmentByUUID(req.Uuid)
- if err != nil {
- log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
- return err
- }
+ // attach, err := models.GetAttachmentByUUID(req.Uuid)
+ // if err != nil {
+ // log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
+ // return err
+ // }
jobID := strconv.FormatInt(jobResult.JobID, 10)
err = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
@@ -574,7 +714,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
- DatasetName: attach.Name,
+ DatasetName: req.DatasetName,
CommitID: req.CommitID,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
@@ -583,7 +723,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
BootFile: req.BootFile,
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
- FlavorCode: req.FlavorCode,
+ FlavorCode: req.Spec.SourceSpecId,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
@@ -599,6 +739,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
ResultUrl: req.ResultUrl,
CreatedUnix: createTime,
UpdatedUnix: createTime,
+ Spec: req.Spec,
})
if err != nil {
@@ -631,3 +772,461 @@ func GetNotebookImageName(imageId string) (string, error) {
return imageName, nil
}
+
+func InitSpecialPool() {
+ if SpecialPools == nil && setting.ModelArtsSpecialPools != "" {
+ json.Unmarshal([]byte(setting.ModelArtsSpecialPools), &SpecialPools)
+ }
+}
+
+func HandleTrainJobInfo(task *models.Cloudbrain) error {
+
+ result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
+ if err != nil {
+ log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
+ return err
+ }
+
+ if result != nil {
+ oldStatus := task.Status
+ task.Status = TransTrainJobStatus(result.IntStatus)
+ task.Duration = result.Duration / 1000
+ task.TrainJobDuration = result.TrainJobDuration
+
+ if task.StartTime == 0 && result.StartTime > 0 {
+ task.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
+ }
+ task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
+ if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
+ task.EndTime = task.StartTime.Add(task.Duration)
+ }
+ task.CorrectCreateUnix()
+ if oldStatus != task.Status {
+ notification.NotifyChangeCloudbrainStatus(task, oldStatus)
+ }
+ err = models.UpdateJob(task)
+ if err != nil {
+ log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
+ return err
+ }
+ }
+
+ return nil
+}
+
+func HandleNotebookInfo(task *models.Cloudbrain) error {
+
+ result, err := GetNotebook2(task.JobID)
+ if err != nil {
+ log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
+ return err
+ }
+
+ if result != nil {
+ oldStatus := task.Status
+ task.Status = result.Status
+ if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
+ task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
+ }
+ if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
+ task.EndTime = timeutil.TimeStampNow()
+ }
+ task.CorrectCreateUnix()
+ task.ComputeAndSetDuration()
+ if oldStatus != task.Status {
+ notification.NotifyChangeCloudbrainStatus(task, oldStatus)
+ }
+ if task.FlavorCode == "" {
+ task.FlavorCode = result.Flavor
+ }
+ err = models.UpdateJob(task)
+ if err != nil {
+ log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
+ return err
+ }
+ }
+
+ return nil
+}
+
+func SyncTempStatusJob() {
+ jobs, err := models.GetCloudBrainTempJobs()
+ if err != nil {
+ log.Error("GetCloudBrainTempJobs failed:%v", err.Error())
+ return
+ }
+
+ for _, temp := range jobs {
+ log.Info("start to handle record: %s", temp.JobName)
+ if temp.Type == models.TypeCloudBrainTwo {
+ if temp.JobType == string(models.JobTypeDebug) {
+ err = handleNotebook(temp)
+ if err != nil {
+ log.Error("handleNotebook falied:%v", err)
+ break
+ }
+ } else if temp.JobType == string(models.JobTypeTrain) || temp.JobType == string(models.JobTypeInference) {
+ _, err = models.GetCloudbrainByJobID(temp.JobID)
+ if err != nil {
+ //one version
+ err = handleTrainJob(temp)
+ if err != nil {
+ log.Error("handleTrainJob falied:%v", err)
+ break
+ }
+ } else {
+ //multi version
+ err = handleTrainJobMultiVersion(temp)
+ if err != nil {
+ log.Error("handleTrainJobMultiVersion falied:%v", err)
+ break
+ }
+ }
+ }
+ }
+ }
+
+ return
+}
+
+func handleNotebook(temp *models.CloudbrainTemp) error {
+ if temp.Status == models.TempJobStatus {
+ err := handleTempNotebook(temp)
+ if err != nil {
+ log.Error("handleTempNotebook failed:%v", err)
+ return err
+ }
+ } else if temp.Status == string(models.ModelArtsStopping) {
+ res, err := GetNotebook2(temp.JobID)
+ if err != nil {
+ log.Error("GetNotebook2 failed:%v", err)
+ return err
+ }
+
+ temp.Status = res.Status
+ if temp.Status == string(models.ModelArtsStopped) {
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+
+ _, err := DelNotebook2(temp.JobID)
+ if err != nil {
+ log.Error("DelNotebook2 failed:%v", err)
+ return err
+ }
+
+ temp.Status = string(models.ModelArtsDeleted)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+func handleTempNotebook(temp *models.CloudbrainTemp) error {
+ var err error
+ var isExist bool
+
+ for {
+ result, err := GetNotebookList(1000, 0, "createTime", "DESC", temp.JobName)
+ if err != nil {
+ log.Error("GetNotebookList failed:%v", err)
+ break
+ }
+
+ temp.QueryTimes++
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ }
+
+ if result != nil {
+ for _, notebook := range result.NotebookList {
+ if temp.JobID == models.TempJobId {
+ //new notebook
+ if notebook.JobName == temp.JobName {
+ isExist = true
+ temp.Status = notebook.Status
+ temp.JobID = notebook.JobID
+ break
+ }
+ } else {
+ //restart: always can find one record
+ if notebook.JobName == temp.JobName {
+ if notebook.Status != string(models.ModelArtsStopped) {
+ isExist = true
+ temp.Status = notebook.Status
+ temp.JobID = notebook.JobID
+ break
+ }
+ }
+ }
+ }
+
+ if isExist {
+ log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+ if temp.Status == string(models.ModelArtsCreateFailed) {
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ break
+ }
+
+ _, err := DelNotebook2(temp.JobID)
+ if err != nil {
+ log.Error("DelNotebook2(%s) failed:%v", temp.JobName, err)
+ break
+ }
+
+ temp.Status = string(models.ModelArtsDeleted)
+ } else {
+ _, err := ManageNotebook2(temp.JobID, models.NotebookAction{Action: models.ActionStop})
+ if err != nil {
+ log.Error("ManageNotebook2(%s) failed:%v", temp.JobName, err)
+ break
+ }
+ temp.Status = string(models.ModelArtsStopping)
+ }
+
+ models.UpdateCloudbrainTemp(temp)
+ } else {
+ log.Error("can not find the record(%s) till now", temp.JobName)
+ err = errors.New("not found")
+ break
+ }
+ } else {
+ log.Error("can not find the record(%s) till now", temp.JobName)
+ err = errors.New("not found")
+ break
+ }
+
+ break
+ }
+
+ if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist {
+ log.Info("reach MaxTempQueryTimes, set the job failed")
+
+ temp.Status = string(models.ModelArtsTrainJobFailed)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+ return err
+ }
+ }
+
+ return err
+}
+
+func handleTrainJob(temp *models.CloudbrainTemp) error {
+ if temp.Status == models.TempJobStatus {
+ err := handleTempTrainJob(temp)
+ if err != nil {
+ log.Error("handleTempTrainJob failed:%v", err)
+ return err
+ }
+ } else if temp.Status == string(models.ModelArtsTrainJobKilling) {
+ res, err := GetTrainJob(temp.JobID, temp.VersionID)
+ if err != nil {
+ log.Error("GetTrainJob failed:%v", err)
+ return err
+ }
+
+ temp.Status = TransTrainJobStatus(res.IntStatus)
+ if temp.Status == string(models.ModelArtsTrainJobKilled) {
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+
+ _, err := DelTrainJob(temp.JobID)
+ if err != nil {
+ log.Error("DelTrainJob failed:%v", err)
+ return err
+ }
+
+ temp.Status = string(models.ModelArtsDeleted)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+func handleTrainJobMultiVersion(temp *models.CloudbrainTemp) error {
+ if temp.Status == models.TempJobStatus {
+ err := handleTempTrainJobMultiVersion(temp)
+ if err != nil {
+ log.Error("handleTempTrainJobMultiVersion failed:%v", err)
+ return err
+ }
+ } else if temp.Status == string(models.ModelArtsTrainJobKilling) {
+ res, err := GetTrainJob(temp.JobID, temp.VersionID)
+ if err != nil {
+ log.Error("GetTrainJob failed:%v", err)
+ return err
+ }
+
+ temp.Status = TransTrainJobStatus(res.IntStatus)
+ if temp.Status == string(models.ModelArtsTrainJobKilled) {
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+
+ _, err := DelTrainJobVersion(temp.JobID, temp.VersionID)
+ if err != nil {
+ log.Error("DelTrainJob failed:%v", err)
+ return err
+ }
+
+ temp.Status = string(models.ModelArtsDeleted)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ return err
+ }
+ }
+
+ }
+
+ return nil
+}
+
+func handleTempTrainJobMultiVersion(temp *models.CloudbrainTemp) error {
+ var err error
+ var isExist bool
+
+ for {
+ result, err := GetTrainJobVersionList(1000, 1, temp.JobID)
+ if err != nil {
+ log.Error("GetTrainJobVersionList failed:%v", err)
+ break
+ }
+
+ temp.QueryTimes++
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ }
+
+ if result != nil {
+ count, _ := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType, temp.Type)
+ if result.VersionCount == int64(count+1) {
+ isExist = true
+ temp.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus)
+ temp.VersionID = strconv.FormatInt(result.JobVersionList[0].VersionID, 10)
+
+ log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+
+ _, err := StopTrainJob(temp.JobID, temp.VersionID)
+ if err != nil {
+ log.Error("StopTrainJob failed:%v", err)
+ break
+ }
+ temp.Status = string(models.ModelArtsTrainJobKilling)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+ break
+ }
+ } else {
+ log.Error("can not find the record(%s) till now", temp.JobName)
+ err = errors.New("not found")
+ break
+ }
+ }
+
+ break
+ }
+
+ if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist {
+ log.Info("reach MaxTempQueryTimes, set the job failed")
+
+ temp.Status = string(models.ModelArtsTrainJobFailed)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+ return err
+ }
+ }
+
+ return err
+}
+
+func handleTempTrainJob(temp *models.CloudbrainTemp) error {
+ var err error
+ var isExist bool
+
+ for {
+ result, err := GetTrainJobList(1000, 1, "create_time", "desc", temp.JobName)
+ if err != nil {
+ log.Error("GetTrainJobList failed:%v", err)
+ break
+ }
+
+ temp.QueryTimes++
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp failed:%v", err)
+ }
+
+ if result != nil {
+ for _, job := range result.JobList {
+ if temp.JobName == job.JobName && TransTrainJobStatus(job.IntStatus) != string(models.ModelArtsTrainJobFailed) {
+ isExist = true
+ temp.Status = TransTrainJobStatus(job.IntStatus)
+ temp.JobID = strconv.FormatInt(job.JobID, 10)
+ temp.VersionID = strconv.FormatInt(job.VersionID, 10)
+
+ log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status)
+
+ _, err = StopTrainJob(temp.JobID, temp.VersionID)
+ if err != nil {
+ log.Error("StopTrainJob(%s) failed:%v", temp.JobName, err)
+ break
+ }
+
+ temp.Status = string(models.ModelArtsTrainJobKilling)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+ break
+ }
+ }
+ }
+
+ if !isExist {
+ log.Error("can not find the record(%s) till now", temp.JobName)
+ err = errors.New("not found")
+ break
+ }
+ }
+
+ break
+ }
+
+ if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist {
+ log.Info("reach MaxTempQueryTimes, set the job failed")
+
+ temp.Status = string(models.ModelArtsTrainJobFailed)
+ err = models.UpdateCloudbrainTemp(temp)
+ if err != nil {
+ log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err)
+ return err
+ }
+ }
+
+ return err
+}
diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go
index 6a2803cb1..fd1c467f3 100755
--- a/modules/modelarts/resty.go
+++ b/modules/modelarts/resty.go
@@ -37,6 +37,7 @@ const (
NotebookNotFound = "ModelArts.6404"
NotebookNoPermission = "ModelArts.6407"
NotebookInvalid = "ModelArts.6400"
+ UnknownErrorPrefix = "UNKNOWN:"
)
func getRestyClient() *resty.Client {
@@ -298,6 +299,10 @@ sendjob:
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
+ }
+
if len(response.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
@@ -472,7 +477,7 @@ sendjob:
return &result, nil
}
-func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
+func createTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) {
checkSetting()
client := getRestyClient()
var result models.CreateTrainJobResult
@@ -500,6 +505,63 @@ sendjob:
goto sendjob
}
+ if res.StatusCode() != http.StatusOK {
+ var temp models.ErrorResult
+ if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+ log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ }
+ log.Error("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
+ log.Error("启动文件错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf("启动文件错误!")
+ }
+ if temp.ErrorMsg == dataSetErrorMsg {
+ log.Error("数据集错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf("数据集错误!")
+ }
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
+ }
+
+ if !result.IsSuccess {
+ log.Error("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ return &result, fmt.Errorf("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ }
+
+ return &result, nil
+}
+
+func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) {
+ checkSetting()
+ client := getRestyClient()
+ var result models.CreateTrainJobResult
+
+ retry := 0
+
+sendjob:
+ res, err := client.R().
+ SetHeader("Content-Type", "application/json").
+ SetAuthToken(TOKEN).
+ SetBody(createJobParams).
+ SetResult(&result).
+ Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
+
+ if err != nil {
+ return nil, fmt.Errorf("resty create train-job: %s", err)
+ }
+
+ if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+ retry++
+ _ = getToken()
+ goto sendjob
+ }
+
if res.StatusCode() != http.StatusOK {
var temp models.ErrorResult
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
@@ -507,17 +569,21 @@ sendjob:
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
- BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
- DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
- if temp.ErrorMsg == BootFileErrorMsg {
+ bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
- if temp.ErrorMsg == DataSetErrorMsg {
+ if temp.ErrorMsg == dataSetErrorMsg {
log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
@@ -535,6 +601,64 @@ func createTrainJobVersion(createJobVersionParams models.CreateTrainJobVersionPa
retry := 0
+sendjob:
+ res, err := client.R().
+ SetHeader("Content-Type", "application/json").
+ SetAuthToken(TOKEN).
+ SetBody(createJobVersionParams).
+ SetResult(&result).
+ Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
+
+ if err != nil {
+ return nil, fmt.Errorf("resty create train-job version: %s", err)
+ }
+
+ if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+ retry++
+ _ = getToken()
+ goto sendjob
+ }
+
+ if res.StatusCode() != http.StatusOK {
+ var temp models.ErrorResult
+ if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+ log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ }
+
+ log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
+ dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
+ if temp.ErrorMsg == bootFileErrorMsg {
+ log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf("启动文件错误!")
+ }
+ if temp.ErrorMsg == dataSetErrorMsg {
+ log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf("数据集错误!")
+ }
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
+ }
+
+ if !result.IsSuccess {
+ log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ }
+
+ return &result, nil
+}
+
+func createTrainJobVersionUserImage(createJobVersionParams models.CreateTrainJobVersionUserImageParams, jobID string) (*models.CreateTrainJobResult, error) {
+ checkSetting()
+ client := getRestyClient()
+ var result models.CreateTrainJobResult
+
+ retry := 0
+
sendjob:
res, err := client.R().
SetHeader("Content-Type", "application/json").
@@ -650,9 +774,6 @@ sendjob:
goto sendjob
}
- //temp, _ := json.Marshal(req)
- //log.Info("%s", temp)
-
if res.StatusCode() != http.StatusOK {
var temp models.ErrorResult
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
@@ -1061,7 +1182,11 @@ sendjob:
log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
- return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ } else {
+ return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ }
}
if !result.IsSuccess {
@@ -1101,7 +1226,11 @@ sendjob:
err = json.Unmarshal(res.Body(), &response)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
- return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
+ }
+
+ if res.StatusCode() == http.StatusBadGateway {
+ return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}
if len(response.ErrorCode) != 0 {
@@ -1160,3 +1289,139 @@ sendjob:
return &result, nil
}
+
+func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) {
+ checkSetting()
+ client := getRestyClient()
+ var result models.GetTrainJobListResult
+
+ retry := 0
+
+sendjob:
+ res, err := client.R().
+ SetQueryParams(map[string]string{
+ "per_page": strconv.Itoa(perPage),
+ "page": strconv.Itoa(page),
+ "sortBy": sortBy,
+ "order": order,
+ "search_content": searchContent,
+ }).
+ SetAuthToken(TOKEN).
+ SetResult(&result).
+ Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
+
+ if err != nil {
+ return nil, fmt.Errorf("resty GetTrainJobList: %v", err)
+ }
+
+ if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+ retry++
+ _ = getToken()
+ goto sendjob
+ }
+
+ if res.StatusCode() != http.StatusOK {
+ var temp models.ErrorResult
+ if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+ log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ }
+ log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf(temp.ErrorMsg)
+ }
+
+ if !result.IsSuccess {
+ log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ return &result, fmt.Errorf(result.ErrorMsg)
+ }
+
+ return &result, nil
+}
+
+func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) {
+ checkSetting()
+ client := getRestyClient()
+ var result models.GetTrainJobVersionListResult
+
+ retry := 0
+
+sendjob:
+ res, err := client.R().
+ SetQueryParams(map[string]string{
+ "per_page": strconv.Itoa(perPage),
+ "page": strconv.Itoa(page),
+ }).
+ SetAuthToken(TOKEN).
+ SetResult(&result).
+ Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")
+
+ if err != nil {
+ return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err)
+ }
+
+ if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+ retry++
+ _ = getToken()
+ goto sendjob
+ }
+
+ if res.StatusCode() != http.StatusOK {
+ var temp models.ErrorResult
+ if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+ log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ }
+ log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf(temp.ErrorMsg)
+ }
+
+ if !result.IsSuccess {
+ log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
+ return &result, fmt.Errorf(result.ErrorMsg)
+ }
+
+ return &result, nil
+}
+
+func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) {
+ checkSetting()
+ client := getRestyClient()
+ var result models.GetNotebookListResult
+
+ retry := 0
+
+sendjob:
+ res, err := client.R().
+ SetQueryParams(map[string]string{
+ "limit": strconv.Itoa(limit),
+ "offset": strconv.Itoa(offset),
+ "name": searchContent,
+ "sort_key": sortBy,
+ "sort_dir": order,
+ }).
+ SetAuthToken(TOKEN).
+ SetResult(&result).
+ Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
+
+ if err != nil {
+ return nil, fmt.Errorf("resty GetNotebookList: %v", err)
+ }
+
+ if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
+ retry++
+ _ = getToken()
+ goto sendjob
+ }
+
+ if res.StatusCode() != http.StatusOK {
+ var temp models.ErrorResult
+ if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
+ log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
+ }
+ log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
+ return &result, fmt.Errorf(temp.ErrorMsg)
+ }
+
+ return &result, nil
+}
diff --git a/modules/notification/base/notifier.go b/modules/notification/base/notifier.go
index 1429dc090..354de1ab2 100644
--- a/modules/notification/base/notifier.go
+++ b/modules/notification/base/notifier.go
@@ -62,4 +62,6 @@ type Notifier interface {
NotifyCreateImage(doer *models.User, image models.Image)
NotifyImageRecommend(optUser *models.User, image *models.Image, action string)
NotifyChangeUserAvatar(user *models.User, form auth.AvatarForm)
+
+ NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string)
}
diff --git a/modules/notification/base/null.go b/modules/notification/base/null.go
index 27ed24f15..5cd812b5d 100644
--- a/modules/notification/base/null.go
+++ b/modules/notification/base/null.go
@@ -176,3 +176,7 @@ func (*NullNotifier) NotifyImageRecommend(optUser *models.User, image *models.Im
func (*NullNotifier) NotifyChangeUserAvatar(user *models.User, form auth.AvatarForm) {
}
+
+func (*NullNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) {
+
+}
diff --git a/modules/notification/notification.go b/modules/notification/notification.go
index 6c96d58da..e98221886 100644
--- a/modules/notification/notification.go
+++ b/modules/notification/notification.go
@@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/modules/notification/mail"
"code.gitea.io/gitea/modules/notification/ui"
"code.gitea.io/gitea/modules/notification/webhook"
+ wechatNotifier "code.gitea.io/gitea/modules/notification/wechat"
"code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
)
@@ -36,6 +37,7 @@ func NewContext() {
RegisterNotifier(indexer.NewNotifier())
RegisterNotifier(webhook.NewNotifier())
RegisterNotifier(action.NewNotifier())
+ RegisterNotifier(wechatNotifier.NewNotifier())
}
// NotifyUploadAttachment notifies attachment upload message to notifiers
@@ -305,3 +307,10 @@ func NotifyChangeUserAvatar(user *models.User, form auth.AvatarForm) {
notifier.NotifyChangeUserAvatar(user, form)
}
}
+
+// NotifyChangeCloudbrainStatus notifies cloudbrain status change message to notifiers
+func NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) {
+ for _, notifier := range notifiers {
+ notifier.NotifyChangeCloudbrainStatus(cloudbrain, oldStatus)
+ }
+}
diff --git a/modules/notification/wechat/wechat.go b/modules/notification/wechat/wechat.go
new file mode 100644
index 000000000..cd72bb54e
--- /dev/null
+++ b/modules/notification/wechat/wechat.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package wechat
+
+import (
+ "code.gitea.io/gitea/models"
+ "code.gitea.io/gitea/modules/auth/wechat"
+ "code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/notification/base"
+)
+
+type wechatNotifier struct {
+ base.NullNotifier
+}
+
+var (
+ _ base.Notifier = &wechatNotifier{}
+)
+
+// NewNotifier create a new wechatNotifier notifier
+func NewNotifier() base.Notifier {
+ return &wechatNotifier{}
+}
+
+func (*wechatNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) {
+ log.Info("NotifyChangeCloudbrainStatus cloudbrain.id=%d cloudbrain.status=%s oldStatus=%s", cloudbrain.ID, cloudbrain.Status, oldStatus)
+ operateType := wechat.GetJobOperateTypeFromCloudbrainStatus(cloudbrain)
+ if operateType == "" {
+ log.Info("NotifyChangeCloudbrainStatus operateType is incorrect")
+ return
+ }
+ template := wechat.GetTemplateFromOperateType(operateType)
+ go wechat.SendTemplateMsg(template, &wechat.TemplateContext{Cloudbrain: cloudbrain}, cloudbrain.UserID)
+}
diff --git a/modules/setting/cloudbrain.go b/modules/setting/cloudbrain.go
index 2d80eea25..c06676243 100755
--- a/modules/setting/cloudbrain.go
+++ b/modules/setting/cloudbrain.go
@@ -5,6 +5,7 @@ type CloudbrainLoginConfig struct {
Password string
Host string
ImageURLPrefix string
+ Expiration string
}
var (
@@ -17,5 +18,6 @@ func GetCloudbrainConfig() CloudbrainLoginConfig {
Cloudbrain.Password = cloudbrainSec.Key("PASSWORD").MustString("")
Cloudbrain.Host = cloudbrainSec.Key("REST_SERVER_HOST").MustString("")
Cloudbrain.ImageURLPrefix = cloudbrainSec.Key("IMAGE_URL_PREFIX").MustString("")
+ Cloudbrain.Expiration = cloudbrainSec.Key("EXPIRATION").MustString("604800")
return Cloudbrain
}
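
How the new EXPIRATION value is consumed is outside this hunk; it is stored as a string number of seconds (the default 604800 is seven days). A plausible reading of it, sketched for illustration only:

    package example // illustrative

    import (
        "strconv"
        "time"

        "code.gitea.io/gitea/modules/setting"
    )

    // cloudbrainExpiration converts the EXPIRATION setting (seconds as a string) into a time.Duration,
    // falling back to the default of one week when the value cannot be parsed.
    func cloudbrainExpiration() time.Duration {
        secs, err := strconv.ParseInt(setting.GetCloudbrainConfig().Expiration, 10, 64)
        if err != nil {
            secs = 604800
        }
        return time.Duration(secs) * time.Second
    }
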
diff --git a/modules/setting/repository.go b/modules/setting/repository.go
index ee4f8b379..1eb1875b2 100644
--- a/modules/setting/repository.go
+++ b/modules/setting/repository.go
@@ -193,8 +193,9 @@ var (
Wiki: []string{"never"},
},
}
- RepoRootPath string
- ScriptType = "bash"
+ RepoRootPath string
+ RepoMaxReferenceDatasetNum int
+ ScriptType = "bash"
)
func newRepository() {
@@ -210,6 +211,8 @@ func newRepository() {
Repository.UseCompatSSHURI = sec.Key("USE_COMPAT_SSH_URI").MustBool()
Repository.MaxCreationLimit = sec.Key("MAX_CREATION_LIMIT").MustInt(-1)
RepoRootPath = sec.Key("ROOT").MustString(path.Join(homeDir, "gitea-repositories"))
+ RepoMaxReferenceDatasetNum = sec.Key("MAX_REF_DATASET_NUM").MustInt(20)
+
forcePathSeparator(RepoRootPath)
if !filepath.IsAbs(RepoRootPath) {
RepoRootPath = filepath.Join(AppWorkPath, RepoRootPath)
diff --git a/modules/setting/setting.go b/modules/setting/setting.go
index dca41b8a0..10950a90c 100755
--- a/modules/setting/setting.go
+++ b/modules/setting/setting.go
@@ -7,6 +7,7 @@ package setting
import (
"encoding/base64"
+ "encoding/json"
"fmt"
"io"
"io/ioutil"
@@ -64,7 +65,16 @@ const (
ReCaptcha = "recaptcha"
)
-// settings
+type C2NetSequenceInfo struct {
+ ID int `json:"id"`
+ Name string `json:"name"`
+ Content string `json:"content"`
+}
+
+type C2NetSqInfos struct {
+ C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"`
+}
+
var (
// AppVer settings
AppVer string
@@ -456,20 +466,26 @@ var (
DecompressOBSTaskName string
//cloudbrain config
- CBAuthUser string
- CBAuthPassword string
- RestServerHost string
- JobPath string
- CBCodePathPrefix string
- JobType string
- GpuTypes string
- SpecialPools string
- DebugServerHost string
- ResourceSpecs string
- MaxDuration int64
- TrainGpuTypes string
- TrainResourceSpecs string
- MaxDatasetNum int
+
+ CBAuthUser string
+ CBAuthPassword string
+ RestServerHost string
+ JobPath string
+ CBCodePathPrefix string
+ JobType string
+ GpuTypes string
+ SpecialPools string
+ DebugServerHost string
+ ResourceSpecs string
+ MaxDuration int64
+ TrainGpuTypes string
+ TrainResourceSpecs string
+ InferenceGpuTypes string
+ InferenceResourceSpecs string
+ MaxModelSize float64
+ MaxDatasetNum int
+ CullIdleTimeout string
+ CullInterval string
//benchmark config
IsBenchmarkEnabled bool
@@ -526,22 +542,27 @@ var (
DebugHost string
ImageInfos string
Capacity int
+ MaxTempQueryTimes int
//train-job
- ResourcePools string
- Engines string
- EngineVersions string
- FlavorInfos string
- TrainJobFLAVORINFOS string
+ ResourcePools string
+ Engines string
+ EngineVersions string
+ FlavorInfos string
+ TrainJobFLAVORINFOS string
+ ModelArtsSpecialPools string
//grampus config
Grampus = struct {
- Env string
- Host string
- UserName string
- Password string
- SpecialPools string
+ Env string
+ Host string
+ UserName string
+ Password string
+ SpecialPools string
+ C2NetSequence string
}{}
+ C2NetInfos *C2NetSqInfos
+
//elk config
ElkUrl string
ElkUser string
@@ -573,6 +594,16 @@ var (
TreePathOfAutoMsgReply string
TreePathOfSubscribe string
+ //wechat template msg config
+ CloudbrainStartedTemplateId string
+ CloudbrainStartedNotifyList []string
+ CloudbrainStartedTitle string
+ CloudbrainStartedRemark string
+ CloudbrainStoppedTemplateId string
+ CloudbrainStoppedNotifyList []string
+ CloudbrainStoppedTitle string
+ CloudbrainStoppedRemark string
+
//nginx proxy
PROXYURL string
RadarMap = struct {
@@ -623,6 +654,24 @@ var (
OrgName string
TeamName string
}{}
+
+ ModelConvert = struct {
+ GPU_PYTORCH_IMAGE string
+ GpuQueue string
+ GPU_TENSORFLOW_IMAGE string
+ NPU_MINDSPORE_16_IMAGE string
+ PytorchOnnxBootFile string
+ PytorchTrTBootFile string
+ MindsporeBootFile string
+ TensorFlowNpuBootFile string
+ TensorFlowGpuBootFile string
+ ConvertRepoPath string
+ GPU_Resource_Specs_ID int
+ NPU_FlavorCode string
+ NPU_PoolID string
+ NPU_MINDSPORE_IMAGE_ID int
+ NPU_TENSORFLOW_IMAGE_ID int
+ }{}
)
// DateLang transforms standard language locale name to corresponding value in datetime plugin.
@@ -1322,9 +1371,14 @@ func NewContext() {
MaxDuration = sec.Key("MAX_DURATION").MustInt64(14400)
TrainGpuTypes = sec.Key("TRAIN_GPU_TYPES").MustString("")
TrainResourceSpecs = sec.Key("TRAIN_RESOURCE_SPECS").MustString("")
+ MaxModelSize = sec.Key("MAX_MODEL_SIZE").MustFloat64(500)
+ InferenceGpuTypes = sec.Key("INFERENCE_GPU_TYPES").MustString("")
+ InferenceResourceSpecs = sec.Key("INFERENCE_RESOURCE_SPECS").MustString("")
SpecialPools = sec.Key("SPECIAL_POOL").MustString("")
-
+
MaxDatasetNum = sec.Key("MAX_DATASET_NUM").MustInt(5)
+ CullIdleTimeout = sec.Key("CULL_IDLE_TIMEOUT").MustString("900")
+ CullInterval = sec.Key("CULL_INTERVAL").MustString("60")
sec = Cfg.Section("benchmark")
IsBenchmarkEnabled = sec.Key("ENABLED").MustBool(false)
@@ -1381,11 +1435,13 @@ func NewContext() {
Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
+ MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("")
EngineVersions = sec.Key("Engine_Versions").MustString("")
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("")
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("")
+ ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("")
sec = Cfg.Section("elk")
ElkUrl = sec.Key("ELKURL").MustString("")
@@ -1400,7 +1456,7 @@ func NewContext() {
WechatApiHost = sec.Key("HOST").MustString("https://api.weixin.qq.com")
WechatApiTimeoutSeconds = sec.Key("TIMEOUT_SECONDS").MustInt(3)
WechatAppId = sec.Key("APP_ID").MustString("wxba77b915a305a57d")
- WechatAppSecret = sec.Key("APP_SECRET").MustString("e48e13f315adc32749ddc7057585f198")
+ WechatAppSecret = sec.Key("APP_SECRET").MustString("")
WechatQRCodeExpireSeconds = sec.Key("QR_CODE_EXPIRE_SECONDS").MustInt(120)
WechatAuthSwitch = sec.Key("AUTH_SWITCH").MustBool(false)
UserNameOfWechatReply = sec.Key("AUTO_REPLY_USER_NAME").MustString("OpenIOSSG")
@@ -1409,6 +1465,14 @@ func NewContext() {
TreePathOfAutoMsgReply = sec.Key("AUTO_REPLY_TREE_PATH").MustString("wechat/auto_reply.json")
TreePathOfSubscribe = sec.Key("SUBSCRIBE_TREE_PATH").MustString("wechat/subscribe_reply.json")
WechatAuthSwitch = sec.Key("AUTH_SWITCH").MustBool(false)
+ CloudbrainStartedTemplateId = sec.Key("CLOUDBRAIN_STARTED_TEMPLATE_ID").MustString("")
+ CloudbrainStartedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STARTED_NOTIFY_LIST").MustString("DEBUG"), ",")
+ CloudbrainStartedTitle = sec.Key("CLOUDBRAIN_STARTED_TITLE").MustString("您好,您提交的算力资源申请已通过,任务已启动,请您关注运行情况。")
+ CloudbrainStartedRemark = sec.Key("CLOUDBRAIN_STARTED_REMARK").MustString("感谢您的耐心等待。")
+ CloudbrainStoppedTemplateId = sec.Key("CLOUDBRAIN_STOPPED_TEMPLATE_ID").MustString("")
+ CloudbrainStoppedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STOPPED_NOTIFY_LIST").MustString("TRAIN"), ",")
+ CloudbrainStoppedTitle = sec.Key("CLOUDBRAIN_STOPPED_TITLE").MustString("您好,您申请的算力资源已结束使用,任务已完成运行,请您关注运行结果。")
+ CloudbrainStoppedRemark = sec.Key("CLOUDBRAIN_STOPPED_REMARK").MustString("感谢您的耐心等待。")
sec = Cfg.Section("point")
CloudBrainPaySwitch = sec.Key("CLOUDBRAIN_PAY_SWITCH").MustBool(false)
@@ -1426,6 +1490,27 @@ func NewContext() {
Course.TeamName = sec.Key("team_name").MustString("")
GetGrampusConfig()
+
+ getModelConvertConfig()
+}
+
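+// getModelConvertConfig reads the [model_convert] section and fills in the defaults used by model conversion tasks.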
+func getModelConvertConfig() {
+ sec := Cfg.Section("model_convert")
+ ModelConvert.GPU_PYTORCH_IMAGE = sec.Key("GPU_PYTORCH_IMAGE").MustString("dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap")
+ ModelConvert.GpuQueue = sec.Key("GpuQueue").MustString("openidgx")
+ ModelConvert.GPU_TENSORFLOW_IMAGE = sec.Key("GPU_TENSORFLOW_IMAGE").MustString("dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx")
+ ModelConvert.NPU_MINDSPORE_16_IMAGE = sec.Key("NPU_MINDSPORE_16_IMAGE").MustString("swr.cn-south-222.ai.pcl.cn/openi/mindspore1.6.1_train_v1_openi:v3_ascend")
+ ModelConvert.PytorchOnnxBootFile = sec.Key("PytorchOnnxBootFile").MustString("convert_pytorch.py")
+ ModelConvert.PytorchTrTBootFile = sec.Key("PytorchTrTBootFile").MustString("convert_pytorch_tensorrt.py")
+ ModelConvert.MindsporeBootFile = sec.Key("MindsporeBootFile").MustString("convert_mindspore.py")
+ ModelConvert.TensorFlowNpuBootFile = sec.Key("TensorFlowNpuBootFile").MustString("convert_tensorflow.py")
+ ModelConvert.TensorFlowGpuBootFile = sec.Key("TensorFlowGpuBootFile").MustString("convert_tensorflow_gpu.py")
+ ModelConvert.ConvertRepoPath = sec.Key("ConvertRepoPath").MustString("https://git.openi.org.cn/zouap/npu_test")
+ ModelConvert.GPU_Resource_Specs_ID = sec.Key("GPU_Resource_Specs_ID").MustInt(1)
+ ModelConvert.NPU_FlavorCode = sec.Key("NPU_FlavorCode").MustString("modelarts.bm.910.arm.public.1")
+ ModelConvert.NPU_PoolID = sec.Key("NPU_PoolID").MustString("pool7908321a")
+ ModelConvert.NPU_MINDSPORE_IMAGE_ID = sec.Key("NPU_MINDSPORE_IMAGE_ID").MustInt(121)
+ ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35)
}
func GetGrampusConfig() {
@@ -1436,7 +1521,12 @@ func GetGrampusConfig() {
Grampus.UserName = sec.Key("USERNAME").MustString("")
Grampus.Password = sec.Key("PASSWORD").MustString("")
Grampus.SpecialPools = sec.Key("SPECIAL_POOL").MustString("")
-
+ Grampus.C2NetSequence = sec.Key("C2NET_SEQUENCE").MustString("{\"sequence\":[{\"id\":1,\"name\":\"cloudbrain_one\",\"content\":\"鹏城云脑一号\"},{\"id\":2,\"name\":\"cloudbrain_two\",\"content\":\"鹏城云脑二号\"},{\"id\":3,\"name\":\"beida\",\"content\":\"北大人工智能集群系统\"},{\"id\":4,\"name\":\"hefei\",\"content\":\"合肥类脑智能开放平台\"},{\"id\":5,\"name\":\"wuhan\",\"content\":\"武汉人工智能计算中心\"},{\"id\":6,\"name\":\"xian\",\"content\":\"西安未来人工智能计算中心\"},{\"id\":7,\"pclcci\":\"more\",\"content\":\"鹏城云计算所\"},{\"id\":8,\"name\":\"xuchang\",\"content\":\"中原人工智能计算中心\"},{\"id\":9,\"name\":\"chengdu\",\"content\":\"成都人工智能计算中心\"},{\"id\":10,\"name\":\"more\",\"content\":\"横琴先进智能计算中心\"},{\"id\":11,\"name\":\"more\",\"content\":\"国家超级计算济南中心\"}]}")
+ if Grampus.C2NetSequence != "" {
+ if err := json.Unmarshal([]byte(Grampus.C2NetSequence), &C2NetInfos); err != nil {
+ log.Error("Unmarshal(C2NetSequence) failed:%v", err)
+ }
+ }
}
func SetRadarMapConfig() {
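
The C2NET_SEQUENCE default above is a JSON document that has to unmarshal into C2NetSqInfos; each entry carries id, name and content keys. A standalone sketch of the expected shape (types duplicated here so the example runs on its own):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Local copies of the C2NetSequenceInfo/C2NetSqInfos types defined in modules/setting.
    type c2NetSequenceInfo struct {
        ID      int    `json:"id"`
        Name    string `json:"name"`
        Content string `json:"content"`
    }

    type c2NetSqInfos struct {
        Sequence []*c2NetSequenceInfo `json:"sequence"`
    }

    func main() {
        raw := `{"sequence":[{"id":1,"name":"cloudbrain_one","content":"鹏城云脑一号"},{"id":2,"name":"cloudbrain_two","content":"鹏城云脑二号"}]}`
        var infos c2NetSqInfos
        if err := json.Unmarshal([]byte(raw), &infos); err != nil {
            fmt.Println("invalid C2NET_SEQUENCE:", err)
            return
        }
        for _, c := range infos.Sequence {
            fmt.Printf("%d: %s (%s)\n", c.ID, c.Name, c.Content)
        }
    }
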
diff --git a/modules/storage/minio_ext.go b/modules/storage/minio_ext.go
index 514ac7204..4b738c068 100755
--- a/modules/storage/minio_ext.go
+++ b/modules/storage/minio_ext.go
@@ -179,31 +179,39 @@ func GetOneLevelAllObjectUnderDirMinio(bucket string, prefixRootPath string, rel
output, err := core.ListObjects(bucket, Prefix, "", "", 1000)
fileInfos := make([]FileInfo, 0)
prefixLen := len(Prefix)
+ fileMap := make(map[string]bool, 0)
if err == nil {
for _, val := range output.Contents {
+
log.Info("val key=" + val.Key)
var isDir bool
var fileName string
if val.Key == Prefix {
continue
}
- if strings.Contains(val.Key[prefixLen:len(val.Key)-1], "/") {
+ fileName = val.Key[prefixLen:]
+ log.Info("fileName =" + fileName)
+ files := strings.Split(fileName, "/")
+ if fileMap[files[0]] {
continue
+ } else {
+ fileMap[files[0]] = true
}
- if strings.HasSuffix(val.Key, "/") {
+ ParenDir := relativePath
+ fileName = files[0]
+ if len(files) > 1 {
isDir = true
- fileName = val.Key[prefixLen : len(val.Key)-1]
- relativePath += val.Key[prefixLen:]
+ ParenDir += fileName + "/"
} else {
isDir = false
- fileName = val.Key[prefixLen:]
}
+
fileInfo := FileInfo{
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"),
FileName: fileName,
Size: val.Size,
IsDir: isDir,
- ParenDir: relativePath,
+ ParenDir: ParenDir,
}
fileInfos = append(fileInfos, fileInfo)
}
@@ -217,6 +225,49 @@ func GetOneLevelAllObjectUnderDirMinio(bucket string, prefixRootPath string, rel
}
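+// MinioGetFilesSize returns the total size, in bytes, of the given object keys in the bucket.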
+func MinioGetFilesSize(bucketName string, Files []string) int64 {
+ _, core, err := getClients()
+ var fileTotalSize int64
+ fileTotalSize = 0
+ if err != nil {
+ log.Error("getClients failed:", err.Error())
+ return fileTotalSize
+ }
+ for _, file := range Files {
+ log.Info("file=" + file)
+ meta, err := core.StatObject(bucketName, file, miniov6.StatObjectOptions{})
+ if err != nil {
+ log.Info("Get file error:" + err.Error())
+ continue
+ }
+ fileTotalSize += meta.Size
+ }
+ return fileTotalSize
+}
+
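+// MinioCopyFiles copies each listed file from srcPath to destPath within the same bucket and returns the total bytes copied.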
+func MinioCopyFiles(bucketName string, srcPath string, destPath string, Files []string) (int64, error) {
+ _, core, err := getClients()
+ var fileTotalSize int64
+ fileTotalSize = 0
+ if err != nil {
+ log.Error("getClients failed:", err.Error())
+ return fileTotalSize, err
+ }
+
+ for _, file := range Files {
+ srcObjectName := srcPath + file
+ destObjectName := destPath + file
+ log.Info("srcObjectName=" + srcObjectName + " destObjectName=" + destObjectName)
+ meta, err := core.StatObject(bucketName, srcObjectName, miniov6.StatObjectOptions{})
+ if err != nil {
+ log.Info("Get file error:" + err.Error())
+ continue
+ }
+ if _, err = core.CopyObject(bucketName, srcObjectName, bucketName, destObjectName, meta.UserMetadata); err != nil {
+ log.Info("Copy file error:" + err.Error())
+ continue
+ }
+ fileTotalSize += meta.Size
+ }
+
+ return fileTotalSize, nil
+}
+
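
A hypothetical caller of the two helpers above, sizing a dataset's files before copying them into a job workspace (the bucket name and object prefixes are invented for the example):

    package example // illustrative

    import (
        "code.gitea.io/gitea/modules/log"
        "code.gitea.io/gitea/modules/storage"
    )

    func stageDataset() {
        files := []string{"train.zip", "labels.csv"}
        // MinioGetFilesSize expects full object keys; MinioCopyFiles joins srcPath/destPath with each name.
        total := storage.MinioGetFilesSize("opendata", []string{"dataset/v1/train.zip", "dataset/v1/labels.csv"})
        copied, err := storage.MinioCopyFiles("opendata", "dataset/v1/", "jobs/job-123/dataset/", files)
        if err != nil {
            log.Error("copy failed: %v", err)
            return
        }
        log.Info("copied %d of %d bytes", copied, total)
    }
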
func MinioPathCopy(bucketName string, srcPath string, destPath string) (int64, error) {
_, core, err := getClients()
var fileTotalSize int64
diff --git a/modules/storage/obs.go b/modules/storage/obs.go
index 33730b72c..2cb3af927 100755
--- a/modules/storage/obs.go
+++ b/modules/storage/obs.go
@@ -264,7 +264,47 @@ func ObsModelDownload(JobName string, fileName string) (io.ReadCloser, error) {
}
}
-func ObsCopyManyFile(srcBucket string, srcPath string, destBucket string, destPath string) (int64, error) {
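+// ObsGetFilesSize returns the total content length, in bytes, of the given OBS object keys.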
+func ObsGetFilesSize(srcBucket string, Files []string) int64 {
+ var fileTotalSize int64
+ for _, file := range Files {
+ log.Info("file=" + file)
+ out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{
+ Bucket: srcBucket,
+ Key: file,
+ })
+ if err != nil {
+ log.Info("Get File error, error=" + err.Error())
+ continue
+ }
+ fileTotalSize += out.ContentLength
+ }
+ return fileTotalSize
+}
+
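+// ObsCopyManyFile copies only the listed files from srcPath to destPath and returns the total bytes copied.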
+func ObsCopyManyFile(srcBucket string, srcPath string, destBucket string, destPath string, Files []string) (int64, error) {
+
+ var fileTotalSize int64
+
+ for _, file := range Files {
+ srcKey := srcPath + file
+ destKey := destPath + file
+ log.Info("srcKey=" + srcKey + " destKey=" + destKey)
+ out, err := ObsCli.GetObjectMetadata(&obs.GetObjectMetadataInput{
+ Bucket: srcBucket,
+ Key: srcKey,
+ })
+ if err != nil {
+ log.Info("Get File error, error=" + err.Error())
+ continue
+ }
+ obsCopyFile(srcBucket, srcKey, destBucket, destKey)
+ fileTotalSize += out.ContentLength
+ }
+
+ return fileTotalSize, nil
+}
+
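+// ObsCopyAllFile copies every object under srcPath in srcBucket to destPath in destBucket and returns the total bytes copied.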
+func ObsCopyAllFile(srcBucket string, srcPath string, destBucket string, destPath string) (int64, error) {
input := &obs.ListObjectsInput{}
input.Bucket = srcBucket
// 设置每页100个对象
@@ -330,6 +370,7 @@ func GetOneLevelAllObjectUnderDir(bucket string, prefixRootPath string, relative
output, err := ObsCli.ListObjects(input)
fileInfos := make([]FileInfo, 0)
prefixLen := len(input.Prefix)
+ fileMap := make(map[string]bool, 0)
if err == nil {
for _, val := range output.Contents {
log.Info("val key=" + val.Key)
@@ -338,23 +379,28 @@ func GetOneLevelAllObjectUnderDir(bucket string, prefixRootPath string, relative
if val.Key == input.Prefix {
continue
}
- if strings.Contains(val.Key[prefixLen:len(val.Key)-1], "/") {
+ fileName = val.Key[prefixLen:]
+ log.Info("fileName =" + fileName)
+ files := strings.Split(fileName, "/")
+ if fileMap[files[0]] {
continue
+ } else {
+ fileMap[files[0]] = true
}
- if strings.HasSuffix(val.Key, "/") {
+ ParenDir := relativePath
+ fileName = files[0]
+ if len(files) > 1 {
isDir = true
- fileName = val.Key[prefixLen : len(val.Key)-1]
- relativePath += val.Key[prefixLen:]
+ ParenDir += fileName + "/"
} else {
isDir = false
- fileName = val.Key[prefixLen:]
}
fileInfo := FileInfo{
ModTime: val.LastModified.Local().Format("2006-01-02 15:04:05"),
FileName: fileName,
Size: val.Size,
IsDir: isDir,
- ParenDir: relativePath,
+ ParenDir: ParenDir,
}
fileInfos = append(fileInfos, fileInfo)
}
@@ -424,6 +470,7 @@ func GetObsListObject(jobName, outPutPath, parentDir, versionName string) ([]Fil
input := &obs.ListObjectsInput{}
input.Bucket = setting.Bucket
input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, outPutPath, versionName, parentDir), "/")
+ log.Info("bucket=" + input.Bucket + " Prefix=" + input.Prefix)
strPrefix := strings.Split(input.Prefix, "/")
output, err := ObsCli.ListObjects(input)
fileInfos := make([]FileInfo, 0)
@@ -575,6 +622,8 @@ func GetObsLogFileName(prefix string) (string, error) {
log.Error("PutObject failed:", err.Error())
return "", err
}
-
+ if output == nil || len(output.Contents) == 0 {
+ return "", errors.New("obs log files not exist")
+ }
return output.Contents[0].Key, nil
}
diff --git a/modules/templates/helper.go b/modules/templates/helper.go
index 797ccdb2e..3e424454b 100755
--- a/modules/templates/helper.go
+++ b/modules/templates/helper.go
@@ -97,23 +97,24 @@ func NewFuncMap() []template.FuncMap {
"AllowedReactions": func() []string {
return setting.UI.Reactions
},
- "AvatarLink": models.AvatarLink,
- "Safe": Safe,
- "SafeJS": SafeJS,
- "Str2html": Str2html,
- "subOne": subOne,
- "TimeSince": timeutil.TimeSince,
- "TimeSinceUnix": timeutil.TimeSinceUnix,
- "TimeSinceUnix1": timeutil.TimeSinceUnix1,
- "AttachmentResourceType": dataset.GetResourceType,
- "AttachmentStatus": dataset.GetStatusText,
- "TimeSinceUnixShort": timeutil.TimeSinceUnixShort,
- "RawTimeSince": timeutil.RawTimeSince,
- "FileSize": base.FileSize,
- "PrettyNumber": base.PrettyNumber,
- "Subtract": base.Subtract,
- "EntryIcon": base.EntryIcon,
- "MigrationIcon": MigrationIcon,
+ "AvatarLink": models.AvatarLink,
+ "Safe": Safe,
+ "SafeJS": SafeJS,
+ "Str2html": Str2html,
+ "subOne": subOne,
+ "TimeSince": timeutil.TimeSince,
+ "TimeSinceUnix": timeutil.TimeSinceUnix,
+ "TimeSinceUnix1": timeutil.TimeSinceUnix1,
+ "AttachmentResourceType": dataset.GetResourceType,
+ "AttachmentStatus": dataset.GetStatusText,
+ "IsShowDataSetOfCurrentRepo": dataset.IsShowDataSetOfCurrentRepo,
+ "TimeSinceUnixShort": timeutil.TimeSinceUnixShort,
+ "RawTimeSince": timeutil.RawTimeSince,
+ "FileSize": base.FileSize,
+ "PrettyNumber": base.PrettyNumber,
+ "Subtract": base.Subtract,
+ "EntryIcon": base.EntryIcon,
+ "MigrationIcon": MigrationIcon,
"Add": func(a, b int) int {
return a + b
},
@@ -357,13 +358,15 @@ func NewTextFuncMap() []texttmpl.FuncMap {
"AppDomain": func() string {
return setting.Domain
},
- "TimeSince": timeutil.TimeSince,
- "TimeSinceUnix": timeutil.TimeSinceUnix,
- "TimeSinceUnix1": timeutil.TimeSinceUnix1,
- "TimeSinceUnixShort": timeutil.TimeSinceUnixShort,
- "RawTimeSince": timeutil.RawTimeSince,
- "AttachmentResourceType": dataset.GetResourceType,
- "AttachmentStatus": dataset.GetStatusText,
+ "TimeSince": timeutil.TimeSince,
+ "TimeSinceUnix": timeutil.TimeSinceUnix,
+ "TimeSinceUnix1": timeutil.TimeSinceUnix1,
+ "TimeSinceUnixShort": timeutil.TimeSinceUnixShort,
+ "RawTimeSince": timeutil.RawTimeSince,
+ "AttachmentResourceType": dataset.GetResourceType,
+ "AttachmentStatus": dataset.GetStatusText,
+ "IsShowDataSetOfCurrentRepo": dataset.IsShowDataSetOfCurrentRepo,
+
"DateFmtLong": func(t time.Time) string {
return t.Format(time.RFC1123Z)
},
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index 15f33fa68..e4567adcb 100755
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -99,6 +99,7 @@ error500= Sorry, the site has encountered some problems, we are trying to %[1]s to %[3]s
@@ -2976,15 +3058,15 @@ mirror_sync_delete = synced and deleted reference %[2]s at %s#%[2]s`
reject_pull_request = `suggested changes for %s#%[2]s`
upload_dataset=`upload dataset %s`
-task_gpudebugjob=`created CPU/GPU type debugging task%s`
+task_gpudebugjob=`created CPU/GPU type debugging task %s`
task_npudebugjob=`created NPU type debugging task %s`
-task_nputrainjob=`created NPU training task%s`
+task_nputrainjob=`created NPU training task %s`
task_inferencejob=`created reasoning task %s`
task_benchmark=`created profiling task %s`
task_createmodel=`created new model %s`
-task_gputrainjob=`created CPU/GPU training task%s`
-task_c2netnputrainjob=`created NPU training task%s`
-task_c2netgputrainjob=`created CPU/GPU training task%s`
+task_gputrainjob=`created CPU/GPU training task %s`
+task_c2netnputrainjob=`created NPU training task %s`
+task_c2netgputrainjob=`created CPU/GPU training task %s`
[tool]
ago = %s ago
@@ -3073,9 +3155,13 @@ Platform_Tutorial = Tutorial
foot.advice_feedback = Feedback
[cloudbrain]
+all_resource_cluster=All Clusters
+all_ai_center=All AI Centers
resource_cluster = Resource Cluster
resource_cluster_openi = OpenI Resource Cluster
resource_cluster_c2net = China Computing NET
+resource_cluster_openi_simple = OpenI
+resource_cluster_c2net_simple = C²NET
compute_resource = Computing resources
task_name = Task name
task_type = Task type
@@ -3096,13 +3182,17 @@ select_dataset = select dataset
specification = specification
select_specification = select specification
description = description
+wrong_specification=You cannot use this specification, please choose another one.
+resource_use=Resource Occupancy
job_name_rule = Please enter letters, numbers, _ and - up to 64 characters and cannot end with a dash (-).
-dataset_path_rule = The dataset location is stored in the environment variable data_url, and the training output path is stored in the environment variable train_url.
+train_dataset_path_rule = The dataset location is stored in the environment variable data_url, and the output path is stored in the environment variable train_url.
+infer_dataset_path_rule = The dataset location is stored in the environment variable data_url, and the output path is stored in the environment variable result_url.
view_sample = View sample
inference_output_path_rule = The inference output path is stored in the environment variable result_url.
model_file_path_rule=The model file location is stored in the environment variable ckpt_url
-
+model_file_postfix_rule = The supported model file formats are [ckpt, pb, h5, json, pkl, pth, t7, pdparams, onnx, pbtxt, keras, mlmodel, cfg, pt]
+model_convert_postfix_rule = The supported model file formats are [.pth, .pkl, .onnx, .mindir, .ckpt, .pb]
delete_task = Delete task
task_delete_confirm = Are you sure you want to delete this task? Once this task is deleted, it cannot be recovered.
operate_confirm = confirm
@@ -3112,6 +3202,9 @@ gpu_num = GPU
cpu_num = CPU
memory = Memory
shared_memory = Shared Memory
+gpu_memory = GPU Memory
+free = Free
+point_hr = Point/hr
DEBUG = DEBUG
@@ -3122,7 +3215,16 @@ INFERENCE = INFERENCE
BENCHMARK = BENCHMARK
brain_area = Brain Area
+Delete_failed=Failed to delete the job, please try again later.
+Not_Stopped=The job has not been stopped, so it cannot be deleted.
+Already_stopped=The job is already stopped.
+Stopped_failed=Failed to stop the job, please try again later.
+Stopped_success_update_status_fail=Succeeded in stopping the job, but failed to update its status and duration.
+load_code_failed=Failed to load code, please check whether the right branch is selected.
+
error.dataset_select = dataset select error:the count exceed the limit or has same name
+new_train_gpu_tooltips = The code is stored in %s, the dataset is stored in %s, please store your training output in %s for subsequent download.
+new_infer_gpu_tooltips = The dataset is stored in %s, the model file is stored in %s, please store the inference output in %s for subsequent downloads.
[points]
points = points
diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini
index 230bd35b1..0c6942e80 100755
--- a/options/locale/locale_zh-CN.ini
+++ b/options/locale/locale_zh-CN.ini
@@ -100,6 +100,7 @@ error500=抱歉,站点遇到一些问题,我们正尝试修复网页
[error]
occurred=发生错误
report_message=发生错误
+no_right=您没有权限执行本操作。
[install]
install=安装页面
@@ -255,13 +256,16 @@ page_dev_env_desc2_title=模型管理与共享
page_dev_env_desc2_desc=将模型与代码版本建立关联,可以基于代码历史版本,使用不同的方式调整模型,并将结果保存下来;训练好的模型可以开放共享,让更多人的使用模型测试并提出反馈
page_dev_env_desc3_title=一次配置,多次使用
page_dev_env_desc3_desc=提供执行环境共享,一次配置,多次使用,降低模型开发门槛,避免花费重复的时间配置复杂的环境
-page_dev_yunlao=鹏城云脑开源协同
-page_dev_yunlao_desc1=平台已经与鹏城云脑打通,可以利用鹏城云脑的丰富算力资源,完成AI开发任务
-page_dev_yunlao_desc2=鹏城云脑现有AI算力100P FLOPS@FP16(每秒十亿亿次半精度浮点计算),主要硬件基础设施由搭载英伟达Tesla V100 的GPU服务器和搭载鲲鹏、昇腾处理器的Atlas 900 AI集群构成
-page_dev_yunlao_desc3=开发者可以根据使用需求,自由选择相应计算资源,可以测试模型在不同硬件环境下的适配能力、性能、稳定性等
-page_dev_yunlao_desc4=如果您的模型需要更多的计算资源,也可以单独申请
+page_dev_yunlao=启智AI协作平台
+page_dev_yunlao_desc1=启智AI协作平台已经与鹏城云脑、中国算力网(C²NET)一期打通,可以利用鹏城云脑和中国算力网的丰富算力资源,完成AI开发任务。
+page_dev_yunlao_desc2=鹏城云脑现有AI算力100P FLOPS@FP16(每秒十亿亿次半精度浮点计算),主要硬件基础设施由搭载英伟达Tesla V100 和A100 的GPU服务器,以及搭载鲲鹏、昇腾处理器的Atlas 900 AI集群构成。
+page_dev_yunlao_desc3=中国算力网(C²NET)一期可实现不同人工智能计算中心之间高速网络互联,实现算力合理调度和资源弹性分配。目前已接入11家智算中心,算力总规模1924P OPS@FP16。启智AI协作平台已接入其中的鹏城云计算所、成都智算中心、中原智算中心、合肥类脑等节点。
+page_dev_yunlao_desc4=开发者可以根据使用需求,自由选择相应计算资源,可以测试模型在不同硬件环境下的适配能力、性能、稳定性等。
+page_dev_yunlao_desc5=如果您的模型需要更多的计算资源,也可以单独申请。
page_dev_yunlao_apply=单独申请
-
+c2net_title=智算网络
+c2net_desc=人工智能算力网络推进联盟已接入11家智算中心,算力总规模1924P
+c2net_center=中心
search=搜索
search_repo=项目
search_dataset=数据集
@@ -565,6 +569,7 @@ static.CollectImage=收藏镜像数
static.CollectedImage=被收藏镜像数
static.RecommendImage=被推荐镜像数
static.email=Email
+static.phone=电话
static.location=所在地区
static.all=所有
static.public.user_business_analysis_current_month=本月
@@ -830,6 +835,12 @@ create_dataset_fail=创建数据集失败。
query_dataset_fail=查询数据集失败。
edit_attachment_fail=修改描述失败。
+reference_dataset_fail=关联数据集失败,请稍后再试。
+cancel_reference_dataset_fail=取消关联数据集失败,请稍后再试。
+
+download_url=数据集下载地址
+download_copy=复制链接
+download_oper=操作
show_dataset=数据集
edit_dataset=编辑数据集
update_dataset=更新数据集
@@ -922,6 +933,7 @@ dataset_explain = 数据集:云脑1提供 CPU / GPU 资源,云脑2提供 Asc
dataset_instructions_for_use = 使用说明:可以参考启智AI协作平台
dataset_camp_course = 小白训练营课程
dataset_upload = 上传
+dataset_upload_status = 上传状态
dataset_file_name = 文件名称
dataset_available_clusters = 可用集群
dataset_upload_time = 上传时间
@@ -948,6 +960,13 @@ unzip_failed=解压失败
unzip_stared=解压中
unzip_status=解压状态
collection_num=收藏数量
+current_dataset=当前数据集
+linked_dataset=关联数据集
+unfavorite=取消收藏
+favorite=收藏
+disassociate=取消关联
+benchmark_dataset_tip=说明:先使用数据集功能上传模型,然后从数据集列表选模型。
+
[repo]
owner=拥有者
repo_name=项目名称
@@ -1006,7 +1025,7 @@ datasets.desc=数据集功能
cloudbrain_helper=使用GPU/NPU资源,开启Notebook、模型训练任务等
model_manager = 模型
-model_noright=无权限操作
+model_noright=您没有操作权限。
model_rename=模型名称重复,请修改模型名称
@@ -1037,8 +1056,9 @@ image_delete_fail=删除镜像失败,请稍后再试。
image_overwrite=您已经提交过相同名称的镜像,您确定要覆盖原来提交的镜像吗?
download=模型下载
score=评分
+wait_count_start = 当前有
+wait_count_end = 个任务正在排队
file_limit_100 = 单目录下最多显示100个文件或文件夹
-
images.name = 镜像Tag
images.name_placerholder = 请输入镜像Tag
image.label_tooltips = 如Python 3.7, Tensorflow 2.0, cuda 10, pytorch 1.6
@@ -1075,6 +1095,7 @@ cloudbrain_operate=操作
cloudbrain_status_createtime=状态/创建时间
cloudbrain_status_runtime = 运行时长
cloudbrain_jobname_err=只能以小写字母或数字开头且只包含小写字母、数字、_和-,不能以_结尾,最长36个字符。
+cloudbrain_bootfile_err=仓库中不存在启动文件
cloudbrain_query_fail=查询云脑任务失败。
cloudbrain.mirror_tag = 镜像标签
cloudbrain.mirror_description = 镜像描述
@@ -1107,6 +1128,10 @@ modelarts.deletetime=删除时间
modelarts.version_nums=版本数
modelarts.version=版本
modelarts.computing_resources=计算资源
+modelarts.cluster.computing_resources=集群/计算资源
+modelarts.ai_center=智算中心
+modelarts.card_type=卡类型
+modelarts.cluster=集群
modelarts.notebook=调试任务
modelarts.train_job=训练任务
modelarts.train_job.new_debug=新建调试任务
@@ -1229,6 +1254,58 @@ model.manage.Recall = 召回率
model.manage.sava_model = 保存模型
model.manage.model_manage = 模型管理
model.manage.model_accuracy = 模型精度
+model.convert=模型转换任务
+model.list=模型列表
+model.manage.create_new_convert_task=创建模型转换任务
+
+model.manage.notcreatemodel=未创建过模型
+model.manage.init1=代码版本:您还没有初始化代码仓库,请先
+model.manage.init2=创建代码版本;
+model.manage.createtrainjob_tip=训练任务:您还没创建过训练任务,请先创建
+model.manage.createtrainjob=训练任务
+model.manage.delete=删除模型
+model.manage.delete_confirm=你确认删除该模型么?此模型一旦删除不可恢复。
+model.manage.select.trainjob=选择训练任务
+model.manage.select.version=选择版本
+model.manage.engine=模型框架
+model.manage.select.engine=选择模型框架
+model.manage.modelfile=模型文件
+model.manage.modellabel=模型标签
+model.manage.modeldesc=模型描述
+model.manage.baseinfo=基本信息
+modelconvert.notcreate=未创建过模型转换任务
+modelconvert.importfirst1=请您先导入
+modelconvert.importfirst2=模型下载
+modelconvert.importfirst3=,然后再对其进行转换。
+modelconvert.download=下载
+modelconvert.taskname=任务名称
+modelconvert.modelname=模型名称
+modelconvert.selectmodel=选择模型
+modelconvert.modelversion=模型版本
+modelconvert.selectversion=选择版本
+modelconvert.selectmodelfile=选择模型文件
+modelconvert.taskstatus=状态
+modelconvert.srcengine=原模型框架
+modelconvert.outputformat=转换后格式
+modelconvert.createtime=创建时间
+modelconvert.inputdataformat=输入数据格式
+modelconvert.inputshape=输入张量形状
+modelconvert.inputshapetip=如:1,1,32,32,与输入数据格式对应。
+modelconvert.netoutputdata=网络输出数据类型
+modelconvert.taskdesc=任务描述
+modelconvert.newtask=新建任务
+modelconvert.createtask=创建模型转换任务
+
+modelconvert.taskurlname=模型转换任务
+log_scroll_start=滚动到顶部
+log_scroll_end=滚动到底部
+modelconvert.tasknameempty=请输入任务名称。
+modelconvert.inputshapeerror=格式输入错误,请输入如:1,1,32,32,与输入数据格式对应。
+
+modelconvert.manage.create_error1=相同的名称模型转换任务已经存在。
+modelconvert.manage.create_error2=只能创建一个正在运行的模型转换任务。
+modelconvert.manage.model_not_exist=选择的模型不存在。
+modelconvert.manage.no_operate_right=您没有操作权限。
grampus.train_job.ai_center=智算中心
grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。
@@ -2964,6 +3041,13 @@ notices.desc=提示描述
notices.op=操作
notices.delete_success=系统通知已被删除。
+user_management = 用户管理
+resource_management = 资源管理
+resource_pool = 资源池(队列)
+resource_price = 资源规格单价
+application_scenario = 应用场景
+system_configuration = 系统配置
+
[action]
create_repo=创建了项目 %s
rename_repo=重命名项目 %[1]s 为 %[3]s
@@ -3087,9 +3171,13 @@ Platform_Tutorial=新手指引
foot.advice_feedback = 意见反馈
[cloudbrain]
+all_resource_cluster=全部集群
+all_ai_center=全部智算中心
resource_cluster = 算力集群
resource_cluster_openi = 启智集群
resource_cluster_c2net = 智算网络集群
+resource_cluster_openi_simple = 启智
+resource_cluster_c2net_simple = 智算网络
compute_resource = 计算资源
task_name = 任务名称
task_type = 任务类型
@@ -3110,23 +3198,31 @@ select_dataset = 选择数据集
specification = 规格
select_specification = 选择资源规格
description = 描述
+card_duration = 运行卡时
+card_type = 卡类型
+wrong_specification=您目前不能使用这个资源规格,请选择其他资源规格。
job_name_rule = 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。
-dataset_path_rule = 数据集位置存储在环境变量data_url中,训练输出路径存储在环境变量train_url中。
+train_dataset_path_rule = 数据集位置存储在环境变量data_url中,训练输出路径存储在环境变量train_url中。
+infer_dataset_path_rule = 数据集位置存储在环境变量data_url中,推理输出路径存储在环境变量result_url中。
view_sample = 查看样例
inference_output_path_rule = 推理输出路径存储在环境变量result_url中。
model_file_path_rule = 模型文件位置存储在环境变量ckpt_url中。
-
+model_file_postfix_rule = 模型文件支持的格式为 [ckpt, pb, h5, json, pkl, pth, t7, pdparams, onnx, pbtxt, keras, mlmodel, cfg, pt]
+model_convert_postfix_rule = 模型文件支持的格式为 [.pth, .pkl, .onnx, .mindir, .ckpt, .pb]
delete_task = 删除任务
task_delete_confirm = 你确认删除该任务么?此任务一旦删除不可恢复。
operate_confirm = 确定操作
operate_cancel = 取消操作
-
+resource_use=资源占用情况
gpu_num = GPU数
cpu_num = CPU数
memory = 内存
shared_memory = 共享内存
+gpu_memory = 显存
+free = 免费
+point_hr = 积分/时
DEBUG = 调试任务
SNN4IMAGENET = 评测任务
@@ -3136,6 +3232,14 @@ INFERENCE = 推理任务
BENCHMARK = 评测任务
brain_area = 脑区
+Delete_failed=任务删除失败,请稍后再试。
+Not_Stopped=任务还未终止,不能删除。
+Already_stopped=任务已停止。
+Stopped_failed=任务停止失败,请稍后再试。
+Stopped_success_update_status_fail=任务停止成功,状态及运行时间更新失败。
+load_code_failed=代码加载失败,请确认选择了正确的分支。
+
+
error.dataset_select = 数据集选择错误:数量超过限制或者有同名数据集
[points]
@@ -3147,3 +3251,6 @@ hours = 小时
expected_time = ,预计可用
points_acquisition_instructions = 积分获取说明
insufficient_points_balance = 积分余额不足
+
+new_train_gpu_tooltips =训练脚本存储在%s中,数据集存储在%s中,训练输出请存储在%s中以供后续下载。
+new_infer_gpu_tooltips = 数据集存储在%s中,模型文件存储在%s中,推理输出请存储在%s中以供后续下载。
diff --git a/package-lock.json b/package-lock.json
index 9f8961467..192f3f342 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -4395,6 +4395,7 @@
"version": "3.0.0",
"resolved": "https://registry.npm.taobao.org/date-format/download/date-format-3.0.0.tgz",
"integrity": "sha1-64eANlx9KxURB4+0keZHl4DzrZU=",
+ "deprecated": "3.x is no longer supported. Please upgrade to 4.x or higher.",
"engines": {
"node": ">=4.0"
}
@@ -15641,6 +15642,7 @@
"version": "2.2.4",
"resolved": "https://registry.npm.taobao.org/streamroller/download/streamroller-2.2.4.tgz",
"integrity": "sha1-wZjO1C25QIamGTYIGHzoCl8rDlM=",
+ "deprecated": "2.x is no longer supported. Please upgrade to 3.x or higher.",
"dependencies": {
"date-format": "^2.1.0",
"debug": "^4.1.1",
@@ -15654,6 +15656,7 @@
"version": "2.1.0",
"resolved": "https://registry.npm.taobao.org/date-format/download/date-format-2.1.0.tgz",
"integrity": "sha1-MdW16iEc9f12TNOLr50DPffhJc8=",
+ "deprecated": "2.x is no longer supported. Please upgrade to 4.x or higher.",
"engines": {
"node": ">=4.0"
}
diff --git a/public/home/home.js b/public/home/home.js
index 95ea3da4c..70b9d7253 100755
--- a/public/home/home.js
+++ b/public/home/home.js
@@ -119,7 +119,6 @@ document.onreadystatechange = function () {
continue;
}
}
- refresh3DInfo(record);
var recordPrefix = getMsg(record);
if(record.OpType == "6" || record.OpType == "10" || record.OpType == "12" || record.OpType == "13"){
html += recordPrefix + actionName;
@@ -208,29 +207,6 @@ function getTaskLink(record){
return re;
}
-function refresh3DInfo(record){
- if(record.OpType == "25" || record.OpType == "29" || record.OpType == "31"){
- //cloudbrain one
- var lines = $('.rotation3D__line');
- var span = $('.rotation3D__line').find("span")[0];
- //console.log(span);
- span.innerText =record.RefName;
- //$('.rotation3D__line').find("span").eq(0).text(record.RefName)
- //console.log("cloudbrain one line length=" + lines.length);
- //lines[0].find("span").text(record.RefName);
- }else if(record.OpType == "26" || record.OpType == "27" || record.OpType == "28"){
- //cloudbrain two
- var lines = $('.rotation3D__line');
- //console.log("cloudbrain two line length=" + lines.length);
- var span = $('.rotation3D__line').find("span")[1];
- //console.log(span);
- if(span != null){
- span.innerText =record.RefName;
- }
- }
-
-}
-
function getMsg(record){
var html ="";
html += "