diff --git a/.gitignore b/.gitignore index 9f34fea2a..ceeeaa92c 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ coverage.all !/custom/conf/templates /custom/conf/app.ini !/custom/conf/app.ini.sample +/custom/public/kanban /data /indexers /log diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 200ae83ad..9b915c095 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -208,7 +208,16 @@ func (task *Cloudbrain) CorrectCreateUnix() { func (task *Cloudbrain) IsTerminal() bool { status := task.Status - return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) || status == string(ModelArtsStopped) || status == string(JobStopped) || status == string(JobFailed) || status == string(JobSucceeded) + return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || + status == string(ModelArtsTrainJobKilled) || status == string(ModelArtsStopped) || + status == string(JobStopped) || status == string(JobFailed) || + status == string(JobSucceeded) || status == GrampusStatusFailed || + status == GrampusStatusSucceeded || status == GrampusStatusStopped +} +func (task *Cloudbrain) IsRunning() bool { + status := task.Status + return status == string(ModelArtsTrainJobRunning) || status == string(ModelArtsRunning) || + status == string(JobRunning) || status == GrampusStatusRunning } func ConvertDurationToStr(duration int64) string { diff --git a/models/user_business_analysis.go b/models/user_business_analysis.go index cb503d669..47036e2e9 100644 --- a/models/user_business_analysis.go +++ b/models/user_business_analysis.go @@ -920,7 +920,7 @@ func CounDataByDateAndReCount(wikiCountMap map[string]int, startTime time.Time, CountDate = time.Date(startTime.Year(), startTime.Month(), startTime.Day(), 0, 1, 0, 0, currentTimeNow.Location()) } - DataDate := startTime.Format("2006-01-02") + DataDate := CountDate.Format("2006-01-02") CodeMergeCountMap := queryPullRequest(start_unix, end_unix) CommitCountMap := queryCommitAction(start_unix, end_unix, 5) IssueCountMap := queryCreateIssue(start_unix, end_unix) @@ -1103,6 +1103,7 @@ func updateNewUserAcitivity(currentUserActivity map[int64]map[int64]int64, userA ",activate_regist_user=" + fmt.Sprint(useMetrics.ActivateRegistUser) + ",not_activate_regist_user=" + fmt.Sprint(useMetrics.CurrentDayRegistUser-useMetrics.ActivateRegistUser) + ",current_day_regist_user=" + fmt.Sprint(useMetrics.CurrentDayRegistUser) + + ",data_date='" + time.Unix(key, 0).Format("2006-01-02") + "'" + " where count_date=" + fmt.Sprint(key) statictisSess.Exec(updateSql) diff --git a/modules/auth/wechat/client.go b/modules/auth/wechat/client.go index 9ed4b543f..bb6db09d0 100644 --- a/modules/auth/wechat/client.go +++ b/modules/auth/wechat/client.go @@ -19,6 +19,7 @@ const ( ACCESS_TOKEN_PATH = "/cgi-bin/token" QR_CODE_PATH = "/cgi-bin/qrcode/create" GET_MATERIAL_PATH = "/cgi-bin/material/batchget_material" + SEND_TEMPLATE_PATH = "/cgi-bin/message/template/send" ACTION_QR_STR_SCENE = "QR_STR_SCENE" ERR_CODE_ACCESSTOKEN_EXPIRE = 42001 @@ -41,12 +42,33 @@ type QRCodeRequest struct { Action_info ActionInfo `json:"action_info"` Expire_seconds int `json:"expire_seconds"` } + type MaterialRequest struct { Type string `json:"type"` Offset int `json:"offset"` Count int `json:"count"` } +type TemplateMsgRequest struct { + ToUser string `json:"touser"` + TemplateId string `json:"template_id"` + Url string `json:"url"` + ClientMsgId string `json:"client_msg_id"` + Data interface{} `json:"data"` +} +type TemplateValue struct { + Value string `json:"value"` + Color string `json:"color"` +} + +type CloudbrainTaskData struct { + First TemplateValue `json:"first"` + Keyword1 TemplateValue `json:"keyword1"` + Keyword2 TemplateValue `json:"keyword2"` + Keyword3 TemplateValue `json:"keyword3"` + Remark TemplateValue `json:"remark"` +} + type ActionInfo struct { Scene Scene `json:"scene"` } @@ -161,3 +183,27 @@ func getErrorCodeFromResponse(r *resty.Response) int { c, _ := strconv.Atoi(fmt.Sprint(code)) return c } + +func sendTemplateMsg(req TemplateMsgRequest) (error, bool) { + client := getWechatRestyClient() + + bodyJson, _ := json.Marshal(req) + r, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetQueryParam("access_token", GetWechatAccessToken()). + SetBody(bodyJson). + Post(setting.WechatApiHost + SEND_TEMPLATE_PATH) + if err != nil { + log.Error("sendTemplateMsg,e=%v", err) + return nil, false + } + a := r.Body() + resultMap := make(map[string]interface{}, 0) + json.Unmarshal(a, &resultMap) + errcode := resultMap["errcode"] + log.Info("sendTemplateMsg,%v", r) + if errcode == fmt.Sprint(ERR_CODE_ACCESSTOKEN_EXPIRE) || errcode == fmt.Sprint(ERR_CODE_ACCESSTOKEN_INVALID) { + return nil, true + } + return nil, false +} diff --git a/modules/auth/wechat/template.go b/modules/auth/wechat/template.go new file mode 100644 index 000000000..6c19651a6 --- /dev/null +++ b/modules/auth/wechat/template.go @@ -0,0 +1,145 @@ +package wechat + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "errors" + "fmt" + "time" +) + +type JobOperateType string + +const ( + JobOperateTypeStart JobOperateType = "start" + JobOperateTypeStop JobOperateType = "stop" +) + +func GetJobOperateTypeFromCloudbrainStatus(cloudbrain *models.Cloudbrain) JobOperateType { + if cloudbrain.IsTerminal() { + return JobOperateTypeStop + } + if cloudbrain.IsRunning() { + return JobOperateTypeStart + } + return "" +} + +func SendCloudbrainStartedMsg(operateType JobOperateType, cloudbrain models.Cloudbrain) error { + defer func() { + if err := recover(); err != nil { + combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2)) + log.Error("PANIC:", combinedErr) + } + }() + repo, err := models.GetRepositoryByID(cloudbrain.RepoID) + if err != nil { + log.Error("SendCloudbrainStartedMsg GetRepositoryByID error,%v", err) + } + + if setting.CloudbrainStartedTemplateId == "" { + return nil + } + + openId := models.GetUserWechatOpenId(cloudbrain.UserID) + if openId == "" { + return errors.New("Wechat openId not exist") + } + data := CloudbrainTaskData{ + First: TemplateValue{Value: getCloudbrainTemplateTitle(operateType)}, + Keyword1: TemplateValue{Value: cloudbrain.DisplayJobName}, + Keyword2: TemplateValue{Value: getJobTypeDisplayName(cloudbrain.JobType)}, + Keyword3: TemplateValue{Value: time.Unix(int64(cloudbrain.CreatedUnix), 0).Format("2006-01-02 15:04:05")}, + Remark: TemplateValue{Value: getCloudbrainTemplateRemark(operateType)}, + } + req := TemplateMsgRequest{ + ToUser: openId, + TemplateId: setting.CloudbrainStartedTemplateId, + Url: getCloudbrainTemplateUrl(cloudbrain, repo), + ClientMsgId: string(operateType) + "_" + fmt.Sprint(cloudbrain.ID), + Data: data, + } + err, retryFlag := sendTemplateMsg(req) + if retryFlag { + log.Info("retrySendCloudbrainTemplateMsg calling") + refreshAccessToken() + err, _ = sendTemplateMsg(req) + if err != nil { + log.Error("SendCloudbrainStartedMsg err. %v", err) + return err + } + return nil + } + if err != nil { + log.Error("SendCloudbrainStartedMsg err. %v", err) + return err + } + return nil +} + +func getCloudbrainTemplateUrl(cloudbrain models.Cloudbrain, repo *models.Repository) string { + url := setting.AppURL + repo.FullName() + + switch cloudbrain.JobType { + case string(models.JobTypeDebug): + if cloudbrain.ComputeResource == "CPU/GPU" { + url += "/cloudbrain/" + fmt.Sprint(cloudbrain.ID) + } else { + url += "/modelarts/notebook/" + fmt.Sprint(cloudbrain.ID) + } + case string(models.JobTypeBenchmark): + url += "/cloudbrain/benchmark/" + fmt.Sprint(cloudbrain.ID) + case string(models.JobTypeTrain): + if cloudbrain.Type == models.TypeCloudBrainOne { + url += "/cloudbrain/train-job/" + fmt.Sprint(cloudbrain.JobID) + } else if cloudbrain.Type == models.TypeCloudBrainTwo { + url += "/modelarts/train-job/" + fmt.Sprint(cloudbrain.JobID) + } else if cloudbrain.Type == models.TypeC2Net { + url += "/grampus/train-job/" + fmt.Sprint(cloudbrain.JobID) + } + case string(models.JobTypeInference): + url += "/modelarts/inference-job/" + fmt.Sprint(cloudbrain.JobID) + } + return url +} + +func getCloudbrainTemplateTitle(operateType JobOperateType) string { + var title string + switch operateType { + case JobOperateTypeStart: + title = "您好,您提交的算力资源申请已通过,任务已启动,请您关注运行情况。" + case JobOperateTypeStop: + title = "您好,您提交的任务已运行结束。" + } + + return title + +} + +func getCloudbrainTemplateRemark(operateType JobOperateType) string { + var remark string + switch operateType { + case JobOperateTypeStart: + remark = "感谢您的耐心等待。" + case JobOperateTypeStop: + remark = "点击可查看运行结果" + } + + return remark + +} + +func getJobTypeDisplayName(jobType string) string { + switch jobType { + case string(models.JobTypeDebug): + return "调试任务" + case string(models.JobTypeBenchmark): + return "评测任务" + case string(models.JobTypeTrain): + return "训练任务" + case string(models.JobTypeInference): + return "推理任务" + } + return "" +} diff --git a/modules/notification/base/notifier.go b/modules/notification/base/notifier.go index 8d6fdeb52..3bdc29d46 100644 --- a/modules/notification/base/notifier.go +++ b/modules/notification/base/notifier.go @@ -56,4 +56,6 @@ type Notifier interface { NotifySyncDeleteRef(doer *models.User, repo *models.Repository, refType, refFullName string) NotifyOtherTask(doer *models.User, repo *models.Repository, id string, name string, optype models.ActionType) + + NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) } diff --git a/modules/notification/base/null.go b/modules/notification/base/null.go index 0d3489882..6a24963d2 100644 --- a/modules/notification/base/null.go +++ b/modules/notification/base/null.go @@ -158,3 +158,7 @@ func (*NullNotifier) NotifySyncDeleteRef(doer *models.User, repo *models.Reposit func (*NullNotifier) NotifyOtherTask(doer *models.User, repo *models.Repository, id string, name string, optype models.ActionType) { } + +func (*NullNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) { + +} diff --git a/modules/notification/notification.go b/modules/notification/notification.go index 0fd6fa471..b6d925f07 100644 --- a/modules/notification/notification.go +++ b/modules/notification/notification.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/notification/mail" "code.gitea.io/gitea/modules/notification/ui" "code.gitea.io/gitea/modules/notification/webhook" + wechatNotifier "code.gitea.io/gitea/modules/notification/wechat" "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" ) @@ -35,6 +36,7 @@ func NewContext() { RegisterNotifier(indexer.NewNotifier()) RegisterNotifier(webhook.NewNotifier()) RegisterNotifier(action.NewNotifier()) + RegisterNotifier(wechatNotifier.NewNotifier()) } // NotifyUploadAttachment notifies attachment upload message to notifiers @@ -269,3 +271,10 @@ func NotifySyncDeleteRef(pusher *models.User, repo *models.Repository, refType, notifier.NotifySyncDeleteRef(pusher, repo, refType, refFullName) } } + +// NotifyChangeCloudbrainStatus +func NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) { + for _, notifier := range notifiers { + notifier.NotifyChangeCloudbrainStatus(cloudbrain, oldStatus) + } +} diff --git a/modules/notification/wechat/wechat.go b/modules/notification/wechat/wechat.go new file mode 100644 index 000000000..f77bfe741 --- /dev/null +++ b/modules/notification/wechat/wechat.go @@ -0,0 +1,44 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package wechat + +import ( + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/auth/wechat" + "code.gitea.io/gitea/modules/notification/base" + "code.gitea.io/gitea/modules/setting" +) + +type wechatNotifier struct { + base.NullNotifier +} + +var ( + _ base.Notifier = &wechatNotifier{} +) + +// NewNotifier create a new wechatNotifier notifier +func NewNotifier() base.Notifier { + return &wechatNotifier{} +} + +func (*wechatNotifier) NotifyChangeCloudbrainStatus(cloudbrain *models.Cloudbrain, oldStatus string) { + operateType := wechat.GetJobOperateTypeFromCloudbrainStatus(cloudbrain) + if operateType == "" { + return + } + switch operateType { + case wechat.JobOperateTypeStart: + if len(setting.CloudbrainStartedNotifyList) == 0 { + return + } + for _, v := range setting.CloudbrainStartedNotifyList { + if v == cloudbrain.JobType { + go wechat.SendCloudbrainStartedMsg(operateType, *cloudbrain) + return + } + } + } +} diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 6ec54fdff..f63088091 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -582,6 +582,10 @@ var ( TreePathOfAutoMsgReply string TreePathOfSubscribe string + //wechat template msg config + CloudbrainStartedTemplateId string + CloudbrainStartedNotifyList []string + //nginx proxy PROXYURL string RadarMap = struct { @@ -1432,7 +1436,7 @@ func NewContext() { WechatApiHost = sec.Key("HOST").MustString("https://api.weixin.qq.com") WechatApiTimeoutSeconds = sec.Key("TIMEOUT_SECONDS").MustInt(3) WechatAppId = sec.Key("APP_ID").MustString("wxba77b915a305a57d") - WechatAppSecret = sec.Key("APP_SECRET").MustString("e48e13f315adc32749ddc7057585f198") + WechatAppSecret = sec.Key("APP_SECRET").MustString("") WechatQRCodeExpireSeconds = sec.Key("QR_CODE_EXPIRE_SECONDS").MustInt(120) WechatAuthSwitch = sec.Key("AUTH_SWITCH").MustBool(true) UserNameOfWechatReply = sec.Key("AUTO_REPLY_USER_NAME").MustString("OpenIOSSG") @@ -1440,6 +1444,8 @@ func NewContext() { RefNameOfWechatReply = sec.Key("AUTO_REPLY_REF_NAME").MustString("master") TreePathOfAutoMsgReply = sec.Key("AUTO_REPLY_TREE_PATH").MustString("wechat/auto_reply.json") TreePathOfSubscribe = sec.Key("SUBSCRIBE_TREE_PATH").MustString("wechat/subscribe_reply.json") + CloudbrainStartedTemplateId = sec.Key("CLOUDBRAIN_STARTED_TEMPLATE_ID").MustString("") + CloudbrainStartedNotifyList = strings.Split(sec.Key("CLOUDBRAIN_STARTED_NOTIFY_LIST").MustString("DEBUG"), ",") SetRadarMapConfig() diff --git a/modules/storage/minio_ext.go b/modules/storage/minio_ext.go index 4ad83da82..4c0cbac55 100755 --- a/modules/storage/minio_ext.go +++ b/modules/storage/minio_ext.go @@ -187,9 +187,6 @@ func GetOneLevelAllObjectUnderDirMinio(bucket string, prefixRootPath string, rel if val.Key == Prefix { continue } - if strings.Contains(val.Key[prefixLen:len(val.Key)-1], "/") { - continue - } if strings.HasSuffix(val.Key, "/") { isDir = true fileName = val.Key[prefixLen : len(val.Key)-1] diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index b641f4011..105abf006 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1007,7 +1007,7 @@ cloudbrain.time.starttime=Start run time cloudbrain.time.endtime=End run time cloudbrain.datasetdownload=Dataset download url model_manager = Model -model_noright=No right +model_noright=You have no right to do the operation. model_rename=Duplicate model name, please modify model name. date=Date @@ -1225,7 +1225,7 @@ model.manage.create_new_convert_task=Create Model Transformation Task modelconvert.manage.create_error1=A model transformation task with the same name already exists. modelconvert.manage.create_error2=Only one running model transformation task can be created. modelconvert.manage.model_not_exist=The model does not exist. -modelconvert.manage.no_operate_right=No operation permission. +modelconvert.manage.no_operate_right=You have no right to do the operation. grampus.train_job.ai_center = AI Center grampus.dataset_path_rule = The code is storaged in /cache/code;the dataset is storaged in /cache/dataset;and please put your model into /cache/output, then you can download it online。 diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 03df98b15..3d71bc3a7 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1006,7 +1006,7 @@ datasets.desc=数据集功能 cloudbrain_helper=使用GPU/NPU资源,开启Notebook、模型训练任务等 model_manager = 模型 -model_noright=无权限操作 +model_noright=您没有操作权限。 model_rename=模型名称重复,请修改模型名称 @@ -1237,7 +1237,7 @@ model.manage.create_new_convert_task=创建模型转换任务 modelconvert.manage.create_error1=相同的名称模型转换任务已经存在。 modelconvert.manage.create_error2=只能创建一个正在运行的模型转换任务。 modelconvert.manage.model_not_exist=选择的模型不存在。 -modelconvert.manage.no_operate_right=无操作权限。 +modelconvert.manage.no_operate_right=您没有操作权限。 grampus.train_job.ai_center=智算中心 grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。 diff --git a/routers/api/v1/repo/cloudbrain.go b/routers/api/v1/repo/cloudbrain.go index d291024f9..c3a803f70 100755 --- a/routers/api/v1/repo/cloudbrain.go +++ b/routers/api/v1/repo/cloudbrain.go @@ -6,6 +6,7 @@ package repo import ( + "code.gitea.io/gitea/modules/notification" "encoding/json" "net/http" "sort" @@ -74,7 +75,7 @@ func GetCloudbrainTask(ctx *context.APIContext) { log.Error("ConvertToJobResultPayload failed:", err) return } - + oldStatus := job.Status job.Status = result.JobStatus.State taskRoles := result.TaskRoles taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) @@ -86,6 +87,9 @@ func GetCloudbrainTask(ctx *context.APIContext) { if result.JobStatus.State != string(models.JobWaiting) { models.ParseAndSetDurationFromCloudBrainOne(result, job) + if oldStatus != job.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) @@ -99,6 +103,7 @@ func GetCloudbrainTask(ctx *context.APIContext) { "SubState": result.JobStatus.SubState, "CreatedTime": time.Unix(result.JobStatus.CreatedTime/1000, 0).Format("2006-01-02 15:04:05"), "CompletedTime": time.Unix(result.JobStatus.CompletedTime/1000, 0).Format("2006-01-02 15:04:05"), + "JobDuration": job.TrainJobDuration, }) } @@ -123,7 +128,7 @@ func GetCloudBrainInferenceJob(ctx *context.APIContext) { log.Error("ConvertToJobResultPayload failed:", err) return } - + oldStatus := job.Status job.Status = result.JobStatus.State if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) { taskRoles := result.TaskRoles @@ -136,6 +141,9 @@ func GetCloudBrainInferenceJob(ctx *context.APIContext) { if result.JobStatus.State != string(models.JobWaiting) { models.ParseAndSetDurationFromCloudBrainOne(result, job) + if oldStatus != job.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index 187c16c50..7d30614b5 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -6,6 +6,7 @@ package repo import ( + "code.gitea.io/gitea/modules/notification" "encoding/json" "net/http" "path" @@ -42,8 +43,11 @@ func GetModelArtsNotebook(ctx *context.APIContext) { ctx.NotFound(err) return } - + oldStatus := job.Status job.Status = result.Status + if oldStatus != result.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) @@ -75,21 +79,26 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { if job.StartTime == 0 && result.Lease.UpdateTime > 0 { job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) } + oldStatus := job.Status job.Status = result.Status if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) { job.EndTime = timeutil.TimeStampNow() } job.CorrectCreateUnix() job.ComputeAndSetDuration() + if oldStatus != result.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) } ctx.JSON(http.StatusOK, map[string]interface{}{ - "ID": ID, - "JobName": job.JobName, - "JobStatus": result.Status, + "ID": ID, + "JobName": job.JobName, + "JobStatus": result.Status, + "JobDuration": job.TrainJobDuration, }) } @@ -111,10 +120,13 @@ func GetModelArtsTrainJob(ctx *context.APIContext) { ctx.NotFound(err) return } - + oldStatus := job.Status job.Status = modelarts.TransTrainJobStatus(result.IntStatus) job.Duration = result.Duration job.TrainJobDuration = result.TrainJobDuration + if oldStatus != job.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) @@ -155,7 +167,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { log.Error("ConvertToJobResultPayload failed:", err) return } - + oldStatus := job.Status job.Status = result.JobStatus.State if result.JobStatus.State != string(models.JobWaiting) && result.JobStatus.State != string(models.JobFailed) { taskRoles := result.TaskRoles @@ -168,6 +180,9 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { if result.JobStatus.State != string(models.JobWaiting) { models.ParseAndSetDurationFromCloudBrainOne(result, job) + if oldStatus != job.Status { + notification.NotifyChangeCloudbrainStatus(job, oldStatus) + } err = models.UpdateJob(job) if err != nil { log.Error("UpdateJob failed:", err) diff --git a/routers/repo/ai_model_manage.go b/routers/repo/ai_model_manage.go index 0aef1a70c..d01539a75 100644 --- a/routers/repo/ai_model_manage.go +++ b/routers/repo/ai_model_manage.go @@ -152,6 +152,10 @@ func saveModelByParameters(jobId string, versionName string, name string, versio } func SaveNewNameModel(ctx *context.Context) { + if !ctx.Repo.CanWrite(models.UnitTypeModelManage) { + ctx.Error(403, ctx.Tr("repo.model_noright")) + return + } name := ctx.Query("Name") if name == "" { ctx.Error(500, fmt.Sprintf("name or version is null.")) @@ -169,6 +173,10 @@ func SaveNewNameModel(ctx *context.Context) { } func SaveModel(ctx *context.Context) { + if !ctx.Repo.CanWrite(models.UnitTypeModelManage) { + ctx.Error(403, ctx.Tr("repo.model_noright")) + return + } log.Info("save model start.") JobId := ctx.Query("JobId") VersionName := ctx.Query("VersionName") @@ -177,16 +185,8 @@ func SaveModel(ctx *context.Context) { label := ctx.Query("Label") description := ctx.Query("Description") engine := ctx.QueryInt("Engine") - trainTaskCreate := ctx.QueryBool("trainTaskCreate") modelSelectedFile := ctx.Query("modelSelectedFile") log.Info("engine=" + fmt.Sprint(engine) + " modelSelectedFile=" + modelSelectedFile) - if !trainTaskCreate { - if !ctx.Repo.CanWrite(models.UnitTypeModelManage) { - //ctx.NotFound(ctx.Req.URL.RequestURI(), nil) - ctx.JSON(403, ctx.Tr("repo.model_noright")) - return - } - } if JobId == "" || VersionName == "" { ctx.Error(500, fmt.Sprintf("JobId or VersionName is null.")) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 53f465fae..110ac6cf5 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,6 +2,7 @@ package repo import ( "bufio" + "code.gitea.io/gitea/modules/notification" "encoding/json" "errors" "fmt" @@ -218,6 +219,255 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { return nil } +func cloudBrainTrainJobErrorPrepare(ctx *context.Context, form auth.CreateCloudBrainForm) error { + ctx.Data["PageIsCloudBrain"] = true + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + cloudbrain.InitSpecialPool() + + if gpuInfos == nil { + json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) + } + ctx.Data["gpu_types"] = gpuInfos.GpuInfo + + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + + if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" { + json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) + } + if inferenceGpuInfos != nil { + ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo + } + + if benchmarkGpuInfos == nil { + json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) + } + ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo + + if benchmarkResourceSpecs == nil { + json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs) + } + ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec + + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + + if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) + } + if cloudbrain.InferenceResourceSpecs != nil { + ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec + } + + if cloudbrain.SpecialPools != nil { + var debugGpuTypes []*models.GpuInfo + var trainGpuTypes []*models.GpuInfo + + for _, pool := range cloudbrain.SpecialPools.Pools { + org, _ := models.GetOrgByName(pool.Org) + if org != nil { + isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) + if isOrgMember { + for _, jobType := range pool.JobType { + if jobType == string(models.JobTypeDebug) { + debugGpuTypes = append(debugGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["resource_specs"] = pool.ResourceSpec + } + } else if jobType == string(models.JobTypeTrain) { + trainGpuTypes = append(trainGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["train_resource_specs"] = pool.ResourceSpec + } + } + } + break + } + } + + } + + if len(debugGpuTypes) > 0 { + ctx.Data["gpu_types"] = debugGpuTypes + } + + if len(trainGpuTypes) > 0 { + ctx.Data["train_gpu_types"] = trainGpuTypes + } + + } + + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["datasetType"] = models.TypeCloudBrainOne + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["job_type"] = form.JobType + ctx.Data["gpu_type"] = form.GpuType + ctx.Data["resource_spec_id"] = form.ResourceSpecId + return nil +} + +func cloudBrainInferenceJobErrorPrepare(ctx *context.Context, form auth.CreateCloudBrainInferencForm) error { + ctx.Data["PageIsCloudBrain"] = true + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + cloudbrain.InitSpecialPool() + + if gpuInfos == nil { + json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) + } + ctx.Data["gpu_types"] = gpuInfos.GpuInfo + + if trainGpuInfos == nil { + json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) + } + ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo + + if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" { + json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) + } + if inferenceGpuInfos != nil { + ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo + } + + if benchmarkGpuInfos == nil { + json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) + } + ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo + + if benchmarkResourceSpecs == nil { + json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), &benchmarkResourceSpecs) + } + ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec + + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec + + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec + + if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) + } + if cloudbrain.InferenceResourceSpecs != nil { + ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec + } + + if cloudbrain.SpecialPools != nil { + var debugGpuTypes []*models.GpuInfo + var trainGpuTypes []*models.GpuInfo + + for _, pool := range cloudbrain.SpecialPools.Pools { + org, _ := models.GetOrgByName(pool.Org) + if org != nil { + isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID) + if isOrgMember { + for _, jobType := range pool.JobType { + if jobType == string(models.JobTypeDebug) { + debugGpuTypes = append(debugGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["resource_specs"] = pool.ResourceSpec + } + } else if jobType == string(models.JobTypeTrain) { + trainGpuTypes = append(trainGpuTypes, pool.Pool...) + if pool.ResourceSpec != nil { + ctx.Data["train_resource_specs"] = pool.ResourceSpec + } + } + } + break + } + } + + } + if len(debugGpuTypes) > 0 { + ctx.Data["gpu_types"] = debugGpuTypes + } + + if len(trainGpuTypes) > 0 { + ctx.Data["train_gpu_types"] = trainGpuTypes + } + + } + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["datasetType"] = models.TypeCloudBrainOne + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["job_type"] = form.JobType + ctx.Data["gpu_type"] = form.GpuType + ctx.Data["resource_spec_id"] = form.ResourceSpecId + ctx.Data["label_names"] = form.LabelName + ctx.Data["train_url"] = form.TrainUrl + ctx.Data["ckpt_name"] = form.CkptName + ctx.Data["model_name"] = form.ModelName + ctx.Data["model_version"] = form.ModelVersion + ctx.Data["description"] = form.Description + return nil +} func CloudBrainNew(ctx *context.Context) { err := cloudBrainNewDataPrepare(ctx) @@ -250,28 +500,28 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } if jobType != string(models.JobTypeBenchmark) && jobType != string(models.JobTypeDebug) && jobType != string(models.JobTypeTrain) { log.Error("jobtype error:", jobType, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("jobtype error", tpl, &form) return } @@ -279,13 +529,13 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -294,7 +544,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { datasetInfos, datasetNames, err := models.GetDatasetInfo(uuids) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -315,7 +565,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) if errStr != "" { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(errStr, tpl, &form) return } @@ -361,7 +611,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainTrainJobErrorPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -401,20 +651,21 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } } if !jobNamePattern.MatchString(displayJobName) { + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } @@ -422,13 +673,13 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, jobType) if err != nil { log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain.morethanonejob"), tpl, &form) return } @@ -448,7 +699,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid) if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -485,7 +736,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra err = cloudbrain.GenerateTask(req) if err != nil { - cloudBrainNewDataPrepare(ctx) + cloudBrainInferenceJobErrorPrepare(ctx, form) ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -636,6 +887,7 @@ func CloudBrainTrainJobShow(ctx *context.Context) { func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.JobType) { ctx.Data["PageIsCloudBrain"] = true debugListType := ctx.Query("debugListType") + cloudbrain.InitSpecialPool() var task *models.Cloudbrain var err error @@ -647,22 +899,22 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo if err != nil { log.Info("error:" + err.Error()) - ctx.Data["error"] = err.Error() + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } result, err := cloudbrain.GetJob(task.JobID) if err != nil { log.Info("error:" + err.Error()) - ctx.Data["error"] = err.Error() + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } - + hasSpec := false if task.JobType == string(models.JobTypeTrain) { if cloudbrain.TrainResourceSpecs == nil { json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) } - hasSpec := false + for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec { if tmp.Id == task.ResourceSpecId { hasSpec = true @@ -670,24 +922,7 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo ctx.Data["CpuNum"] = tmp.CpuNum ctx.Data["MemMiB"] = tmp.MemMiB ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB - } - } - - if !hasSpec && cloudbrain.SpecialPools != nil { - for _, specialPool := range cloudbrain.SpecialPools.Pools { - - if specialPool.ResourceSpec != nil { - - for _, spec := range specialPool.ResourceSpec { - if task.ResourceSpecId == spec.Id { - ctx.Data["GpuNum"] = spec.GpuNum - ctx.Data["CpuNum"] = spec.CpuNum - ctx.Data["MemMiB"] = spec.MemMiB - ctx.Data["ShareMemMiB"] = spec.ShareMemMiB - break - } - } - } + break } } @@ -697,10 +932,12 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo } for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec { if tmp.Id == task.ResourceSpecId { + hasSpec = true ctx.Data["GpuNum"] = tmp.GpuNum ctx.Data["CpuNum"] = tmp.CpuNum ctx.Data["MemMiB"] = tmp.MemMiB ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB + break } } } else { @@ -709,10 +946,32 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo } for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec { if tmp.Id == task.ResourceSpecId { + hasSpec = true ctx.Data["GpuNum"] = tmp.GpuNum ctx.Data["CpuNum"] = tmp.CpuNum ctx.Data["MemMiB"] = tmp.MemMiB ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB + break + + } + } + } + + if !hasSpec && cloudbrain.SpecialPools != nil { + + for _, specialPool := range cloudbrain.SpecialPools.Pools { + + if specialPool.ResourceSpec != nil { + + for _, spec := range specialPool.ResourceSpec { + if task.ResourceSpecId == spec.Id { + ctx.Data["GpuNum"] = spec.GpuNum + ctx.Data["CpuNum"] = spec.CpuNum + ctx.Data["MemMiB"] = spec.MemMiB + ctx.Data["ShareMemMiB"] = spec.ShareMemMiB + break + } + } } } } @@ -731,14 +990,6 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo ctx.Data["resource_type"] = resourceType.Value } } - for _, specialPool := range cloudbrain.SpecialPools.Pools { - - for _, resourceType := range specialPool.Pool { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - } } else if task.JobType == string(models.JobTypeInference) { if inferenceGpuInfos == nil { @@ -770,16 +1021,30 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo } } } + + if cloudbrain.SpecialPools != nil { + for _, specialPool := range cloudbrain.SpecialPools.Pools { + for _, resourceType := range specialPool.Pool { + if resourceType.Queue == jobRes.Config.GpuType { + ctx.Data["resource_type"] = resourceType.Value + } + } + } + } taskRoles := jobRes.TaskRoles taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) ctx.Data["taskRes"] = taskRes ctx.Data["ExitDiagnostics"] = taskRes.TaskStatuses[0].ExitDiagnostics + oldStatus := task.Status task.Status = taskRes.TaskStatuses[0].State task.ContainerID = taskRes.TaskStatuses[0].ContainerID task.ContainerIp = taskRes.TaskStatuses[0].ContainerIP models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) if task.DeletedAt.IsZero() { //normal record + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { ctx.Data["error"] = err.Error() @@ -895,6 +1160,20 @@ func CloudBrainCommitImageShow(ctx *context.Context) { ctx.HTML(200, tplCloudBrainImageSubmit) } +func GetImage(ctx *context.Context) { + + var ID = ctx.Params(":id") + id, _ := strconv.ParseInt(ID, 10, 64) + + image, err := models.GetImageByID(id) + if err != nil { + log.Error("GetImageByID failed:%v", err.Error()) + ctx.JSON(http.StatusNotFound, nil) + } + ctx.JSON(http.StatusOK, image) + +} + func CloudBrainImageEdit(ctx *context.Context) { ctx.Data["PageIsImageEdit"] = true ctx.Data["PageFrom"] = ctx.Params(":from") @@ -1119,12 +1398,15 @@ func CloudBrainStop(ctx *context.Context) { errorMsg = "cloudbrain.Stopped_failed" break } - + oldStatus := task.Status task.Status = string(models.JobStopped) if task.EndTime == 0 { task.EndTime = timeutil.TimeStampNow() } task.ComputeAndSetDuration() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) @@ -1218,11 +1500,15 @@ func logErrorAndUpdateJobStatus(err error, taskInfo *models.Cloudbrain) { if err != nil { log.Warn("Failed to stop cloudBrain job:"+taskInfo.JobID, err) } else { + oldStatus := taskInfo.Status taskInfo.Status = string(models.JobStopped) if taskInfo.EndTime == 0 { taskInfo.EndTime = timeutil.TimeStampNow() } taskInfo.ComputeAndSetDuration() + if oldStatus != taskInfo.Status { + notification.NotifyChangeCloudbrainStatus(taskInfo, oldStatus) + } err = models.UpdateJob(taskInfo) if err != nil { log.Warn("UpdateJob failed", err) @@ -1702,9 +1988,13 @@ func SyncCloudbrainStatus() { jobRes, _ := models.ConvertToJobResultPayload(result.Payload) taskRoles := jobRes.TaskRoles taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) + oldStatus := task.Status task.Status = taskRes.TaskStatuses[0].State if task.Status != string(models.JobWaiting) { models.ParseAndSetDurationFromCloudBrainOne(jobRes, task) + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1731,6 +2021,9 @@ func SyncCloudbrainStatus() { task.EndTime = timeutil.TimeStampNow() } task.ComputeAndSetDuration() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) @@ -1749,6 +2042,7 @@ func SyncCloudbrainStatus() { } if result != nil { + oldStatus := task.Status task.Status = result.Status if task.StartTime == 0 && result.Lease.UpdateTime > 0 { task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) @@ -1758,6 +2052,9 @@ func SyncCloudbrainStatus() { } task.CorrectCreateUnix() task.ComputeAndSetDuration() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1772,6 +2069,7 @@ func SyncCloudbrainStatus() { } if result != nil { + oldStatus := task.Status task.Status = modelarts.TransTrainJobStatus(result.IntStatus) task.Duration = result.Duration / 1000 task.TrainJobDuration = result.TrainJobDuration @@ -1784,6 +2082,9 @@ func SyncCloudbrainStatus() { task.EndTime = task.StartTime.Add(task.Duration) } task.CorrectCreateUnix() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) @@ -1804,6 +2105,7 @@ func SyncCloudbrainStatus() { if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] } + oldStatus := task.Status task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) task.Duration = result.JobInfo.RunSec task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) @@ -1815,6 +2117,9 @@ func SyncCloudbrainStatus() { task.EndTime = task.StartTime.Add(task.Duration) } task.CorrectCreateUnix() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 978b7462d..a41f884d9 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1,15 +1,8 @@ package repo import ( - "code.gitea.io/gitea/modules/auth" - "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/grampus" - "code.gitea.io/gitea/modules/modelarts" - "code.gitea.io/gitea/modules/timeutil" - "code.gitea.io/gitea/modules/util" "encoding/json" "errors" - "github.com/unknwon/com" "io/ioutil" "net/http" "os" @@ -18,6 +11,15 @@ import ( "strings" "time" + "code.gitea.io/gitea/modules/auth" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/grampus" + "code.gitea.io/gitea/modules/modelarts" + "code.gitea.io/gitea/modules/notification" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" + "github.com/unknwon/com" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/cloudbrain" @@ -136,6 +138,93 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err return nil } +func grampusTrainJobErrorPrepare(ctx *context.Context, processType string, form auth.CreateGrampusTrainJobForm) error { + ctx.Data["PageIsCloudBrain"] = true + + //get valid images + images, err := grampus.GetImages(processType) + if err != nil { + log.Error("GetImages failed:", err.Error()) + } else { + ctx.Data["images"] = images.Infos + } + + grampus.InitSpecialPool() + + ctx.Data["GPUEnabled"] = true + ctx.Data["NPUEnabled"] = true + includeCenters := make(map[string]struct{}) + excludeCenters := make(map[string]struct{}) + if grampus.SpecialPools != nil { + for _, pool := range grampus.SpecialPools.Pools { + if pool.IsExclusive { + if !IsUserInOrgPool(ctx.User.ID, pool) { + ctx.Data[pool.Type+"Enabled"] = false + } + } else { + if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { + if IsUserInOrgPool(ctx.User.ID, pool) { + for _, center := range pool.Pool { + includeCenters[center.Queue] = struct{}{} + } + } else { + for _, center := range pool.Pool { + excludeCenters[center.Queue] = struct{}{} + } + + } + + } + + } + } + } + + //get valid resource specs + specs, err := grampus.GetResourceSpecs(processType) + + grampusSpecs := getFilterSpecBySpecialPool(specs, includeCenters, excludeCenters) + + if err != nil { + log.Error("GetResourceSpecs failed:", err.Error()) + } else { + ctx.Data["flavor_infos"] = grampusSpecs + } + + if processType == grampus.ProcessorTypeGPU { + ctx.Data["datasetType"] = models.TypeCloudBrainOne + } else if processType == grampus.ProcessorTypeNPU { + ctx.Data["datasetType"] = models.TypeCloudBrainTwo + } + + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(form.Params), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + ctx.Data["boot_file"] = form.BootFile + ctx.Data["attachment"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames + ctx.Data["branch_name"] = form.BranchName + ctx.Data["image_id"] = form.ImageID + + ctx.Data["display_job_name"] = form.DisplayJobName + ctx.Data["image"] = form.Image + ctx.Data["flavor"] = form.FlavorID + ctx.Data["flavor_name"] = form.FlavorName + ctx.Data["description"] = form.Description + ctx.Data["engine_name"] = form.EngineName + ctx.Data["work_server_number"] = form.WorkServerNumber + + return nil +} + func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec { if len(includeCenters) == 0 && len(excludeCenters) == 0 { return specs.Infos @@ -206,14 +295,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain image := strings.TrimSpace(form.Image) if !jobNamePattern.MatchString(displayJobName) { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) return } errStr := checkSpecialPool(ctx, "GPU") if errStr != "" { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) return } @@ -222,13 +311,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form) return } @@ -237,7 +326,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //check param if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) return } @@ -247,14 +336,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) return } @@ -264,7 +353,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain attachment, err := models.GetAttachmentByUUID(uuid) if err != nil { log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) return } @@ -277,7 +366,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -286,7 +375,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //upload code if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -294,7 +383,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" if err := mkModelPath(modelPath); err != nil { log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -302,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //init model readme if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -311,7 +400,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) return } @@ -343,7 +432,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) return } @@ -390,14 +479,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain engineName := form.EngineName if !jobNamePattern.MatchString(displayJobName) { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form) return } errStr := checkSpecialPool(ctx, "NPU") if errStr != "" { - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) return } @@ -406,13 +495,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource) if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form) return } @@ -421,7 +510,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //check param if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) return } @@ -431,14 +520,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) return } @@ -448,7 +537,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain attachment, err := models.GetAttachmentByUUID(uuid) if err != nil { log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) return } @@ -461,7 +550,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form) return } @@ -469,14 +558,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form) return } if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form) return } @@ -485,7 +574,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) return } @@ -521,7 +610,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) - grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form) ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) return } @@ -550,12 +639,15 @@ func GrampusStopJob(ctx *context.Context) { errorMsg = res.ErrorMsg break } - + oldStatus := task.Status task.Status = string(models.GrampusStatusStopped) if task.EndTime == 0 { task.EndTime = timeutil.TimeStampNow() } task.ComputeAndSetDuration() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) @@ -626,7 +718,7 @@ func GrampusTrainJobShow(ctx *context.Context) { task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) if err != nil { log.Error("GetCloudbrainByJobID failed:" + err.Error()) - ctx.ServerError("system error", err) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } @@ -634,14 +726,15 @@ func GrampusTrainJobShow(ctx *context.Context) { result, err := grampus.GetJob(task.JobID) if err != nil { log.Error("GetJob failed:" + err.Error()) - //ctx.ServerError("GetJob failed", err) - //return + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return } if result != nil { if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] } + oldStatus := task.Status task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) if task.Status != result.JobInfo.Status || result.JobInfo.Status == models.GrampusStatusRunning { task.Duration = result.JobInfo.RunSec @@ -654,6 +747,9 @@ func GrampusTrainJobShow(ctx *context.Context) { task.EndTime = task.StartTime.Add(task.Duration) } task.CorrectCreateUnix() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob failed:" + err.Error()) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index b5afa4713..ef9fe41b2 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -257,27 +257,29 @@ func NotebookShow(ctx *context.Context) { var ID = ctx.Params(":id") task, err := models.GetCloudbrainByIDWithDeleted(ID) if err != nil { - ctx.Data["error"] = err.Error() - ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) + log.Error("GET job error", err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } result, err := modelarts.GetNotebook2(task.JobID) if err != nil { - ctx.Data["error"] = err.Error() - ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) + log.Error("GET job error", err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } if result != nil { if task.DeletedAt.IsZero() { //normal record if task.Status != result.Status { + oldStatus := task.Status task.Status = result.Status models.ParseAndSetDurationFromModelArtsNotebook(result, task) + notification.NotifyChangeCloudbrainStatus(task, oldStatus) err = models.UpdateJob(task) if err != nil { - ctx.Data["error"] = err.Error() - ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) + log.Error("GET job error", err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } } @@ -510,11 +512,15 @@ func NotebookManage(ctx *context.Context) { ID = strconv.FormatInt(newTask.ID, 10) notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask) } else { + oldStatus := task.Status task.Status = res.Status if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { task.EndTime = timeutil.TimeStampNow() } task.ComputeAndSetDuration() + if oldStatus != task.Status { + notification.NotifyChangeCloudbrainStatus(task, oldStatus) + } err = models.UpdateJob(task) if err != nil { log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) @@ -772,6 +778,12 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts ctx.Data["config_list"] = configList.ParaConfigs ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames ctx.Data["branch_name"] = form.BranchName ctx.Data["datasetType"] = models.TypeCloudBrainTwo @@ -1643,7 +1655,11 @@ func TrainJobShow(ctx *context.Context) { if err != nil { log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return + } + if len(VersionListTasks) == 0 { + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } //设置权限 @@ -2276,6 +2292,12 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel ctx.Data["config_list"] = configList.ParaConfigs ctx.Data["bootFile"] = form.BootFile ctx.Data["uuid"] = form.Attachment + _, datasetNames, err := models.GetDatasetInfo(form.Attachment) + if err != nil { + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) + return nil + } + ctx.Data["dataset_name"] = datasetNames ctx.Data["branch_name"] = form.BranchName ctx.Data["model_name"] = form.ModelName ctx.Data["model_version"] = form.ModelVersion @@ -2297,7 +2319,7 @@ func InferenceJobShow(ctx *context.Context) { if err != nil { log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) - ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobShow, nil) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } //设置权限 diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 35a26f585..f917aebf1 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1016,6 +1016,7 @@ func RegisterRoutes(m *macaron.Macaron) { }, context.RepoAssignment(), context.RepoMustNotBeArchived(), reqRepoAdmin) m.Group("/image/:id", func() { + m.Get("", repo.GetImage) m.Get("/:from", cloudbrain.AdminOrImageCreaterRight, repo.CloudBrainImageEdit) m.Post("", cloudbrain.AdminOrImageCreaterRight, bindIgnErr(auth.EditImageCloudBrainForm{}), repo.CloudBrainImageEditPost) m.Delete("", cloudbrain.AdminOrImageCreaterRight, repo.CloudBrainImageDelete) diff --git a/templates/admin/dataset/list.tmpl b/templates/admin/dataset/list.tmpl index 9e4e72b68..9712f2e7b 100644 --- a/templates/admin/dataset/list.tmpl +++ b/templates/admin/dataset/list.tmpl @@ -35,7 +35,7 @@ {{range .Datasets}}