Browse Source

merge

tags/v1.22.12.1^2
ychao_1983 3 years ago
parent
commit
355a27e4dc
7 changed files with 246 additions and 2 deletions
  1. +60
    -0
      models/cloudbrain.go
  2. +17
    -2
      modules/cron/tasks_basic.go
  3. +22
    -0
      modules/setting/setting.go
  4. +1
    -0
      options/locale/locale_en-US.ini
  5. +2
    -0
      options/locale/locale_zh-CN.ini
  6. +12
    -0
      routers/repo/cloudbrain.go
  7. +132
    -0
      services/cloudbrain/clear.go

+ 60
- 0
models/cloudbrain.go View File

@@ -205,6 +205,7 @@ type Cloudbrain struct {
BenchmarkTypeRankLink string `xorm:"-"`
StartTime timeutil.TimeStamp
EndTime timeutil.TimeStamp
Cleared bool `xorm:"DEFAULT false"`
Spec *Specification `xorm:"-"`
}

@@ -2116,6 +2117,65 @@ func GetCloudBrainUnStoppedJob() ([]*Cloudbrain, error) {
Find(&cloudbrains)
}

func GetCloudBrainOneStoppedJobDaysAgo(days int, limit int) ([]*Cloudbrain, error) {
cloudbrains := make([]*Cloudbrain, 0, 10)
endTimeBefore := time.Now().Unix() - int64(days)*24*3600
missEndTimeBefore := endTimeBefore - 24*3600
return cloudbrains, x.Cols("id,job_name,job_id").
In("status",
JobStopped, JobSucceeded, JobFailed, ModelArtsCreateFailed, ModelArtsStartFailed, ModelArtsUnavailable, ModelArtsResizFailed, ModelArtsDeleted,
ModelArtsStopped, ModelArtsTrainJobCanceled, ModelArtsTrainJobCheckFailed, ModelArtsTrainJobCompleted, ModelArtsTrainJobDeleteFailed, ModelArtsTrainJobDeployServiceFailed,
ModelArtsTrainJobFailed, ModelArtsTrainJobImageFailed, ModelArtsTrainJobKilled, ModelArtsTrainJobLost, ModelArtsTrainJobSubmitFailed, ModelArtsTrainJobSubmitModelFailed).
Where("((end_time is null and created_unix<?) or end_time<?) and cleared=false and type=0", missEndTimeBefore, endTimeBefore).
Limit(limit).
Find(&cloudbrains)
}

func UpdateCloudBrainRecordsCleared(ids []int64) error {
pageSize := 150
n := len(ids) / pageSize

var err error

for i := 1; i <= n+1; i++ {
tempIds := getPageIds(ids, i, pageSize)
if len(tempIds) > 0 {
idsIn := ""
for i, id := range tempIds {
if i == 0 {
idsIn += strconv.FormatInt(id, 10)
} else {
idsIn += "," + strconv.FormatInt(id, 10)
}
}

_, errTemp := x.Unscoped().Exec("update cloudbrain set cleared=true where id in (" + idsIn + ")")
if errTemp != nil {
err = errTemp
}

}

}
return err

}

func getPageIds(ids []int64, page int, pagesize int) []int64 {
begin := (page - 1) * pagesize
end := (page) * pagesize

if begin > len(ids)-1 {
return []int64{}
}
if end > len(ids)-1 {
return ids[begin:]
} else {
return ids[begin:end]
}

}

func GetStoppedJobWithNoDurationJob() ([]*Cloudbrain, error) {
cloudbrains := make([]*Cloudbrain, 0)
return cloudbrains, x.


+ 17
- 2
modules/cron/tasks_basic.go View File

@@ -5,10 +5,13 @@
package cron

import (
"code.gitea.io/gitea/modules/urfs_client/urchin"
"code.gitea.io/gitea/modules/setting"
"context"
"time"

"code.gitea.io/gitea/modules/urfs_client/urchin"
cloudbrainService "code.gitea.io/gitea/services/cloudbrain"

"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/services/cloudbrain/resource"
"code.gitea.io/gitea/services/reward"
@@ -190,6 +193,17 @@ func registerHandleRepoAndUserStatistic() {
})
}

func registerHandleClearCloudbrainResult() {
RegisterTaskFatal("handle_cloudbrain_one_result_clear", &BaseConfig{
Enabled: true,
RunAtStart: setting.ClearStrategy.RunAtStart,
Schedule: setting.ClearStrategy.Cron,
}, func(ctx context.Context, _ *models.User, _ Config) error {
cloudbrainService.ClearCloudbrainResultSpace()
return nil
})
}

func registerHandleSummaryStatistic() {
RegisterTaskFatal("handle_summary_statistic", &BaseConfig{
Enabled: true,
@@ -306,6 +320,7 @@ func initBasicTasks() {

registerHandleRepoAndUserStatistic()
registerHandleSummaryStatistic()
registerHandleClearCloudbrainResult()

registerSyncCloudbrainStatus()
registerHandleOrgStatistic()
@@ -317,6 +332,6 @@ func initBasicTasks() {

registerHandleModelSafetyTask()

registerHandleScheduleRecord()
registerHandleScheduleRecord()
registerHandleCloudbrainDurationStatistic()
}

+ 22
- 0
modules/setting/setting.go View File

@@ -519,6 +519,7 @@ var (
CullIdleTimeout string
CullInterval string


//benchmark config
IsBenchmarkEnabled bool
BenchmarkOwner string
@@ -613,6 +614,15 @@ var (
UsageRateBeginTime string
}{}

ClearStrategy= struct {
Enabled bool
ResultSaveDays int
BatchSize int
TrashSaveDays int
Cron string
RunAtStart bool
}{}

C2NetInfos *C2NetSqInfos
CenterInfos *AiCenterInfos
C2NetMapInfo map[string]*C2NetSequenceInfo
@@ -1619,6 +1629,7 @@ func NewContext() {
getModelConvertConfig()
getModelSafetyConfig()
getModelAppConfig()
getClearStrategy()
}

func getModelSafetyConfig() {
@@ -1679,6 +1690,17 @@ func getModelartsCDConfig() {
getNotebookFlavorInfos()
}

func getClearStrategy(){

sec := Cfg.Section("clear_strategy")
ClearStrategy.Enabled=sec.Key("ENABLED").MustBool(false)
ClearStrategy.ResultSaveDays=sec.Key("RESULT_SAVE_DAYS").MustInt(30)
ClearStrategy.BatchSize=sec.Key("BATCH_SIZE").MustInt(500)
ClearStrategy.TrashSaveDays=sec.Key("TRASH_SAVE_DAYS").MustInt(90)
ClearStrategy.Cron=sec.Key("CRON").MustString("* 0,30 2-8 * * ? *")
ClearStrategy.RunAtStart=sec.Key("RUN_AT_START").MustBool(false)
}

func getGrampusConfig() {
sec := Cfg.Section("grampus")



+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -3248,6 +3248,7 @@ specification = specification
select_specification = select specification
description = description
wrong_specification=You cannot use this specification, please choose another item.
result_cleared=The files of the task have been cleared, can not restart any more, please create a new debug task instead.
resource_use=Resource Occupancy

job_name_rule = Please enter letters, numbers, _ and - up to 64 characters and cannot end with a dash (-).


+ 2
- 0
options/locale/locale_zh-CN.ini View File

@@ -3268,6 +3268,8 @@ card_duration = 运行卡时
card_type = 卡类型
wrong_specification=您目前不能使用这个资源规格,请选择其他资源规格。

result_cleared=本任务的文件已被清理,无法再次调试,请新建调试任务。

job_name_rule = 请输入字母、数字、_和-,最长64个字符,且不能以中划线(-)结尾。
train_dataset_path_rule = 数据集位置存储在运行参数 <strong style="color:#010101">data_url</strong> 中,预训练模型存放在运行参数 <strong style="color:#010101">ckpt_url</strong> 中,训练输出路径存储在运行参数 <strong style="color:#010101">train_url</strong> 中。
infer_dataset_path_rule = 数据集位置存储在运行参数 <strong style="color:#010101">data_url</strong> 中,推理输出路径存储在运行参数 <strong style="color:#010101">result_url</strong> 中。


+ 12
- 0
routers/repo/cloudbrain.go View File

@@ -683,6 +683,13 @@ func CloudBrainRestart(ctx *context.Context) {
break
}

if _, err := os.Stat(getOldJobPath(task)); err != nil {
log.Error("Can not find job minio path", err)
resultCode = "-1"
errorMsg = ctx.Tr("cloudbrain.result_cleared")
break
}

count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, string(models.JobTypeDebug))
if err != nil {
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -722,6 +729,7 @@ func CloudBrainRestart(ctx *context.Context) {
})
}


func HasModelFile(task *models.Cloudbrain) bool {
if task.PreTrainModelUrl == "" {
return true
@@ -747,6 +755,10 @@ func HasModelFile(task *models.Cloudbrain) bool {
return isFind
}

func getOldJobPath(task *models.Cloudbrain) string {
return setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + task.JobName
}

func CloudBrainBenchMarkShow(ctx *context.Context) {
cloudBrainShow(ctx, tplCloudBrainBenchmarkShow, models.JobTypeBenchmark)
}


+ 132
- 0
services/cloudbrain/clear.go View File

@@ -0,0 +1,132 @@
package cloudbrain

import (
"io/ioutil"
"os"
"sort"
"time"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
)

func ClearCloudbrainResultSpace() {
if !setting.ClearStrategy.Enabled{
return
}

tasks, err := models.GetCloudBrainOneStoppedJobDaysAgo(setting.ClearStrategy.ResultSaveDays, setting.ClearStrategy.BatchSize)

if err != nil {
log.Warn("Failed to get cloudbrain, clear result failed.", err)
return
}
var ids []int64
for _, task := range tasks {
err := DeleteCloudbrainOneJobStorage(task.JobName)
if err == nil {
ids = append(ids, task.ID)
}
}

err = models.UpdateCloudBrainRecordsCleared(ids)
if err != nil {
log.Warn("Failed to set cloudbrain cleared status", err)
}
//如果云脑表处理完了,通过遍历minio对象处理历史垃圾数据,如果存在的话
if len(tasks) < setting.ClearStrategy.BatchSize {
clearLocalHistoryTrashFile()
clearMinioHistoryTrashFile()

}

}

func clearMinioHistoryTrashFile() {
JobRealPrefix := setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix

miniofiles, err := ioutil.ReadDir(JobRealPrefix)

processCount := 0
if err != nil {
log.Warn("Can not browser minio job path.")
} else {
SortModTimeAscend(miniofiles)
for _, file := range miniofiles {

if file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) {
dirPath := setting.CBCodePathPrefix + file.Name() + "/"
storage.Attachments.DeleteDir(dirPath)
processCount++
if processCount == setting.ClearStrategy.BatchSize {
break
}
} else {
break
}

}

}
}

func clearLocalHistoryTrashFile() {
files, err := ioutil.ReadDir(setting.JobPath)
processCount := 0
if err != nil {
log.Warn("Can not browser local job path.")
} else {
SortModTimeAscend(files)
for _, file := range files {
//清理n天前的历史垃圾数据,清理job目录
if file.ModTime().Before(time.Now().AddDate(0, 0, -setting.ClearStrategy.TrashSaveDays)) {
os.RemoveAll(setting.JobPath + file.Name())
processCount++
if processCount == setting.ClearStrategy.BatchSize {
break
}
} else {
break
}

}

}

}

func SortModTimeAscend(files []os.FileInfo) {
sort.Slice(files, func(i, j int) bool {
return files[i].ModTime().Before(files[j].ModTime())
})
}
func SortModTimeAscendForMinio(files []storage.FileInfo) {
sort.Slice(files, func(i, j int) bool {
timeI, _ := time.Parse("2006-01-02 15:04:05", files[i].ModTime)
timeJ, _ := time.Parse("2006-01-02 15:04:05", files[i].ModTime)
return timeI.Before(timeJ)
})
}

func DeleteCloudbrainOneJobStorage(jobName string) error {
//delete local
localJobPath := setting.JobPath + jobName
err := os.RemoveAll(localJobPath)
if err != nil {
log.Error("RemoveAll(%s) failed:%v", localJobPath, err)
}

dirPath := setting.CBCodePathPrefix + jobName + "/"
err1 := storage.Attachments.DeleteDir(dirPath)

if err1 != nil {
log.Error("DeleteDir(%s) failed:%v", localJobPath, err)
}
if err == nil {
err = err1
}

return err
}

Loading…
Cancel
Save