| @@ -403,6 +403,8 @@ type BenchmarkDataset struct { | |||
| Id int `json:"id"` | |||
| Value string `json:"value"` //二级算法类型名称 | |||
| Attachment string `json:"attachment"` //数据集的uuid | |||
| Owner string `json:"owner"` //评估脚本所在仓库的拥有者 | |||
| RepoName string `json:"repo_name"` //评估脚本所在仓库的名称 | |||
| } | |||
| type GpuInfos struct { | |||
| @@ -476,7 +478,7 @@ type MatchInfo struct { | |||
| type GetJobLogResult struct { | |||
| ScrollID string `json:"_scroll_id"` | |||
| Took int `json:"took"` | |||
| Took int `json:"took"` | |||
| TimedOut bool `json:"timed_out"` | |||
| Shards struct { | |||
| Total int `json:"total"` | |||
| @@ -485,18 +487,34 @@ type GetJobLogResult struct { | |||
| Failed int `json:"failed"` | |||
| } `json:"_shards"` | |||
| Hits struct { | |||
| Hits []struct { | |||
| Index string `json:"_index"` | |||
| Type string `json:"_type"` | |||
| ID string `json:"_id"` | |||
| Source struct { | |||
| Message string `json:"message"` | |||
| } `json:"_source"` | |||
| Sort []int `json:"sort"` | |||
| } `json:"hits"` | |||
| Hits []Hits `json:"hits"` | |||
| } `json:"hits"` | |||
| } | |||
// Hits is one element of the "hits.hits" array of a job-log search response.
// It is the element type of GetJobLogResult.Hits.Hits.
type Hits struct {
	// Index is decoded from the "_index" field.
	Index string `json:"_index"`
	// Type is decoded from the "_type" field.
	Type string `json:"_type"`
	// ID is decoded from the "_id" field.
	ID string `json:"_id"`
	// Source is decoded from "_source"; only the log message is kept.
	Source struct {
		Message string `json:"message"`
	} `json:"_source"`
	// Sort is decoded from the "sort" array of the hit.
	Sort []int `json:"sort"`
}
// GetAllJobLogParams is the JSON request body used to fetch the next page of
// a scrolled job-log search.
type GetAllJobLogParams struct {
	// Scroll is serialized as "scroll".
	Scroll string `json:"scroll"`
	// ScrollID is serialized as "scroll_id".
	ScrollID string `json:"scroll_id"`
}
// DeleteJobLogTokenParams is the JSON request body used to release a scroll
// token once a job-log search is finished.
type DeleteJobLogTokenParams struct {
	// ScrollID is serialized as "scroll_id".
	ScrollID string `json:"scroll_id"`
}
// DeleteJobLogTokenResult is the decoded response of a scroll-token deletion.
type DeleteJobLogTokenResult struct {
	// Succeeded is decoded from "succeeded".
	Succeeded bool `json:"succeeded"`
	// NumFreed is decoded from "num_freed".
	NumFreed int `json:"num_freed"`
}
| type CloudBrainResult struct { | |||
| Code string `json:"code"` | |||
| Msg string `json:"msg"` | |||
| @@ -17,7 +17,8 @@ const ( | |||
| Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple; | |||
| service ssh stop; | |||
| jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"` | |||
| CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | |||
| //CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"` | |||
| CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"` | |||
| CodeMountPath = "/code" | |||
| DataSetMountPath = "/dataset" | |||
| ModelMountPath = "/model" | |||
| @@ -26,6 +26,8 @@ const ( | |||
| JobHasBeenStopped = "S410" | |||
| Public = "public" | |||
| Custom = "custom" | |||
| LogPageSize = 500 | |||
| LogPageTokenExpired = "5m" | |||
| ) | |||
| func getRestyClient() *resty.Client { | |||
| @@ -279,7 +281,7 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) { | |||
| client := getRestyClient() | |||
| var result models.GetJobLogResult | |||
| req := models.GetJobLogParams{ | |||
| Size: "5000", | |||
| Size: strconv.Itoa(LogPageSize), | |||
| Sort: "log.offset", | |||
| QueryInfo: models.QueryInfo{ | |||
| MatchInfo: models.MatchInfo{ | |||
| @@ -293,17 +295,79 @@ func GetJobLog(jobID string) (*models.GetJobLogResult, error) { | |||
| SetAuthToken(TOKEN). | |||
| SetBody(req). | |||
| SetResult(&result). | |||
| Post(HOST + "es/_search?_source=message&scroll=5m") | |||
| Post(HOST + "es/_search?_source=message&scroll=" + LogPageTokenExpired) | |||
| if err != nil { | |||
| log.Info("GetJobLog failed: %v", err) | |||
| log.Error("GetJobLog failed: %v", err) | |||
| return &result, fmt.Errorf("resty GetJobLog: %v, %s", err, res.String()) | |||
| } | |||
| if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | |||
| log.Info("res.Status(): %s, response: %s", res.Status(), res.String()) | |||
| log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||
| return &result, errors.New(res.String()) | |||
| } | |||
| return &result, nil | |||
| } | |||
| func GetJobAllLog(scrollID string) (*models.GetJobLogResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.GetJobLogResult | |||
| req := models.GetAllJobLogParams{ | |||
| Scroll: LogPageTokenExpired, | |||
| ScrollID: scrollID, | |||
| } | |||
| res, err := client.R(). | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetAuthToken(TOKEN). | |||
| SetBody(req). | |||
| SetResult(&result). | |||
| Post(HOST + "es/_search/scroll") | |||
| if err != nil { | |||
| log.Error("GetJobAllLog failed: %v", err) | |||
| return &result, fmt.Errorf("resty GetJobAllLog: %v, %s", err, res.String()) | |||
| } | |||
| if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | |||
| log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||
| return &result, errors.New(res.String()) | |||
| } | |||
| return &result, nil | |||
| } | |||
| func DeleteJobLogToken(scrollID string) (error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.DeleteJobLogTokenResult | |||
| req := models.DeleteJobLogTokenParams{ | |||
| ScrollID: scrollID, | |||
| } | |||
| res, err := client.R(). | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetAuthToken(TOKEN). | |||
| SetBody(req). | |||
| SetResult(&result). | |||
| Delete(HOST + "es/_search/scroll") | |||
| if err != nil { | |||
| log.Error("DeleteJobLogToken failed: %v", err) | |||
| return fmt.Errorf("resty DeleteJobLogToken: %v, %s", err, res.String()) | |||
| } | |||
| if !strings.Contains(res.Status(), strconv.Itoa(http.StatusOK)) { | |||
| log.Error("res.Status(): %s, response: %s", res.Status(), res.String()) | |||
| return errors.New(res.String()) | |||
| } | |||
| if !result.Succeeded { | |||
| log.Error("DeleteJobLogToken failed") | |||
| return errors.New("DeleteJobLogToken failed") | |||
| } | |||
| return nil | |||
| } | |||
| @@ -102,25 +102,45 @@ func CloudbrainGetLog(ctx *context.Context) { | |||
| return | |||
| } | |||
| var hits []models.Hits | |||
| result, err := cloudbrain.GetJobLog(jobID) | |||
| if err != nil{ | |||
| log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"]) | |||
| ctx.ServerError(err.Error(), err) | |||
| return | |||
| } | |||
| hits = result.Hits.Hits | |||
| //if the size equal page_size, then take the scroll_id to get all log and delete the scroll_id(the num of scroll_id is limited) | |||
| if len(result.Hits.Hits) >= cloudbrain.LogPageSize { | |||
| for { | |||
| resultNext, err := cloudbrain.GetJobAllLog(result.ScrollID) | |||
| if err != nil{ | |||
| log.Error("GetJobAllLog failed: %v", err, ctx.Data["MsgID"]) | |||
| } else { | |||
| for _, hit := range resultNext.Hits.Hits { | |||
| hits = append(hits, hit) | |||
| } | |||
| } | |||
| if len(resultNext.Hits.Hits) < cloudbrain.LogPageSize { | |||
| log.Info("get all log already") | |||
| break | |||
| } | |||
| } | |||
| } | |||
| sort.Slice(result.Hits.Hits, func(i, j int) bool { | |||
| return result.Hits.Hits[i].Sort[0] < result.Hits.Hits[j].Sort[0] | |||
| cloudbrain.DeleteJobLogToken(result.ScrollID) | |||
| sort.Slice(hits, func(i, j int) bool { | |||
| return hits[i].Sort[0] < hits[j].Sort[0] | |||
| }) | |||
| log.Info("%v", result.Hits.Hits) | |||
| var content []string | |||
| for _, log := range result.Hits.Hits { | |||
| content = append(content, log.Source.Message + "\n") | |||
| var content string | |||
| for _, log := range hits { | |||
| content += log.Source.Message + "\n" | |||
| } | |||
| log.Info("%v", content) | |||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
| "JobID": jobID, | |||
| "Content": content, | |||
| @@ -1062,12 +1062,12 @@ func CloudBrainBenchmarkNew(ctx *context.Context) { | |||
| ctx.HTML(200, tplCloudBrainBenchmarkNew) | |||
| } | |||
| func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, error) { | |||
| uuid := "" | |||
| func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (*models.BenchmarkDataset, error) { | |||
| var childInfo *models.BenchmarkDataset | |||
| if benchmarkTypes == nil { | |||
| if err := json.Unmarshal([]byte(setting.BenchmarkTypes), &benchmarkTypes); err != nil { | |||
| log.Error("json.Unmarshal BenchmarkTypes(%s) failed:%v", setting.BenchmarkTypes, err) | |||
| return uuid, err | |||
| return childInfo, err | |||
| } | |||
| } | |||
| @@ -1076,7 +1076,7 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, | |||
| if benchmarkType.Id == benchmarkTypeID { | |||
| for _, childType := range benchmarkType.Second { | |||
| if childType.Id == benchmarkChildTypeID { | |||
| uuid = childType.Attachment | |||
| childInfo = childType | |||
| isExist = true | |||
| break | |||
| } | |||
| @@ -1087,10 +1087,10 @@ func getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID int) (string, | |||
| if !isExist { | |||
| log.Error("no such benchmark_type_id&benchmark_child_type_id") | |||
| return uuid, errors.New("no such benchmark_type_id&benchmark_child_type_id") | |||
| return childInfo, errors.New("no such benchmark_type_id&benchmark_child_type_id") | |||
| } | |||
| return uuid, nil | |||
| return childInfo, nil | |||
| } | |||
| func getBenchmarkGpuQueue(gpuQueue string) (string, error) { | |||
| @@ -1161,7 +1161,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||
| return | |||
| } | |||
| uuid, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID) | |||
| childInfo, err := getBenchmarkAttachment(benchmarkTypeID, benchmarkChildTypeID) | |||
| if err != nil { | |||
| log.Error("getBenchmarkAttachment failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| @@ -1240,7 +1240,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||
| } | |||
| } | |||
| if err := downloadRateCode(repo, jobName, setting.BenchmarkOwner, setting.BenchmarkName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil { | |||
| if err := downloadRateCode(repo, jobName, childInfo.Owner, childInfo.RepoName, benchmarkPath, form.BenchmarkCategory, gpuType); err != nil { | |||
| log.Error("downloadRateCode failed, %v", err, ctx.Data["MsgID"]) | |||
| //cloudBrainNewDataPrepare(ctx) | |||
| //ctx.RenderWithErr("system error", tplCloudBrainBenchmarkNew, &form) | |||
| @@ -1254,7 +1254,7 @@ func CloudBrainBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainF | |||
| //return | |||
| } | |||
| err = cloudbrain.GenerateTask(ctx, jobName, image, command, uuid, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
| err = cloudbrain.GenerateTask(ctx, jobName, image, command, childInfo.Attachment, storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"), | |||
| storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"), | |||
| storage.GetMinioPath(jobName, cloudbrain.BenchMarkMountPath+"/"), storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), | |||
| storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), string(models.JobTypeBenchmark), gpuQueue, form.Description, | |||