From c5eee763b2c494708f6401a46dffd2aa9dfc71f1 Mon Sep 17 00:00:00 2001 From: ph Date: Sat, 31 Oct 2020 10:19:35 +0800 Subject: [PATCH] fix issue: UI show error tip and return homepage when train job does not exist --- mindinsight/ui/src/locales/en-us.json | 17 ++++++----------- mindinsight/ui/src/locales/zh-cn.json | 17 ++++++----------- mindinsight/ui/src/services/fetcher.js | 4 ++-- .../views/profiling-gpu/profiling-dashboard.vue | 15 +++++++++++---- .../ui/src/views/profiling-gpu/profiling.vue | 2 +- .../ui/src/views/profiling/data-process.vue | 9 +-------- .../src/views/profiling/profiling-dashboard.vue | 1 + .../ui/src/views/profiling/profiling.vue | 2 +- .../ui/src/views/profiling/step-trace.vue | 17 ++++++++++------- 9 files changed, 39 insertions(+), 45 deletions(-) diff --git a/mindinsight/ui/src/locales/en-us.json b/mindinsight/ui/src/locales/en-us.json index 0c3bd44f..ceedfaad 100644 --- a/mindinsight/ui/src/locales/en-us.json +++ b/mindinsight/ui/src/locales/en-us.json @@ -138,7 +138,7 @@ "trainingScalar": "Training Scalar Information", "samplingData": "Data Sampling", "imagesampleSwitch": "Switch Tag", - "invalidId": "Invalid training job.", + "invalidId": "Invalid job.", "summaryDirPath": "Summary path:", "loadingTip": "Loading...", "waitLoading": "waiting to be loaded", @@ -439,20 +439,14 @@ "chartTitle": "Average Time Consumption Ranking" }, "profilingGPU": { - "minddata_device_queue": { - "desc": "The ratio of empty primary queues is {n1}/{n2}, and the ratio of full queues is {n3}/{n4}." - }, "minddata_get_next_queue": { "desc": "The ratio of empty data queues is {n1}/{n2}." }, - "connectorQuene": "Primary Queues", - "dataProcess": "This shows the data processing. Data is stored in the primary queue during data processing, and then stored in the data queue during data transmission. Finally, the forward and backward propagation get_next transmits the data to forward propagation.", - "dataProcessInfo": "By determining the empty primary and data queues, you can preliminarily determine the stage where the performance is abnormal.", + "dataProcess": "This shows the data processing. Data is stored in the host queue during data processing, and then stored in the data queue during data transmission. Finally, the forward and backward propagation get_next transmits the data to forward propagation.", + "dataProcessInfo": "By determining the empty host and data queues, you can preliminarily determine the stage where the performance is abnormal.", "analysisOne": "1. If the step interval is long and some batches of the data queue are empty, the performance is abnormal during data processing and transmission. Otherwise, locate the internal problem of the forward and backward propagation get_next.", - "analysisTwo": "2. If the performance is abnormal during data processing and transmission, check the primary queue. If the primary queue is empty at a high probability, the exception may occur during data transmission.", - "chipInfo": "Ratio of empty data queues:", - "hostIsEmpty": "Ratio of empty primary queues:", - "hostIsFull": "Ratio of full primary queues:" + "analysisTwo": "2. If the performance is abnormal during data processing and transmission, check the host queue. If the host queue is empty at a high probability, the exception may occur during data transmission.", + "chipInfo": "Ratio of empty data queues:" }, "components": { "summaryTitle": "Training Selection", @@ -593,6 +587,7 @@ "50545012": "The tensor data does not exist. Please refresh.", "50545013": "The requested data is too large. Try another dimension.", "50545014": "The queried tensor data has been replaced by new data. Please refresh.", + "50546083": "The profiler directory does not exist.", "50548001": "Ascend AI Processor information query timed out.", "5054B080": "Incorrect parameter type. Please check the input parameter type.", "5054B081": "Incorrect parameter value. Please check the input parameter.", diff --git a/mindinsight/ui/src/locales/zh-cn.json b/mindinsight/ui/src/locales/zh-cn.json index a206f880..9e50863d 100644 --- a/mindinsight/ui/src/locales/zh-cn.json +++ b/mindinsight/ui/src/locales/zh-cn.json @@ -138,7 +138,7 @@ "trainingScalar": "训练标量信息", "samplingData": "数据抽样", "imagesampleSwitch": "切换标签", - "invalidId": "无效的训练作业", + "invalidId": "无效的作业", "summaryDirPath": "训练日志路径:", "loadingTip": "加载中", "waitLoading": "待加载", @@ -438,20 +438,14 @@ "chartTitle": "平均耗时排名" }, "profilingGPU": { - "minddata_device_queue": { - "desc": "主队列为空比例{n1}/{n2},为满比例{n3}/{n4}。" - }, "minddata_get_next_queue": { "desc": "数据队列为空比例{n1}/{n2}。" }, - "connectorQuene": "主队列", - "dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。", - "dataProcessInfo": "综合分析该阶段的流程,通过判断主队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。", + "dataProcess": "该图展示了数据处理阶段的流程,数据通过数据处理阶段存入主机队列,再通过数据传输阶段存入数据队列,最终由数据传输算子get_next发送给前向训练使用。", + "dataProcessInfo": "综合分析该阶段的流程,通过判断主机队列和数据队列为空的情况就可以初步判断可能出现性能异常的阶段。", "analysisOne": "1、如果迭代间隙较长,并且数据队列部分batch为空,那么可能由于数据处理和数据传输阶段导致的性能异常,参考2,反之则定位数据传输算子get_next内部问题;", - "analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;", - "chipInfo": "数据队列为空比例:", - "hostIsEmpty": "主队列为空比例:", - "hostIsFull": "主队列为满比例:" + "analysisTwo": "2、如果通过1定位为数据处理、数据传输阶段异常,则查看主机队列情况,如果大概率为空,则可能为数据处理阶段导致异常,如果大概率不为空,则可能数据传输阶段异常;", + "chipInfo": "数据队列为空比例:" }, "components": { "summaryTitle": "训练选择", @@ -592,6 +586,7 @@ "50545012": "张量数据不存在,请刷新。", "50545013": "请求的数据过大,请使用其他维度重试。", "50545014": "查询的张量数据已被新数据替换,请刷新。", + "50546083": "性能数据目录不存在", "50548001": "昇腾AI处理器信息查询超时", "5054B080": "参数类型错误,请检查输入参数类型", "5054B081": "参数值错误,请检查输入参数", diff --git a/mindinsight/ui/src/services/fetcher.js b/mindinsight/ui/src/services/fetcher.js index 0206813c..c86f742c 100644 --- a/mindinsight/ui/src/services/fetcher.js +++ b/mindinsight/ui/src/services/fetcher.js @@ -74,7 +74,7 @@ axios.interceptors.response.use( const errorCode = error.response.data.error_code.toString(); const ignoreCode = { - ignoreError: ['50545005'], + ignoreError: ['50545005', '50546083'], regardError: ['50545013', '50545014', '5054500D'], }; @@ -84,7 +84,7 @@ axios.interceptors.response.use( } setTimeout(()=>{ router.push('/'); - }, 3000); + }, 2500); return Promise.reject(error); } if ( diff --git a/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue b/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue index 7081dd58..253e1fc4 100644 --- a/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue +++ b/mindinsight/ui/src/views/profiling-gpu/profiling-dashboard.vue @@ -61,10 +61,10 @@ limitations under the License. {{processSummary.get_next.empty}} / {{processSummary.get_next.total}}
-
{{$t('profilingGPU.hostIsEmpty')}} +
{{$t('profiling.hostIsEmpty')}} {{processSummary.device.empty}} / {{processSummary.device.total}}
-
{{$t('profilingGPU.hostIsFull')}} +
{{$t('profiling.hostIsFull')}} {{processSummary.device.full}} / {{processSummary.device.total}}
@@ -97,7 +97,7 @@ limitations under the License. alt="" />
-
{{$t('profilingGPU.connectorQuene')}}
+
{{$t('profiling.connectorQuene')}}
{ + this.pieChart.data = []; this.pieChart.noData = true; this.pieChart.initOver = true; }); diff --git a/mindinsight/ui/src/views/profiling-gpu/profiling.vue b/mindinsight/ui/src/views/profiling-gpu/profiling.vue index 0d166edd..b9d83e79 100644 --- a/mindinsight/ui/src/views/profiling-gpu/profiling.vue +++ b/mindinsight/ui/src/views/profiling-gpu/profiling.vue @@ -80,7 +80,7 @@ export default { collapse: false, curDashboardInfo: { // Current Select card info - curCardNum: '', + curCardNum: null, query: {}, }, }; diff --git a/mindinsight/ui/src/views/profiling/data-process.vue b/mindinsight/ui/src/views/profiling/data-process.vue index 19d3326b..23c42946 100644 --- a/mindinsight/ui/src/views/profiling/data-process.vue +++ b/mindinsight/ui/src/views/profiling/data-process.vue @@ -460,11 +460,7 @@ export default { }); }, init() { - this.connectorQuene = this.$t( - `profiling${ - location.href.includes('#/profiling-gpu/') ? 'GPU' : '' - }.connectorQuene`, - ); + this.connectorQuene = this.$t('profiling.connectorQuene'); this.queryProcessSummary(); }, /** @@ -588,9 +584,6 @@ export default { setOption(chart) { const myChart = echarts.init(document.getElementById(chart.id)); const option = { - title: { - text: '', - }, tooltip: { trigger: 'axis', }, diff --git a/mindinsight/ui/src/views/profiling/profiling-dashboard.vue b/mindinsight/ui/src/views/profiling/profiling-dashboard.vue index 1e67188d..6c540881 100644 --- a/mindinsight/ui/src/views/profiling/profiling-dashboard.vue +++ b/mindinsight/ui/src/views/profiling/profiling-dashboard.vue @@ -670,6 +670,7 @@ export default { } }) .catch(() => { + this.pieChart.data = []; this.pieChart.noData = true; this.pieChart.initOver = true; }); diff --git a/mindinsight/ui/src/views/profiling/profiling.vue b/mindinsight/ui/src/views/profiling/profiling.vue index f261d6f3..e635d8e7 100644 --- a/mindinsight/ui/src/views/profiling/profiling.vue +++ b/mindinsight/ui/src/views/profiling/profiling.vue @@ -80,7 +80,7 @@ export default { collapse: false, curDashboardInfo: { // Current Select card info - curCardNum: '', + curCardNum: null, query: {}, initOver: false, }, diff --git a/mindinsight/ui/src/views/profiling/step-trace.vue b/mindinsight/ui/src/views/profiling/step-trace.vue index f22c30c6..ed0d583d 100644 --- a/mindinsight/ui/src/views/profiling/step-trace.vue +++ b/mindinsight/ui/src/views/profiling/step-trace.vue @@ -290,10 +290,7 @@ export default { label: this.$t('profiling.stepInputTip'), }; - this.getTimeInfo('fp-bp', 'fp_and_bp'); - this.getTimeInfo('iter-gap', 'iteration_interval'); - this.getTimeInfo('tailing', 'tail'); - this.queryTrainingTrace(0); + this.queryTrainingTrace(0, true); }, /** * Change the current step value @@ -303,13 +300,13 @@ export default { if (value === 0 || (!this.steps.step && this.steps.step !== 0)) { this.steps.step = null; this.steps.trueStep = null; - this.queryTrainingTrace(0); + this.queryTrainingTrace(0, false); } else if ( /^[0-9]*[1-9][0-9]*$/.test(this.steps.step) && this.steps.step <= this.steps.max ) { this.steps.trueStep = this.steps.step; - this.queryTrainingTrace(this.steps.step); + this.queryTrainingTrace(this.steps.step, false); } else { this.steps.step = this.steps.trueStep; this.$message.error( @@ -471,8 +468,9 @@ export default { /** * Get training trace information * @param {Number} step Current step value + * @param {Boolean} init Init flag */ - queryTrainingTrace(step) { + queryTrainingTrace(step, init) { const params = { dir: this.relativePath, type: step, @@ -506,6 +504,11 @@ export default { JSON.parse(JSON.stringify(res.data.training_trace_graph)), ); }); + if (init) { + this.getTimeInfo('fp-bp', 'fp_and_bp'); + this.getTimeInfo('iter-gap', 'iteration_interval'); + this.getTimeInfo('tailing', 'tail'); + } } else { this.fp_start = '--'; this.bp_end = '--';