From e57f07bef5afe1e361565cabe1325ed22e0ad87b Mon Sep 17 00:00:00 2001 From: yelihua Date: Mon, 14 Sep 2020 10:37:45 +0800 Subject: [PATCH] fix the bug for tensor history --- mindinsight/backend/debugger/debugger_api.py | 2 +- mindinsight/debugger/debugger_grpc_server.py | 3 ++- mindinsight/debugger/debugger_server.py | 3 +-- mindinsight/debugger/stream_cache/tensor.py | 5 ++++- .../debugger/stream_handler/tensor_handler.py | 18 +++++++++--------- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/mindinsight/backend/debugger/debugger_api.py b/mindinsight/backend/debugger/debugger_api.py index 2b46c02e..a0b122f4 100644 --- a/mindinsight/backend/debugger/debugger_api.py +++ b/mindinsight/backend/debugger/debugger_api.py @@ -126,7 +126,7 @@ def tensor_comparisons(): Examples: >>> Get http://xxxx/v1/mindinsight/debugger/tensor-comparisons? - name=node_name&detail=data&shape=[0, 0, :, :]&tolerance=0.5 + >>> name=node_name&detail=data&shape=[0, 0, :, :]&tolerance=0.5 """ name = request.args.get('name') detail = request.args.get('detail', 'data') diff --git a/mindinsight/debugger/debugger_grpc_server.py b/mindinsight/debugger/debugger_grpc_server.py index 88a6dad0..6ea69ebd 100644 --- a/mindinsight/debugger/debugger_grpc_server.py +++ b/mindinsight/debugger/debugger_grpc_server.py @@ -257,7 +257,6 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): def SendTensors(self, request_iterator, context): """Send tensors into DebuggerCache.""" log.info("Received tensor.") - self._received_view_cmd['wait_for_tensor'] = False tensor_construct = [] tensor_stream = self._cache_store.get_stream_handler(Streams.TENSOR) metadata_stream = self._cache_store.get_stream_handler(Streams.METADATA) @@ -266,6 +265,8 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): for tensor in request_iterator: tensor_construct.append(tensor) if tensor.finished: + if self._received_view_cmd.get('wait_for_tensor') and tensor.tensor_content: + 
self._received_view_cmd['wait_for_tensor'] = False tensor_stream.put({'step': step, 'tensor_protos': tensor_construct}) tensor_construct = [] tensor_names.append(':'.join([tensor.node_name, tensor.slot])) diff --git a/mindinsight/debugger/debugger_server.py b/mindinsight/debugger/debugger_server.py index bdc170d1..601f3ecd 100644 --- a/mindinsight/debugger/debugger_server.py +++ b/mindinsight/debugger/debugger_server.py @@ -445,8 +445,7 @@ class DebuggerServer: if metadata_stream.state != ServerStatus.WAITING.value: log.error("Failed to create watchpoint as the MindSpore is not in waiting state.") raise DebuggerCreateWatchPointError( - "Failed to create watchpoint as the MindSpore is not in waiting state." - ) + "Failed to create watchpoint as the MindSpore is not in waiting state.") if metadata_stream.backend == 'GPU' and watch_condition.get('condition') == 'OVERFLOW': log.error("GPU doesn't support OVERFLOW watch condition.") raise DebuggerParamValueError("GPU doesn't support OVERFLOW watch condition.") diff --git a/mindinsight/debugger/stream_cache/tensor.py b/mindinsight/debugger/stream_cache/tensor.py index 98920ae1..01975254 100644 --- a/mindinsight/debugger/stream_cache/tensor.py +++ b/mindinsight/debugger/stream_cache/tensor.py @@ -152,7 +152,9 @@ class OpTensor(BaseTensor): statistics = TensorUtils.get_statistics_from_tensor(tensor_value) res['statistics'] = TensorUtils.get_statistics_dict(statistics) res['value'] = tensor_value.tolist() - return res + elif isinstance(tensor_value, str): + res['value'] = tensor_value + return res def get_tensor_value_by_shape(self, shape=None): @@ -188,6 +190,7 @@ class OpTensor(BaseTensor): value = np.asarray(value) return value + class ConstTensor(BaseTensor): """Tensor data structure for Const Node.""" diff --git a/mindinsight/debugger/stream_handler/tensor_handler.py b/mindinsight/debugger/stream_handler/tensor_handler.py index d76b90df..e9b5dd91 100644 --- a/mindinsight/debugger/stream_handler/tensor_handler.py +++ 
b/mindinsight/debugger/stream_handler/tensor_handler.py @@ -188,6 +188,9 @@ class TensorHandler(StreamHandlerBase): # add `has_prev_step` field to tensor basic info. if basic_info: tensor_info.update(basic_info) + if not basic_info.get('value'): + missed_tensors.append(tensor_info) + log.debug("Add view cmd for %s", tensor_name) else: missed_tensors.append(tensor_info) log.debug("Add view cmd for %s", tensor_name) @@ -221,13 +224,7 @@ class TensorHandler(StreamHandlerBase): if prev_step < 0: return flag tensor = self._get_tensor(tensor_name, step=prev_step) - if not tensor: - # the tensor need to be queried from client - flag = False - elif tensor.value: - flag = True - - return flag + return bool(tensor and tensor.value) def get_tensor_value_by_name(self, tensor_name, prev=False): """Get tensor value by name in numpy type.""" @@ -283,13 +280,16 @@ class TensorHandler(StreamHandlerBase): curr_tensor_slice = curr_tensor.get_tensor_value_by_shape(shape) prev_tensor_slice = prev_tensor.get_tensor_value_by_shape(shape) tensor_info = curr_tensor.get_basic_info() + if isinstance(tensor_info, dict): + del tensor_info['has_prev_step'] + del tensor_info['value'] if isinstance(curr_tensor_slice, np.ndarray) and isinstance(prev_tensor_slice, np.ndarray): diff_tensor = TensorUtils.calc_diff_between_two_tensor(curr_tensor_slice, prev_tensor_slice, tolerance) result = np.stack([prev_tensor_slice, curr_tensor_slice, diff_tensor], axis=-1) tensor_info['diff'] = result.tolist() stats = TensorUtils.get_statistics_from_tensor(diff_tensor) tensor_info['statistics'] = TensorUtils.get_statistics_dict(stats) - del tensor_info['has_prev_step'] - del tensor_info['value'] + elif isinstance(curr_tensor_slice, str): + tensor_info['diff'] = curr_tensor_slice reply = {'tensor_value': tensor_info} return reply