diff --git a/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_read_tensors.expected b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_read_tensors.expected new file mode 100644 index 0000000000..fa958ec719 --- /dev/null +++ b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_read_tensors.expected @@ -0,0 +1,70 @@ +----------------------------------------------------------- +tensor_info_1 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias +slot = 0 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = True + +tensor_data_1 attributes: +data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248 + 194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127 + 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 + 64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0 + 0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0 + 0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0 + 176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241 + 195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140 + 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 + 0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0 + 0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0 + 0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0 + 0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127 + 0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0 + 184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213 + 4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0 + 195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0 + 22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0 + 0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248 + 194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0 + 32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0 + 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127 + 0 0 
24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0 + 32 104 15 140 195 127 0 0] +size in bytes = 512 +debugger dtype = 11 +shape = [128] +----------------------------------------------------------- +tensor_info_2 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 +slot = 0 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = False + +tensor_data_2 attributes: +data (printed in uint8) = [ 0 0 0 ... 0 0 192] +size in bytes = 1024 +debugger dtype = 11 +shape = [4, 4, 4, 4] +----------------------------------------------------------- +tensor_info_3 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 +slot = 1 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = False + +tensor_data_3 attributes: +data (printed in uint8) = [ 0 169 0 ... 244 21 184] +size in bytes = 1024 +debugger dtype = 8 +shape = [256] diff --git a/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_watchpoints.expected b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_watchpoints.expected new file mode 100644 index 0000000000..7778d7d32b --- /dev/null +++ b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_false_watchpoints.expected @@ -0,0 +1,33 @@ +----------------------------------------------------------- +watchpoint_hit for test_1 attributes: +name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 +slot = 0 +condition = 6 +watchpoint_id = 1 +parameter 0 name = param +parameter 0 disabled = False +parameter 0 value = 0.0 +parameter 0 hit = True +parameter 0 actual_value = -2.0 +error code = 0 +device_id = 0 +root_graph_id = 0 +----------------------------------------------------------- +watchpoint_hit for test_4 attributes: +name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias +slot = 0 +condition = 18 +watchpoint_id = 3 +parameter 0 name = abs_mean_update_ratio_gt +parameter 0 disabled = False +parameter 0 value = 0.0 +parameter 0 
hit = True +parameter 0 actual_value = 1.793662034335766e-35 +parameter 1 name = epsilon +parameter 1 disabled = True +parameter 1 value = 0.0 +parameter 1 hit = False +parameter 1 actual_value = 0.0 +error code = 0 +device_id = 0 +root_graph_id = 0 diff --git a/tests/ut/data/dump/gpu_dumps/golden/sync_trans_true_read_tensors.expected b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_true_read_tensors.expected new file mode 100644 index 0000000000..18b5d3ee17 --- /dev/null +++ b/tests/ut/data/dump/gpu_dumps/golden/sync_trans_true_read_tensors.expected @@ -0,0 +1,70 @@ +----------------------------------------------------------- +tensor_info_1 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias +slot = 0 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = True + +tensor_data_1 attributes: +data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65 + 195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127 + 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 + 48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58 + 118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0 + 0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0 + 0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249 + 117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204 + 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 + 0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0 + 224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0 + 0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0 + 0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127 + 0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0 + 120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213 + 4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 + 195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127 + 0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0 + 10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0 + 0 0 0 0 0 0 1 0 0 0 195 127 0 0 
208 59 118 65 + 195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127 + 0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0 + 32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0 + 0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65 + 195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127 + 0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0 + 32 104 15 204 195 127 0 0] +size in bytes = 512 +debugger dtype = 11 +shape = [128] +----------------------------------------------------------- +tensor_info_2 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 +slot = 0 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = False + +tensor_data_2 attributes: +data (printed in uint8) = [206 239 74 ... 76 157 184] +size in bytes = 1024 +debugger dtype = 11 +shape = [4, 4, 4, 4] +----------------------------------------------------------- +tensor_info_3 attributes: +node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 +slot = 1 +iteration = 2 +device_id = None +root_graph_id = 0 +is_parameter = False + +tensor_data_3 attributes: +data (printed in uint8) = [206 239 74 ... 
76 157 184] +size in bytes = 1024 +debugger dtype = 8 +shape = [256] diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_Float32_DefaultFormat.bin new file mode 100644 index 0000000000..16d35a0c53 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_UInt32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_UInt32_DefaultFormat.bin new file mode 100644 index 0000000000..2bec0f6432 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_UInt32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_Float32_NCHW.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_Float32_NCHW.bin new file mode 100644 index 0000000000..ce36b266ea Binary files /dev/null and 
b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_Float32_NCHW.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin new file mode 100755 index 0000000000..8f636efb98 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin new file mode 100755 index 0000000000..0a2c98caba Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin new file mode 100755 index 0000000000..8f636efb98 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin new file mode 
100755 index 0000000000..0f9093faae Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_2/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin new file mode 100755 index 0000000000..088889aaa5 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin new file mode 100755 index 0000000000..d13bd1d6e9 Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_false/alexnet/device_0/iteration_3/moments.fc3.bias_output_0_shape_10_Float32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin new file mode 100644 index 0000000000..b64ffc519f Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin differ diff --git 
a/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_kNumberTypeUInt32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_kNumberTypeUInt32_DefaultFormat.bin new file mode 100644 index 0000000000..b64ffc519f Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--ReLUV2-op300_output_1_shape_256_kNumberTypeUInt32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin new file mode 100644 index 0000000000..b64ffc519f Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op308_output_0_shape_4_4_4_4_kNumberTypeFloat32_DefaultFormat.bin differ diff --git a/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin new file mode 100755 index 0000000000..1c18b992af Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin differ diff --git 
a/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin new file mode 100755 index 0000000000..1c18b992af Binary files /dev/null and b/tests/ut/data/dump/gpu_dumps/sync_trans_true/alexnet/device_0/iteration_2/moments.conv2.bias_output_0_shape_128_kNumberTypeFloat32_DefaultFormat.bin differ diff --git a/tests/ut/python/debugger/gpu_tests/dump_test_utils.py b/tests/ut/python/debugger/gpu_tests/dump_test_utils.py new file mode 100644 index 0000000000..7876ce0a93 --- /dev/null +++ b/tests/ut/python/debugger/gpu_tests/dump_test_utils.py @@ -0,0 +1,29 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Utils for testing offline debugger. 
def compare_actual_with_expected(test_name):
    """Compare a test's ``.actual`` output with its golden ``.expected`` file.

    The golden file is read from
    ``../data/dump/gpu_dumps/golden/<test_name>.expected`` and the generated
    output from ``<test_name>.actual``; both paths are relative to the current
    working directory. Contents are compared byte-for-byte (``shallow=False``).
    The ``.actual`` file is deleted afterwards.

    Args:
        test_name (str): Base name shared by the golden and the actual file.

    Returns:
        bool: True when both files have identical contents, False otherwise.

    Raises:
        OSError: If either file cannot be read (for example, it is missing).
    """
    expected_file = "../data/dump/gpu_dumps/golden/" + test_name + ".expected"
    actual_file = test_name + ".actual"
    try:
        return filecmp.cmp(expected_file, actual_file, shallow=False)
    finally:
        # Discard the generated output even when the comparison raises, so a
        # stale .actual file cannot leak into a later test run.
        if os.path.exists(actual_file):
            os.remove(actual_file)
GENERATE_GOLDEN = False
test_name = "sync_trans_false_read_tensors"


def test_sync_trans_false_read_tensors():
    """Read three tensors from the sync_trans_false dump and check the report.

    The tensors requested for iteration 2 are a parameter, an operator output
    at slot 0 and an operator output at slot 1. The resulting report is
    compared with the golden file unless GENERATE_GOLDEN is set.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")

    _ = debugger_backend.initialize(
        net_name="alexnet", is_sync_mode=True)

    # parameter
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
    # output tensor with zero slot
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    # output tensor with non-zero slot
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                         slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)

    tensor_info = [info1, info2, info3]

    tensor_data = debugger_backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
    # In golden-generation mode only the .expected file is written, so there
    # is no .actual file to compare against.
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)


def print_read_tensors(tensor_info, tensor_data):
    """Write a text report of the requested tensors and the data read back.

    The report goes to ``<test_name>.expected`` when GENERATE_GOLDEN is set,
    otherwise to ``<test_name>.actual``, in the current working directory.

    Args:
        tensor_info (list): Tensor descriptors exposing node_name, slot,
            iteration, device_id, root_graph_id and is_parameter.
        tensor_data (list): Read results, index-aligned with tensor_info,
            exposing data_ptr, data_size, dtype and shape.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the report even if formatting a tensor raises.
    with open(test_name + suffix, "w") as f_write:
        for index, info in enumerate(tensor_info):
            data = tensor_data[index]
            label = str(index + 1)
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + label + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + label + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            # Report a disagreement between the Python buffer length and the
            # size reported by the backend.
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")


if __name__ == "__main__":
    test_sync_trans_false_read_tensors()
GENERATE_GOLDEN = False
test_name = "sync_trans_false_watchpoints"


def test_sync_trans_false_watchpoints():
    """Set, check and remove watchpoints on the sync_trans_false dump.

    Hits and any unexpected results are written to a report file, which is
    compared with the golden file unless GENERATE_GOLDEN is set.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the report even if a debugger call raises.
    with open(test_name + suffix, "w") as f_write:
        debugger_backend = d.DbgServices(
            dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")

        _ = debugger_backend.initialize(
            net_name="Alexnet", is_sync_mode=True)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=1, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                             "Conv2D-op308":
                                                             {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                              }}, parameter_list=[param1])

        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        if len(watchpoint_hits_test_1) != 1:
            f_write.write(
                "ERROR -> test 1: watchpoint set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)

        # test 2: watchpoint remove and ensure it's not hit
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_2:
            f_write.write("ERROR -> test 2: watchpoint removed but hit\n")

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=2, watch_condition=6,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                                                             "Conv2D-op308":
                                                             {"device_id": [0], "root_graph_id": [0], "is_parameter": False
                                                              }}, parameter_list=[param2])

        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_3:
            f_write.write(
                "ERROR -> test 3: watchpoint set but not supposed to be hit\n")
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(watchpoint_id=3, watch_condition=18,
                                            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                                                             "Parameter[6]_11/fc3.bias":
                                                             {"device_id": [0], "root_graph_id": [0], "is_parameter": True
                                                              }}, parameter_list=[param_abs_mean_update_ratio_gt,
                                                                                  param_epsilon])

        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        if len(watchpoint_hits_test_4) != 1:
            f_write.write(
                "ERROR -> test 4: watchpoint weight change set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)

    # In golden-generation mode only the .expected file is written, so there
    # is no .actual file to compare against.
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)


def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write every watchpoint hit, with its parameters, to an open text file.

    Args:
        watchpoint_hits (list): Hits exposing name, slot, condition,
            watchpoint_id, parameters, error_code, device_id and root_graph_id.
        test_id (int): Number of the sub-test, used in each section heading.
        f_write: Writable text file object that receives the report.
    """
    for hit in watchpoint_hits:
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" %
                      test_id + "\n")
        f_write.write("name = " + str(hit.name) + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for index, param in enumerate(hit.parameters):
            prefix = "parameter " + str(index)
            f_write.write(prefix + " name = " + param.name + "\n")
            f_write.write(prefix + " disabled = " + str(param.disabled) + "\n")
            f_write.write(prefix + " value = " + str(param.value) + "\n")
            f_write.write(prefix + " hit = " + str(param.hit) + "\n")
            f_write.write(prefix + " actual_value = " +
                          str(param.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("device_id = " + str(hit.device_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")


if __name__ == "__main__":
    test_sync_trans_false_watchpoints()
GENERATE_GOLDEN = False
test_name = "sync_trans_true_read_tensors"


def test_sync_trans_read_tensors():
    """Read three tensors from the sync_trans_true dump and check the report.

    The tensors requested for iteration 2 are a parameter, an operator output
    at slot 0 and an operator output at slot 1. The resulting report is
    compared with the golden file unless GENERATE_GOLDEN is set.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_true/alexnet")

    # NOTE(review): net_name looks like a placeholder; presumably it is not
    # used to locate this dump -- confirm before changing it.
    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # parameter
    info1 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True)
    # output tensor with zero slot
    info2 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                         slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)
    # output tensor with non-zero slot
    info3 = d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                         slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False)

    tensor_info = [info1, info2, info3]

    tensor_data = debugger_backend.read_tensors(tensor_info)

    print_read_tensors(tensor_info, tensor_data)
    # In golden-generation mode only the .expected file is written, so there
    # is no .actual file to compare against.
    if not GENERATE_GOLDEN:
        assert compare_actual_with_expected(test_name)


def print_read_tensors(tensor_info, tensor_data):
    """Write a text report of the requested tensors and the data read back.

    The report goes to ``<test_name>.expected`` when GENERATE_GOLDEN is set,
    otherwise to ``<test_name>.actual``, in the current working directory.

    Args:
        tensor_info (list): Tensor descriptors exposing node_name, slot,
            iteration, device_id, root_graph_id and is_parameter.
        tensor_data (list): Read results, index-aligned with tensor_info,
            exposing data_ptr, data_size, dtype and shape.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # The context manager closes the report even if formatting a tensor raises.
    with open(test_name + suffix, "w") as f_write:
        for index, info in enumerate(tensor_info):
            data = tensor_data[index]
            label = str(index + 1)
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + label + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + label + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            # Report a disagreement between the Python buffer length and the
            # size reported by the backend.
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " + str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")


if __name__ == "__main__":
    test_sync_trans_read_tensors()
if [ ${RET} -ne 0 ]; then @@ -87,6 +95,12 @@ else exit ${RET} fi + pytest $CURRPATH/debugger/gpu_tests + RET=$? + if [ ${RET} -ne 0 ]; then + exit ${RET} + fi + pytest -v $CURRPATH/parallel/*.py RET=$? if [ ${RET} -ne 0 ]; then @@ -111,7 +125,7 @@ else exit ${RET} fi - pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH + pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/debugger/gpu_tests --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH RET=$? if [ ${RET} -ne 0 ]; then exit ${RET}