From: @islam_amin Reviewed-by: @john_tzanakakis,@tom__chen,@robingrosman Signed-off-by: @john_tzanakakispull/15758/MERGE
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248 | |||
| 194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127 | |||
| 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 | |||
| 64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0 | |||
| 0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0 | |||
| 0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0 | |||
| 176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241 | |||
| 195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140 | |||
| 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 | |||
| 0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0 | |||
| 0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0 | |||
| 0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0 | |||
| 0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127 | |||
| 0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0 | |||
| 184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213 | |||
| 4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0 | |||
| 195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0 | |||
| 22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0 | |||
| 0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248 | |||
| 194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |||
| 0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0 | |||
| 32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0 | |||
| 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127 | |||
| 0 0 24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0 | |||
| 32 104 15 140 195 127 0 0] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [ 0 0 0 ... 0 0 192] | |||
| size in bytes = 1024 | |||
| debugger dtype = 11 | |||
| shape = [4, 4, 4, 4] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [ 0 169 0 ... 244 21 184] | |||
| size in bytes = 1024 | |||
| debugger dtype = 8 | |||
| shape = [256] | |||
| @@ -0,0 +1,33 @@ | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_1 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| condition = 6 | |||
| watchpoint_id = 1 | |||
| parameter 0 name = param | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = -2.0 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| ----------------------------------------------------------- | |||
| watchpoint_hit for test_4 attributes: | |||
| name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias | |||
| slot = 0 | |||
| condition = 18 | |||
| watchpoint_id = 3 | |||
| parameter 0 name = abs_mean_update_ratio_gt | |||
| parameter 0 disabled = False | |||
| parameter 0 value = 0.0 | |||
| parameter 0 hit = True | |||
| parameter 0 actual_value = 1.793662034335766e-35 | |||
| parameter 1 name = epsilon | |||
| parameter 1 disabled = True | |||
| parameter 1 value = 0.0 | |||
| parameter 1 hit = False | |||
| parameter 1 actual_value = 0.0 | |||
| error code = 0 | |||
| device_id = 0 | |||
| root_graph_id = 0 | |||
| @@ -0,0 +1,70 @@ | |||
| ----------------------------------------------------------- | |||
| tensor_info_1 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = True | |||
| tensor_data_1 attributes: | |||
| data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65 | |||
| 195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127 | |||
| 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0 | |||
| 48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58 | |||
| 118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0 | |||
| 0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0 | |||
| 0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 | |||
| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249 | |||
| 117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204 | |||
| 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 | |||
| 0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0 | |||
| 224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0 | |||
| 0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0 | |||
| 0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127 | |||
| 0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0 | |||
| 120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213 | |||
| 4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 | |||
| 195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127 | |||
| 0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0 | |||
| 10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0 | |||
| 0 0 0 0 0 0 1 0 0 0 195 127 0 0 208 59 118 65 | |||
| 195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127 | |||
| 0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0 | |||
| 32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0 | |||
| 0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65 | |||
| 195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127 | |||
| 0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0 | |||
| 32 104 15 204 195 127 0 0] | |||
| size in bytes = 512 | |||
| debugger dtype = 11 | |||
| shape = [128] | |||
| ----------------------------------------------------------- | |||
| tensor_info_2 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308 | |||
| slot = 0 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_2 attributes: | |||
| data (printed in uint8) = [206 239 74 ... 76 157 184] | |||
| size in bytes = 1024 | |||
| debugger dtype = 11 | |||
| shape = [4, 4, 4, 4] | |||
| ----------------------------------------------------------- | |||
| tensor_info_3 attributes: | |||
| node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300 | |||
| slot = 1 | |||
| iteration = 2 | |||
| device_id = None | |||
| root_graph_id = 0 | |||
| is_parameter = False | |||
| tensor_data_3 attributes: | |||
| data (printed in uint8) = [206 239 74 ... 76 157 184] | |||
| size in bytes = 1024 | |||
| debugger dtype = 8 | |||
| shape = [256] | |||
| @@ -0,0 +1,29 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Utils for testing offline debugger. | |||
| """ | |||
| import filecmp | |||
| import os | |||
def compare_actual_with_expected(test_name):
    """Compare a test's ".actual" output file with its golden ".expected" file.

    Args:
        test_name (str): base name of the test; "<test_name>.actual" in the
            current directory is compared against the golden copy under
            ../data/dump/gpu_dumps/golden/.

    Returns:
        bool: True if the two files have byte-identical contents.
    """
    actual_path = test_name + ".actual"
    golden_path = os.path.join(
        "../data/dump/gpu_dumps/golden", test_name + ".expected")
    try:
        # shallow=False forces a full content comparison instead of the
        # default os.stat()-based check.
        is_eq = filecmp.cmp(golden_path, actual_path, shallow=False)
    finally:
        # Remove the .actual file even when cmp() raises (e.g. missing
        # golden file), so a stale .actual cannot leak into the next run.
        if os.path.exists(actual_path):
            os.remove(actual_path)
    return is_eq
| @@ -0,0 +1,89 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
| from dump_test_utils import compare_actual_with_expected | |||
# When True, the print helper regenerates the golden ".expected" file
# instead of writing an ".actual" file for comparison.
GENERATE_GOLDEN = False
# Base name shared by this test's output file and its golden counterpart.
test_name = "sync_trans_false_read_tensors"
def test_sync_trans_false_read_tensors():
    """Read one parameter and two op outputs through the offline debugger
    and compare the printed report against the golden file."""
    backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")
    _ = backend.initialize(net_name="alexnet", is_sync_mode=True)
    requested = [
        # parameter
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                     slot=0, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                     slot=1, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=False),
    ]
    fetched = backend.read_tensors(requested)
    print_read_tensors(requested, fetched)
    assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Write the attributes of each requested tensor and its retrieved data.

    Writes to "<test_name>.expected" when GENERATE_GOLDEN is set (golden-file
    regeneration), otherwise to "<test_name>.actual" for later comparison.
    The exact output format is compared against golden files, so every
    literal below must stay byte-for-byte stable.

    Args:
        tensor_info (list): d.TensorInfo objects that were requested.
        tensor_data (list): matching results from read_tensors(); each entry
            exposes data_ptr (buffer), data_size, dtype and shape.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # "with" guarantees the file is closed (and flushed) even if a write or
    # np.frombuffer raises, so the later filecmp comparison never sees a
    # half-written or still-open file.
    with open(test_name + suffix, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            info = tensor_info[x]
            data = tensor_data[x]
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            # Sanity check: the Python-side buffer length must agree with the
            # size reported by the C++ backend.
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " +
                              str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")
if __name__ == "__main__":
    # Allow running this test directly, outside pytest.
    test_sync_trans_false_read_tensors()
| @@ -0,0 +1,130 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Watchpoints test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| from dump_test_utils import compare_actual_with_expected | |||
# When True, the report regenerates the golden ".expected" file instead of
# writing an ".actual" file for comparison.
GENERATE_GOLDEN = False
# Base name shared by this test's output file and its golden counterpart.
test_name = "sync_trans_false_watchpoints"
def test_sync_trans_false_watchpoints():
    """Exercise add_watchpoint/check_watchpoints/remove_watchpoint on a
    synchronous-mode GPU dump and compare the hit report with the golden file.

    NOTES:
    -> watch_condition=6 is MIN_LT
    -> watch_condition=18 is CHANGE_TOO_LARGE
    """
    if GENERATE_GOLDEN:
        out_name = test_name + ".expected"
    else:
        out_name = test_name + ".actual"
    # "with" guarantees the report file is closed before it is compared,
    # even if one of the debugger calls below raises.
    with open(out_name, "w") as f_write:
        debugger_backend = d.DbgServices(
            dump_file_path="../data/dump/gpu_dumps/sync_trans_false/alexnet")
        # NOTE(review): net_name="Alexnet" differs in case from the "alexnet"
        # dump directory used elsewhere -- confirm this is intentional.
        _ = debugger_backend.initialize(
            net_name="Alexnet", is_sync_mode=True)
        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=1, watch_condition=6,
            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                             "Conv2D-op308":
                             {"device_id": [0], "root_graph_id": [0],
                              "is_parameter": False}},
            parameter_list=[param1])
        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        if len(watchpoint_hits_test_1) != 1:
            f_write.write(
                "ERROR -> test 1: watchpoint set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_1, 1, f_write)
        # test 2: watchpoint remove and ensure it's not hit
        _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_2:
            f_write.write("ERROR -> test 2: watchpoint removed but hit\n")
        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=2, watch_condition=6,
            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/"
                             "Conv2D-op308":
                             {"device_id": [0], "root_graph_id": [0],
                              "is_parameter": False}},
            parameter_list=[param2])
        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        if watchpoint_hits_test_3:
            f_write.write(
                "ERROR -> test 3: watchpoint set but not supposed to be hit\n")
        _ = debugger_backend.remove_watchpoint(watchpoint_id=2)
        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        _ = debugger_backend.add_watchpoint(
            watchpoint_id=3, watch_condition=18,
            check_node_list={"Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                             "Parameter[6]_11/fc3.bias":
                             {"device_id": [0], "root_graph_id": [0],
                              "is_parameter": True}},
            parameter_list=[param_abs_mean_update_ratio_gt,
                            param_epsilon])
        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        if len(watchpoint_hits_test_4) != 1:
            f_write.write(
                "ERROR -> test 4: watchpoint weight change set but not hit just once\n")
        print_watchpoint_hits(watchpoint_hits_test_4, 4, f_write)
    assert compare_actual_with_expected(test_name)
def print_watchpoint_hits(watchpoint_hits, test_id, f_write):
    """Write a plain-text report of watchpoint hits to an open file.

    Args:
        watchpoint_hits (list): hit objects returned by check_watchpoints();
            each exposes name, slot, condition, watchpoint_id, parameters,
            error_code, device_id and root_graph_id. Each parameter exposes
            name, disabled, value, hit and actual_value.
        test_id (int): test number used in the section header.
        f_write (file): writable text stream; the caller owns and closes it.

    The output is compared byte-for-byte against golden files, so every
    literal below must stay stable.
    """
    # Iterate the hits directly instead of re-indexing the list on every
    # attribute access (the original for x, _ in enumerate(...) idiom).
    for hit in watchpoint_hits:
        f_write.write(
            "-----------------------------------------------------------\n")
        f_write.write("watchpoint_hit for test_%u attributes:" %
                      test_id + "\n")
        f_write.write("name = " + str(hit.name) + "\n")
        f_write.write("slot = " + str(hit.slot) + "\n")
        f_write.write("condition = " + str(hit.condition) + "\n")
        f_write.write("watchpoint_id = " + str(hit.watchpoint_id) + "\n")
        for index, param in enumerate(hit.parameters):
            prefix = "parameter " + str(index) + " "
            f_write.write(prefix + "name = " + param.name + "\n")
            f_write.write(prefix + "disabled = " + str(param.disabled) + "\n")
            f_write.write(prefix + "value = " + str(param.value) + "\n")
            f_write.write(prefix + "hit = " + str(param.hit) + "\n")
            f_write.write(prefix + "actual_value = " +
                          str(param.actual_value) + "\n")
        f_write.write("error code = " + str(hit.error_code) + "\n")
        f_write.write("device_id = " + str(hit.device_id) + "\n")
        f_write.write("root_graph_id = " + str(hit.root_graph_id) + "\n")
if __name__ == "__main__":
    # Allow running this test directly, outside pytest.
    test_sync_trans_false_watchpoints()
| @@ -0,0 +1,89 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| """ | |||
| Read tensor test script for offline debugger APIs. | |||
| """ | |||
| import mindspore.offline_debug.dbg_services as d | |||
| import numpy as np | |||
| from dump_test_utils import compare_actual_with_expected | |||
# When True, the print helper regenerates the golden ".expected" file
# instead of writing an ".actual" file for comparison.
GENERATE_GOLDEN = False
# Base name shared by this test's output file and its golden counterpart.
test_name = "sync_trans_true_read_tensors"
def test_sync_trans_read_tensors():
    """Read one parameter and two op outputs through the offline debugger
    (sync_trans_true dump) and compare the report with the golden file."""
    backend = d.DbgServices(
        dump_file_path="../data/dump/gpu_dumps/sync_trans_true/alexnet")
    # NOTE(review): "Network Name goes here!" reads like a leftover
    # placeholder -- confirm whether net_name should be "alexnet" here.
    _ = backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)
    requested = [
        # parameter
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
                     slot=0, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308",
                     slot=0, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300",
                     slot=1, iteration=2, device_id=0, root_graph_id=0,
                     is_parameter=False),
    ]
    fetched = backend.read_tensors(requested)
    print_read_tensors(requested, fetched)
    assert compare_actual_with_expected(test_name)
def print_read_tensors(tensor_info, tensor_data):
    """Write the attributes of each requested tensor and its retrieved data.

    Writes to "<test_name>.expected" when GENERATE_GOLDEN is set (golden-file
    regeneration), otherwise to "<test_name>.actual" for later comparison.
    The exact output format is compared against golden files, so every
    literal below must stay byte-for-byte stable.

    Args:
        tensor_info (list): d.TensorInfo objects that were requested.
        tensor_data (list): matching results from read_tensors(); each entry
            exposes data_ptr (buffer), data_size, dtype and shape.
    """
    suffix = ".expected" if GENERATE_GOLDEN else ".actual"
    # "with" guarantees the file is closed (and flushed) even if a write or
    # np.frombuffer raises, so the later filecmp comparison never sees a
    # half-written or still-open file.
    with open(test_name + suffix, "w") as f_write:
        for x, _ in enumerate(tensor_info):
            info = tensor_info[x]
            data = tensor_data[x]
            f_write.write(
                "-----------------------------------------------------------\n")
            f_write.write("tensor_info_" + str(x + 1) + " attributes:\n")
            f_write.write("node name = " + info.node_name + "\n")
            f_write.write("slot = " + str(info.slot) + "\n")
            f_write.write("iteration = " + str(info.iteration) + "\n")
            f_write.write("device_id = " + str(info.device_id) + "\n")
            f_write.write("root_graph_id = " + str(info.root_graph_id) + "\n")
            f_write.write("is_parameter = " + str(info.is_parameter) + "\n")
            f_write.write("\n")
            f_write.write("tensor_data_" + str(x + 1) + " attributes:\n")
            f_write.write("data (printed in uint8) = " + str(np.frombuffer(
                data.data_ptr, np.uint8, data.data_size)) + "\n")
            # Sanity check: the Python-side buffer length must agree with the
            # size reported by the C++ backend.
            py_byte_size = len(data.data_ptr)
            c_byte_size = data.data_size
            if c_byte_size != py_byte_size:
                f_write.write("The python byte size of " + str(py_byte_size) +
                              " does not match the C++ byte size of " +
                              str(c_byte_size) + "\n")
            f_write.write("size in bytes = " + str(data.data_size) + "\n")
            f_write.write("debugger dtype = " + str(data.dtype) + "\n")
            f_write.write("shape = " + str(data.shape) + "\n")
if __name__ == "__main__":
    # Allow running this test directly, outside pytest.
    test_sync_trans_read_tensors()
| @@ -39,6 +39,14 @@ if [ $# -eq 1 ] && ([ "$1" == "stage1" ] || [ "$1" == "stage2" ] || [ "$1" == | |||
| exit ${RET} | |||
| fi | |||
| echo "run python debugger gpu ut" | |||
| pytest -v $CURRPATH/debugger/gpu_tests | |||
| RET=$? | |||
| if [ ${RET} -ne 0 ]; then | |||
| exit ${RET} | |||
| fi | |||
| elif [ $1 == "stage2" ]; then | |||
| echo "run python parallel" | |||
| pytest -s $CURRPATH/parallel/*.py | |||
| @@ -72,7 +80,7 @@ if [ $# -eq 1 ] && ([ "$1" == "stage1" ] || [ "$1" == "stage2" ] || [ "$1" == | |||
| exit ${RET} | |||
| fi | |||
| pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH | |||
| pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/debugger/gpu_tests --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH | |||
| RET=$? | |||
| if [ ${RET} -ne 0 ]; then | |||
| @@ -87,6 +95,12 @@ else | |||
| exit ${RET} | |||
| fi | |||
| pytest $CURRPATH/debugger/gpu_tests | |||
| RET=$? | |||
| if [ ${RET} -ne 0 ]; then | |||
| exit ${RET} | |||
| fi | |||
| pytest -v $CURRPATH/parallel/*.py | |||
| RET=$? | |||
| if [ ${RET} -ne 0 ]; then | |||
| @@ -111,7 +125,7 @@ else | |||
| exit ${RET} | |||
| fi | |||
| pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH | |||
| pytest -v --ignore=$CURRPATH/dataset --ignore=$CURRPATH/debugger/gpu_tests --ignore=$CURRPATH/parallel --ignore=$CURRPATH/ops --ignore=$CURRPATH/pynative_mode --ignore=$CURRPATH/pipeline --ignore=$CURRPATH/train --ignore=$CURRPATH/nn $IGNORE_EXEC $CURRPATH | |||
| RET=$? | |||
| if [ ${RET} -ne 0 ]; then | |||
| exit ${RET} | |||