You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_profiler.py 9.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. import os
  16. import shutil
  17. import sys
  18. from tests.security_utils import security_off_wrap
  19. import pytest
  20. from mindspore import dataset as ds
  21. from mindspore import nn, Tensor, context
  22. from mindspore.nn.metrics import Accuracy
  23. from mindspore.nn.optim import Momentum
  24. from mindspore.dataset.transforms import c_transforms as C
  25. from mindspore.dataset.vision import c_transforms as CV
  26. from mindspore.dataset.vision import Inter
  27. from mindspore.common import dtype as mstype
  28. from mindspore.common.initializer import TruncatedNormal
  29. from mindspore.train import Model
  30. from mindspore.profiler import Profiler
  31. def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
  32. """weight initial for conv layer"""
  33. weight = weight_variable()
  34. return nn.Conv2d(in_channels, out_channels,
  35. kernel_size=kernel_size, stride=stride, padding=padding,
  36. weight_init=weight, has_bias=False, pad_mode="valid")
  37. def fc_with_initialize(input_channels, out_channels):
  38. """weight initial for fc layer"""
  39. weight = weight_variable()
  40. bias = weight_variable()
  41. return nn.Dense(input_channels, out_channels, weight, bias)
  42. def weight_variable():
  43. """weight initial"""
  44. return TruncatedNormal(0.02)
  45. class LeNet5(nn.Cell):
  46. """Define LeNet5 network."""
  47. def __init__(self, num_class=10, channel=1):
  48. super(LeNet5, self).__init__()
  49. self.num_class = num_class
  50. self.conv1 = conv(channel, 6, 5)
  51. self.conv2 = conv(6, 16, 5)
  52. self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
  53. self.fc2 = fc_with_initialize(120, 84)
  54. self.fc3 = fc_with_initialize(84, self.num_class)
  55. self.relu = nn.ReLU()
  56. self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
  57. self.flatten = nn.Flatten()
  58. self.channel = Tensor(channel)
  59. def construct(self, data):
  60. """define construct."""
  61. output = self.conv1(data)
  62. output = self.relu(output)
  63. output = self.max_pool2d(output)
  64. output = self.conv2(output)
  65. output = self.relu(output)
  66. output = self.max_pool2d(output)
  67. output = self.flatten(output)
  68. output = self.fc1(output)
  69. output = self.relu(output)
  70. output = self.fc2(output)
  71. output = self.relu(output)
  72. output = self.fc3(output)
  73. return output
  74. def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
  75. """create dataset for train"""
  76. # define dataset
  77. mnist_ds = ds.MnistDataset(data_path, num_samples=batch_size * 100)
  78. resize_height, resize_width = 32, 32
  79. rescale = 1.0 / 255.0
  80. rescale_nml = 1 / 0.3081
  81. shift_nml = -1 * 0.1307 / 0.3081
  82. # define map operations
  83. resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode
  84. rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
  85. rescale_op = CV.Rescale(rescale, shift=0.0)
  86. hwc2chw_op = CV.HWC2CHW()
  87. type_cast_op = C.TypeCast(mstype.int32)
  88. # apply map operations on images
  89. mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
  90. mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
  91. mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
  92. mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
  93. mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)
  94. # apply DatasetOps
  95. mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
  96. mnist_ds = mnist_ds.repeat(repeat_size)
  97. return mnist_ds
  98. def cleanup():
  99. data_path = os.path.join(os.getcwd(), "data")
  100. kernel_meta_path = os.path.join(os.getcwd(), "kernel_data")
  101. cache_path = os.path.join(os.getcwd(), "__pycache__")
  102. if os.path.exists(data_path):
  103. shutil.rmtree(data_path)
  104. if os.path.exists(kernel_meta_path):
  105. shutil.rmtree(kernel_meta_path)
  106. if os.path.exists(cache_path):
  107. shutil.rmtree(cache_path)
  108. class TestProfiler:
  109. device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
  110. rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
  111. mnist_path = '/home/workspace/mindspore_dataset/mnist'
  112. @classmethod
  113. def setup_class(cls):
  114. """Run begin all test case start."""
  115. cleanup()
  116. @staticmethod
  117. def teardown():
  118. """Run after each test case end."""
  119. cleanup()
  120. @pytest.mark.level2
  121. @pytest.mark.platform_x86_cpu
  122. @pytest.mark.env_onecard
  123. @security_off_wrap
  124. def test_cpu_profiler(self):
  125. if sys.platform != 'linux':
  126. return
  127. self._train_with_profiler(device_target="CPU")
  128. self._check_cpu_profiling_file()
  129. @pytest.mark.level1
  130. @pytest.mark.platform_x86_gpu_training
  131. @pytest.mark.env_onecard
  132. @security_off_wrap
  133. def test_gpu_profiler(self):
  134. self._train_with_profiler(device_target="GPU")
  135. self._check_gpu_profiling_file()
  136. @pytest.mark.level0
  137. @pytest.mark.platform_arm_ascend_training
  138. @pytest.mark.platform_x86_ascend_training
  139. @pytest.mark.env_onecard
  140. @security_off_wrap
  141. def test_ascend_profiler(self):
  142. self._train_with_profiler(device_target="Ascend")
  143. self._check_d_profiling_file()
  144. def _train_with_profiler(self, device_target):
  145. context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
  146. profiler = Profiler(profile_memory=True, output_path='data')
  147. profiler_name = os.listdir(os.path.join(os.getcwd(), 'data'))[0]
  148. self.profiler_path = os.path.join(os.getcwd(), f'data/{profiler_name}/')
  149. ds_train = create_dataset(os.path.join(self.mnist_path, "train"))
  150. if ds_train.get_dataset_size() == 0:
  151. raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
  152. lenet = LeNet5()
  153. loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
  154. optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
  155. model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})
  156. model.train(1, ds_train, dataset_sink_mode=True)
  157. profiler.analyse()
  158. def _check_gpu_profiling_file(self):
  159. op_detail_file = self.profiler_path + f'gpu_op_detail_info_{self.device_id}.csv'
  160. op_type_file = self.profiler_path + f'gpu_op_type_info_{self.device_id}.csv'
  161. activity_file = self.profiler_path + f'gpu_activity_data_{self.device_id}.csv'
  162. timeline_file = self.profiler_path + f'gpu_timeline_display_{self.device_id}.json'
  163. getnext_file = self.profiler_path + f'minddata_getnext_profiling_{self.device_id}.txt'
  164. pipeline_file = self.profiler_path + f'minddata_pipeline_raw_{self.device_id}.csv'
  165. gpu_profiler_files = (op_detail_file, op_type_file, activity_file,
  166. timeline_file, getnext_file, pipeline_file)
  167. for file in gpu_profiler_files:
  168. assert os.path.isfile(file)
  169. def _check_d_profiling_file(self):
  170. aicore_file = self.profiler_path + f'aicore_intermediate_{self.rank_id}_detail.csv'
  171. step_trace_file = self.profiler_path + f'step_trace_raw_{self.rank_id}_detail_time.csv'
  172. timeline_file = self.profiler_path + f'ascend_timeline_display_{self.rank_id}.json'
  173. aicpu_file = self.profiler_path + f'aicpu_intermediate_{self.rank_id}.csv'
  174. minddata_pipeline_file = self.profiler_path + f'minddata_pipeline_raw_{self.rank_id}.csv'
  175. queue_profiling_file = self.profiler_path + f'device_queue_profiling_{self.rank_id}.txt'
  176. memory_file = self.profiler_path + f'memory_usage_{self.rank_id}.pb'
  177. d_profiler_files = (aicore_file, step_trace_file, timeline_file, aicpu_file,
  178. minddata_pipeline_file, queue_profiling_file, memory_file)
  179. for file in d_profiler_files:
  180. assert os.path.isfile(file)
  181. def _check_cpu_profiling_file(self):
  182. op_detail_file = self.profiler_path + f'cpu_op_detail_info_{self.device_id}.csv'
  183. op_type_file = self.profiler_path + f'cpu_op_type_info_{self.device_id}.csv'
  184. timeline_file = self.profiler_path + f'cpu_op_execute_timestamp_{self.device_id}.txt'
  185. cpu_profiler_files = (op_detail_file, op_type_file, timeline_file)
  186. for file in cpu_profiler_files:
  187. assert os.path.isfile(file)