You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_ms_maximum.py 2.0 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. from tests.common.gen_random import random_gaussian
  16. from akg.utils import kernel_exec as utils
  17. from akg.utils.result_analysis import gpu_profiling
  18. from akg.utils.format_transform import to_tvm_nd_array
  19. from tests.common.tensorio import compare_tensor
  20. from akg.ops.math_gpu.maximum import maximum
  21. def gen_data(shape1, shape2, dtype):
  22. support_list = {"float16": np.float16, "float32": np.float32}
  23. lhs = random_gaussian(shape1, miu=1, sigma=0.1).astype(support_list[dtype])
  24. rhs = random_gaussian(shape2, miu=1, sigma=0.1).astype(support_list[dtype])
  25. expect = np.maximum(lhs, rhs)
  26. output = np.full(expect.shape, np.nan, expect.dtype)
  27. return lhs, rhs, output, expect
  28. def test_ms_maximum(shape1, shape2, dtype, poly_sch=False):
  29. if poly_sch:
  30. mod = utils.op_build_test(maximum, (shape1, shape2), (dtype, dtype), kernel_name="maximum", attrs={"target": "cuda"})
  31. lhs, rhs, output, expect = gen_data(shape1, shape2, dtype)
  32. args = (lhs, rhs, output)
  33. output = utils.mod_launch(mod, args, expect=expect)
  34. res = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
  35. print("Test {}".format("Pass" if res else "Fail"))
  36. if not res:
  37. print("Error cuda:========================")
  38. print(mod.imported_modules[0].get_source())
  39. raise AssertionError("Test fail")
  40. lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
  41. gpu_profiling(mod, lhs, rhs, expect, 400)

AKG(Auto Kernel Generator)对深度神经网络中的算子进行优化,并提供特定模式下的算子自动融合功能。AKG与MindSpore的图算融合功能协同工作,可提升在不同硬件后端上运行网络的性能。