You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

square_impl.py 2.9 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. from __future__ import absolute_import
  16. from te import tvm
  17. from topi import generic
  18. import te.lang.cce
  19. from topi.cce import util
  20. from te.platform.fusion_manager import fusion_manager
  21. from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
  22. # shape size limit for aicore is 2**31
  23. SHAPE_SIZE_LIMIT = 200000000
  24. @fusion_manager.register("square")
  25. def square_compute(input_x, output_y, kernel_name="square"):
  26. """
  27. algorithm: square
  28. calculating data's square,y= x*x
  29. Parameters
  30. ----------
  31. input_x: TVM tensor
  32. the placeholder of input data
  33. output_y: dict
  34. shape and dtype of output, should be same shape and type as input
  35. kernel_name: str
  36. cce kernel name, default value is square
  37. Returns
  38. -------
  39. res : tvm.tensor
  40. the result of square
  41. """
  42. res = te.lang.cce.vmul(input_x, input_x)
  43. return res
  44. cus_conv2D_op_info = TBERegOp("CusSquare") \
  45. .fusion_type("OPAQUE") \
  46. .async_flag(False) \
  47. .binfile_name("square.so") \
  48. .compute_cost(10) \
  49. .kernel_name("CusSquare") \
  50. .partial_flag(True) \
  51. .input(0, "x", False, "required", "all") \
  52. .output(0, "y", False, "required", "all") \
  53. .dtype_format(DataType.F32_Default, DataType.F32_Default) \
  54. .get_op_info()
  55. @op_info_register(cus_conv2D_op_info)
  56. def CusSquare(input_x, output_y, kernel_name="square"):
  57. """
  58. algorithm: square
  59. calculating data's square,y= x*x
  60. Parameters
  61. ----------
  62. input_x : dict
  63. shape and dtype of input, only support float32
  64. output_y: dict
  65. shape and dtype of output, should be same shape and type as input
  66. kernel_name : str
  67. kernel name, default value is "square"
  68. Returns
  69. -------
  70. None
  71. """
  72. shape = input_x.get("shape")
  73. dtype = input_x.get("dtype").lower()
  74. shape = util.shape_refine(shape)
  75. data = tvm.placeholder(shape, name="data", dtype=dtype.lower())
  76. with tvm.target.cce():
  77. res = square_compute(data, output_y, kernel_name)
  78. sch = generic.auto_schedule(res)
  79. config = {"print_ir": False,
  80. "name": kernel_name,
  81. "tensor_list": [data, res]}
  82. te.lang.cce.cce_build_code(sch, config)