You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

floor.py 2.6 kB

5 years ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. #!/usr/bin/env python3
  2. # coding: utf-8
  3. # Copyright 2019 Huawei Technologies Co., Ltd
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """operator dsl function:floor"""
  17. from akg import tvm
  18. import akg
  19. from akg.utils import validation_check as vc_util
  20. from akg.utils import kernel_exec as utils
  21. @vc_util.check_input_type(akg.tvm.tensor.Tensor)
  22. def floor(data):
  23. """
  24. Returns element-wise largest integer not greater than x.
  25. Args:
  26. data (tvm.tensor.Tensor): Tensor of type float16, and float32
  27. Returns:
  28. tvm.tensor.Tensor, has the same shape as data and type of int32.
  29. """
  30. vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
  31. shape = [x.value for x in data.shape]
  32. vc_util.check_shape(shape)
  33. if utils.product_is_mini() and data.dtype == "float32":
  34. # solve the problem of 87==floor(86.9996) when high_precision is needed.
  35. # problem is caused by such as fp16(86.9996)==87.
  36. # detect problem by fp32(86.9996) - fp32(floor(fp16(86.9996))) < 0
  37. # floor could only apply on float16
  38. data_fp16 = akg.lang.cce.cast_to(data, "float16")
  39. floor_data = akg.lang.cce.floor(data_fp16)
  40. floor_fp16 = akg.lang.cce.cast_to(floor_data, "float16")
  41. floor_fp32 = akg.lang.cce.cast(floor_fp16, "float32")
  42. # if diff=1e-7, we cannot get right sign of fp16(diff)
  43. # but we can get right sign of 10000*diff = 1e-3, which has the same
  44. # sign as diff
  45. diff = (data - floor_fp32) * 10000
  46. diff_fp16 = akg.lang.cce.cast_to(diff, "float16")
  47. # if diff < 0 and floor == ceil, then it's 87 = floor(86.99999)
  48. res = akg.tvm.compute(shape,
  49. lambda *i: akg.tvm.expr.Select(
  50. diff_fp16(*i) < tvm.const(0, "float16"),
  51. floor_fp16(*i) - tvm.const(1, "float16"),
  52. floor_fp16(*i)),
  53. name="res")
  54. res = akg.lang.cce.cast_to(res, "int32")
  55. else:
  56. res = akg.lang.cce.floor(data)
  57. return res