#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""operator dsl function:floor"""
from akg import tvm
import akg
from akg.utils import validation_check as vc_util
from akg.utils import kernel_exec as utils


@vc_util.check_input_type(akg.tvm.tensor.Tensor)
def floor(data):
    """
    Compute, element-wise, the largest integer that does not exceed the input.

    Args:
        data (tvm.tensor.Tensor): Input tensor; its dtype is validated against
            DtypeForDavinci.ALL_FLOAT (float16 / float32).

    Returns:
        tvm.tensor.Tensor, same shape as data, with type int32.
    """
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [dim.value for dim in data.shape]
    vc_util.check_shape(shape)

    needs_fp16_workaround = utils.product_is_mini() and data.dtype == "float32"
    if not needs_fp16_workaround:
        # Common path: hand the tensor straight to the built-in floor.
        return akg.lang.cce.floor(data)

    # Mini product with float32 input: the floor is taken on a float16 copy,
    # and rounding to float16 can move a value up to the next integer
    # (e.g. fp16(86.9996) == 87), which would yield floor(86.9996) == 87.
    # The overshoot is detected by checking whether
    #     fp32(x) - fp32(floor(fp16(x))) < 0
    # and, when it is, one is subtracted from the float16 floor.
    half_input = akg.lang.cce.cast_to(data, "float16")
    raw_floor = akg.lang.cce.floor(half_input)
    half_floor = akg.lang.cce.cast_to(raw_floor, "float16")
    single_floor = akg.lang.cce.cast(half_floor, "float32")

    # A tiny difference (around 1e-7) loses its sign once cast to float16.
    # Scaling by 10000 first (1e-7 -> 1e-3) keeps the sign representable
    # without changing it.
    scaled_gap = (data - single_floor) * 10000
    half_gap = akg.lang.cce.cast_to(scaled_gap, "float16")

    def _corrected_floor(*indices):
        """Pick floor - 1 where the float16 floor overshot, else the floor."""
        return akg.tvm.expr.Select(
            half_gap(*indices) < tvm.const(0, "float16"),
            half_floor(*indices) - tvm.const(1, "float16"),
            half_floor(*indices))

    corrected = akg.tvm.compute(shape, _corrected_floor, name="res")
    return akg.lang.cce.cast_to(corrected, "int32")