#!/usr/bin/env python3
# coding: utf-8
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""common"""

import akg.tvm
from .elewise_compute import vmuls, vadds, vmax, vmin, vabs, vrec, vmul, set_is_need_save_dtype
from .cast_compute import floor, round, cast


def fargmax(x, y):
    """
    Build the "fargmax" intrinsic call: index of the maximum among x and y.

    Args:
        x (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.

    Examples:
        >>> n = akg.tvm.var('n')
        >>> m = akg.tvm.var('m')
        >>> data = akg.tvm.placeholder((n, m), name='data')
        >>> k = akg.tvm.reduce_axis((0, m), "k")
        >>> reducer = akg.tvm.comm_reducer(lambda x,y: akg.fargmax(x, y),
        ...                                lambda t: akg.tvm.min_value(t), name="argmax")
        >>> res = akg.tvm.compute((n,), lambda *indice: reducer(data(*indice, k), axis=k), name="res")
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "fargmax", x, y)


def fargmin(x, y):
    """
    Build the "fargmin" intrinsic call: index of the minimum among x and y.

    Args:
        x (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "fargmin", x, y)


def mad(x, y):
    """
    Build the "mad" intrinsic call used for matrix multiply-and-add.

    Args:
        x (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.

    Examples:
        >>> n = akg.tvm.var('n')
        >>> m = akg.tvm.var('m')
        >>> k = akg.tvm.var('k')
        >>> A = akg.tvm.placeholder((m, k), name='A')
        >>> B = akg.tvm.placeholder((k, n), name='B')
        >>> kk = akg.tvm.reduce_axis((0, k), name='kk')
        >>> mmad = akg.tvm.comm_reducer(lambda x, y: akg.mad(x, y),
        ...                             lambda t: akg.tvm.const(0, dtype=t), name="mmad")
        >>> C = akg.tvm.compute((m, n), lambda i, j: mmad(A[i, kk] * B[kk, j], axis=kk), name="C")
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "mad", x, y)


# Ready-made commutative reducer: combines with mad() and starts from a zero
# constant of the reduced dtype.
mmad = akg.tvm.comm_reducer(
    lambda x, y: mad(x, y),
    lambda t: akg.tvm.const(0, dtype=t),
    name="mmad",
)


def dropout(x, y):
    """
    Build the "dropout" intrinsic call.

    Note that the call takes its dtype from y, not from x.

    Args:
        x (tvm.expr.Expr): Input expression.
        y (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = y.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "dropout", x, y)


def iou(x, y):
    """
    Build the "iou" intrinsic call: intersection over union of the x and y boxes.

    Args:
        x (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.
        y (tvm.expr.Expr): Input expression.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "iou", x, y)


def nms(x, y, scalar):
    """
    Build the "nms" intrinsic call: non-maximum suppression of the x and y boxes.

    Args:
        x (tvm.expr.Expr): Input argument of the reduced tensor; its dtype is
            the dtype of the call.
        y (tvm.expr.Expr): Input argument.
        scalar (Union[tvm.expr.Expr, float]): Score threshold of nms.

    Returns:
        tvm.expr.Expr. The result is stored in fp16; each fp16 is a hex number
        indicating suppression.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "nms", x, y, scalar)


def topk_sort(dst, src, topk):
    """
    Build the "topk_sort" intrinsic call.

    Sorts the proposal boxes and returns the topk result; used when the sort
    process needs to partition the sorting loop.

    Args:
        dst (tvm.expr.Expr): Destination of the sort, generated by the common reducer.
        src (tvm.expr.Expr): Source boxes; its dtype is the dtype of the call.
            Strictly required that the box number is divisible by 16 and the
            item number is 8.
        topk (tvm.expr.Expr): Constant expression giving the required topk number.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = src.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "topk_sort", dst, src, topk)


def proposal_sort(dst, src, topk):
    """
    Build the "proposal_sort" intrinsic call.

    Sorts the proposal boxes and returns the topk result.

    Args:
        dst (tvm.expr.Expr): Destination of the sort, generated by the common reducer.
        src (tvm.expr.Expr): Source boxes; its dtype is the dtype of the call.
            Strictly required that the box number is divisible by 16 and the
            item number is 8.
        topk (tvm.expr.Expr): Constant expression giving the required topk number.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = src.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "proposal_sort", dst, src, topk)


def fnot(x):
    """
    Build the "not" intrinsic call on x.

    Args:
        x (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "not", x)


def round_to(data, max_, min_):
    """
    Clamp the elements of data into the range [min_, max_].

    Args:
        data (Tensor): Tensor to clamp.
        max_ (float): Upper bound of the result.
        min_ (float): Lower bound of the result.

    Returns:
        akg.tvm.tensor. Elements of the tensor are in the range [min_, max_].
    """
    # data * 0 serves as a zero tensor from which the two bound tensors
    # are built by adding the scalar bounds.
    zeros = vmuls(data, 0)
    lower_bound = vadds(zeros, min_)
    upper_bound = vadds(zeros, max_)
    # Raise to the lower bound first, then cap at the upper bound.
    return vmin(vmax(data, lower_bound), upper_bound)


def cast_to(data, dtype, f1628_int_flag=False):
    """
    Wrapped cast operation: cast data to the requested dtype.

    Args:
        data (Tensor): akg.tvm.tensor whose dtype is to be changed.
        dtype (str): Destination dtype to cast to.
        f1628_int_flag (bool): For fp16 -> int8/uint8, whether the data is
            already all integers. Default value is False.

    Returns:
        akg.tvm.tensor. The input itself is returned when it already has the
        destination dtype.

    Raises:
        RuntimeError: If data is not an akg.tvm.tensor.Tensor.
    """
    if not isinstance(data, akg.tvm.tensor.Tensor):
        raise RuntimeError("The cast input type must be akg.tvm.tensor")
    src_dtype = data.dtype

    if src_dtype == "float16" and dtype == "int32":
        # float16 -> int32 is computed as floor(|data|) multiplied by a
        # rounded scaling factor instead of a direct cast.
        scale_up = akg.tvm.const(32768, dtype="float16")
        epsilon = akg.tvm.const(2 ** (-15), dtype="float16")

        clamped = round_to(data, 0.5, -0.5)

        # v / (|v| + 2**-15) with v = clamped * 32768, then rounded.
        # NOTE(review): presumably this yields the sign of each element
        # (-1, 0 or 1) — confirm against round()/vrec() semantics.
        scaled = vmuls(clamped, scale_up)
        magnitude = vabs(scaled)
        denominator = vadds(magnitude, epsilon)
        ratio = vmul(scaled, vrec(denominator))
        sign = round(ratio)

        floored_abs = floor(vabs(data))
        return vmul(floored_abs, sign)

    needs_half_shift = (
        src_dtype == "float16"
        and dtype in ("int8", "uint8")
        and not f1628_int_flag
    )
    if needs_half_shift:
        # Values not known to be integers are shifted by -0.5 before the cast.
        # NOTE(review): presumably compensates for the rounding of the
        # hardware cast — TODO confirm.
        minus_half = akg.tvm.const(-0.5, dtype="float16")
        set_is_need_save_dtype()
        data = vadds(data, minus_half)

    if src_dtype == dtype:
        return data

    # Every remaining conversion is routed through float16.
    fp16_data = data if src_dtype == "float16" else cast(data, dst_dtype="float16")
    return cast(fp16_data, dst_dtype=dtype)


def four2five_nchw(data):
    """
    Build the "four2five_nchw" intrinsic call on data.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "four2five_nchw", data)


def load_im2col_c1_buf(data, pad_h, pad_t, pad_l, pad_r, fm_h, fm_w, stride_h, stride_w,
                       filter_h, filter_w, dilation_h, dilation_w, repeat_mode, jmp_offset):
    """
    Build the "load_im2col_c1_buf" intrinsic call.

    All arguments are forwarded to the intrinsic unchanged and in the order of
    the signature; the call takes its dtype from data.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    im2col_args = (
        pad_h, pad_t, pad_l, pad_r,
        fm_h, fm_w,
        stride_h, stride_w,
        filter_h, filter_w,
        dilation_h, dilation_w,
        repeat_mode, jmp_offset,
    )
    return akg.tvm.call_pure_intrin(data.dtype, "load_im2col_c1_buf", data, *im2col_args)


def sin(data):
    """
    Build the "sin" intrinsic call on data.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "sin", data)


def cos(data):
    """
    Build the "cos" intrinsic call on data.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "cos", data)


def sinh(data):
    """
    Build the "sinh" intrinsic call on data.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "sinh", data)


def cosh(data):
    """
    Build the "cosh" intrinsic call on data.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "cosh", data)


def divide_var(data, divisor):
    """
    Build the "divide_var" intrinsic call with data and divisor.

    Args:
        data (tvm.expr.Expr): Input expression; its dtype is the dtype of the call.
        divisor: Second operand, forwarded to the intrinsic unchanged.

    Returns:
        tvm.expr.Expr. The call expression.
    """
    result_dtype = data.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "divide_var", data, divisor)


def vmadd(x, y, z):
    """
    Call the vmadd instruction to calculate :math:`x * y + z`.

    The operands are handed to the intrinsic in the order (y, z, x).

    Args:
        x (tvm.tensor.Tensor): input x; its dtype is the dtype of the call.
        y (tvm.tensor.Tensor): input y.
        z (tvm.tensor.Tensor): input z.

    Returns:
        tensor : akg.tvm.tensor.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "vmadd", y, z, x)


def vmla(x, y, z):
    """
    Call the vmla instruction to calculate :math:`x + y * z`.

    The operands are handed to the intrinsic in the order (y, z, x).

    Args:
        x (tvm.tensor.Tensor): input x; its dtype is the dtype of the call.
        y (tvm.tensor.Tensor): input y.
        z (tvm.tensor.Tensor): input z.

    Returns:
        tensor : akg.tvm.tensor.
    """
    result_dtype = x.dtype
    return akg.tvm.call_pure_intrin(result_dtype, "vmla", y, z, x)