diff --git a/ci/compatibility/fbs/V2-backup/dtype.fbs b/ci/compatibility/fbs/V2-backup/dtype.fbs new file mode 100644 index 00000000..a387d05c --- /dev/null +++ b/ci/compatibility/fbs/V2-backup/dtype.fbs @@ -0,0 +1,43 @@ +namespace mgb.serialization.fbs; + +// Keep in sync with dnn/include/megdnn/dtype.h +// Please only add new dtypes at the end of this list +enum DTypeEnum : byte { + Float32, + Uint8, + Int8, + Int16, + Int32, + IntB1, + IntB2, + IntB4, + Byte, + Float16, + UintB4, + Quantized8Asymm, + QuantizedS32, + QuantizedS8, + Quantized4Asymm, + QuantizedS4, + QuantizedS16, + BFloat16, + Bool, + Uint16, + QuantizedS1, +} + +table LinearQuantizationParam { + scale:float; + + // Won't be set for symmetric quantization types + zero_point:ubyte; +} + +union DTypeParam { + LinearQuantizationParam, +} + +table DType { + type:DTypeEnum; + param:DTypeParam; +} diff --git a/ci/compatibility/fbs/V2-backup/mgb_cpp_opr.fbs b/ci/compatibility/fbs/V2-backup/mgb_cpp_opr.fbs new file mode 100644 index 00000000..48dfe44c --- /dev/null +++ b/ci/compatibility/fbs/V2-backup/mgb_cpp_opr.fbs @@ -0,0 +1,62 @@ +include "dtype.fbs"; + +namespace mgb.serialization.fbs.param; + +struct PersistentDTypeScalar { + dtype:DTypeEnum; + storage:[ubyte:4]; +} + +table MGBAddUpdate { + alpha:PersistentDTypeScalar; + beta:PersistentDTypeScalar; + bias:PersistentDTypeScalar; +} + +table Host2DeviceCopy { + enable_value_infer:bool = true; + dump_default_value:bool = false; + allow_cpu_mem_fwd:bool = true; +} + +table Dimshuffle { + pattern:[int]; + ndim:uint; +} + +enum AxisDescMethod : byte { + ADD_1, + REMOVE, +} + +struct AxisDesc { + method:AxisDescMethod; + axis:int; +} + +table AxisAddRemove { + desc:[AxisDesc]; +} + +table MGBSleep { + device:bool = true; + host:bool = false; + seconds:double; +} + +struct IndexDescMaskItem { + axis:byte; + begin:bool; + end:bool; + step:bool; + idx:bool; +} + +table IndexDescMaskDump { + items:[IndexDescMaskItem]; +} + +table NMSKeep { + iou_thresh:float; + max_output:uint; +} diff --git a/ci/compatibility/fbs/V2-backup/mgb_opr_param_defs.fbs b/ci/compatibility/fbs/V2-backup/mgb_opr_param_defs.fbs new file mode 100644 index 00000000..f21634b2 --- /dev/null +++ b/ci/compatibility/fbs/V2-backup/mgb_opr_param_defs.fbs @@ -0,0 +1,237 @@ +// generated by gen_param_defs.py for c23d51f3c4f33119fd74f58f04d112ccea8f64f1249ab372300975ab7e710e9a +include "dtype.fbs"; +namespace mgb.serialization.fbs.param; + +/// mode of collective communication +enum CollectiveCommMode : uint { + /// reduce by sum to output computing node + REDUCE_SUM = 0, + /// copy input value to each output computing node + BROADCAST = 1, + /// each output comp node gets the concatenated value of all inputs + ALL_GATHER = 2, + /// reduce inputs by sum and each output gets one part of it + REDUCE_SCATTER_SUM = 3, + /// every output gets the sum of all inputs + ALL_REDUCE_SUM = 4, + /// every output gets the max of all inputs + ALL_REDUCE_MAX = 5, + /// every output gets the min of all inputs + ALL_REDUCE_MIN = 6, + /// every output gets the prod of all inputs + ALL_REDUCE_PROD = 7, + /// concat inputs to one node + GATHER = 8, + /// scatter input to each output computing node + SCATTER = 9, + /// scatter inputs and gather them on each computing node + ALL_TO_ALL = 10, +} + +/// mode for computing the gradient +enum CondExecMarkGradMode : uint { + /// normal gradient mode: sum all the activated components + SUM = 0, + /// use :attr:`CondExecMerge.SUM_COND_OUT` mode so oprs that depend on the + /// gradient opr would not 
be executed if the forward var is not used. + SUM_COND_OUT = 1, +} + +/// static inference option. **Note:** This is a workaround: since +/// currently static inference in MegBrain does not take conditional +/// execution into account, this option can be used to bypass static +/// inference errors. This is currently only used by automatically +/// generated gradient oprs. +enum CondExecMarkStaticInfer : uint { + /// enable both shape and value inference + SHAPE_VALUE = 0, + /// only enable shape inference (disable value inference) + SHAPE_ONLY = 1, + /// disable both shape and value inference + NONE = 2, +} + +enum CondExecMergeMode : uint { + /// copy the var whose mask is activated to the output, requiring that + /// exactly one branch is active + EXACT_ONE = 0, + /// like :attr:`EXACT_ONE` with the requirement that all branches have the + /// same shape, so shape inference can be easier + EXACT_ONE_SAME_SHAPE = 1, + /// sum all the active branches into output var; require all branches to + /// have the same shape. Extra shape vars are needed in this mod, so the + /// outputs can be initialized to zero when no input is active (and their + /// shapes are probably unknown). + SUM = 2, + /// like :attr:`SUM` but also add an ExecutionMask to the readers of output + /// vars, so they would be skipped if no branch is taken + SUM_COND_OUT = 3, +} + +/// how to compare predicate var with branch keys +enum CondExecPredMode : uint { + /// The outputs correspond to branch keys, and the one which equals + /// predicate would be activated. This behaves like a case-statement in many + /// languages. + CASE = 0, + /// like :attr:`CASE`, but add an extra output that would be activated if no + /// branch is matched + CASE_FALLBACK = 1, + /// One more outputs would be produced than the number of branch keys, + /// representing the interval in which the predicate var fits in. The + /// intervals are defined as :math:`(-\\infty, k_0), [k_0, k_1), \\ldots, + /// [k_{n-2}, k_{n-1}), [k_{n-1}, \infty)`. The keys must be given in + /// ascending order. + PIECEWISE = 2, +} + +enum CondExecPredLogicalMode : uint { + /// logical or + OR = 0, + /// logical and + AND = 1, + /// exclusive-or + XOR = 2, + /// not or(inputs) + NOR = 3, + /// not and(inputs) + NAND = 4, + /// not xor(inputs) + XNOR = 5, +} + +enum ExecutionPolicyStrategy : uint (bit_flags) { + /// use heuristic to choose the fastest algorithm + HEURISTIC = 0, + /// run possible algorithms on real device to find the best + PROFILE = 1, + /// when profile or heuristic algo selection it require the algosmust be + /// reproducible + REPRODUCIBLE = 2, + /// profile require algos are optmized to achieve fast-profile + OPTIMIZED = 3, +} + +enum ExecutionPolicyV0Strategy : uint { + /// use heuristic to choose the fastest algorithm + HEURISTIC = 0, + /// use heuristic to choose the fastest algorithm, and the chosen algorithm + /// is reproducible + HEURISTIC_REPRODUCIBLE = 1, + /// run possible algorithms on real device to find the best + PROFILE = 2, + /// the fastest of profile result that is also reproducible + PROFILE_REPRODUCIBLE = 3, + /// use profile result and heuristic to choose the fastest algorithm + PROFILE_HEURISTIC = 4, +} + +table DType { + dtype:DTypeEnum = Byte; +} + +table PersistentOutputStorage { + /// This is used for controlling memory sharing. Multiple + /// ``PersistentOutputStorage'' oprs with the same ``share_key'' would share + /// underlying tensor storage. 
Note that the value ``-1'' is treated + /// specially: storage of oprs with this key would be private and would not + /// be shared with any other opr. + share_key:int = -1; +} + +/// optinal axis: axis == -1 means no axis +table OptionalAxis { + axis:int = -1; +} + +/// optinal axis: axis == MAX_NDIM means no axis +table OptionalAxisV1 { + axis:int = 7; +} + +table ExecutionPolicyV0 { + strategy:ExecutionPolicyV0Strategy = HEURISTIC; + /// workspace limit in bytes + workspace_limit:ulong = 18446744073709551615; +} + +/// specify how to select an algorithm for an operator +table ExecutionPolicy { + strategy:ExecutionPolicyStrategy = 1; + /// workspace limit in bytes + workspace_limit:ulong = 18446744073709551615; +} + +table AssertEqual { + /// max allowed error; error is defined as the minimal of absolute and + /// relative error + maxerr:float = 0.0001; + /// whether to print maxerr to stdout during opr exec + verbose:bool = false; +} + +table FpgaConv { + need_output_quantize:bool = false; + need_output_threshold:bool = false; + stride:int = 1; + input_bit_width:int = 2; + output_bit_width:int = 2; + weight_bit_width:int = 2; + thres0:int = 0; + thres1:int = 1; + unpool_size:uint = 4; + direct_size:uint = 4; +} + +/// collective communication between multiple computing nodes on localhost +table CollectiveComm { + /// mode of collective communication + mode:CollectiveCommMode = REDUCE_SUM; +} + +/// HACK: The tag of this param def is actually used for another non-generated +/// param def SerializedDType, the sole purpose of this param def is to provide +/// a spare tag. Do not use. +table FakeSerializedDType { +} + +/// evaluate a predicate and branch keys to setup ExecutionMask objects with +/// associated predicate proxy vars (PPVs) +table CondExecPred { + /// how to compare predicate var with branch keys + mode:CondExecPredMode = CASE; + /// threshold for checking equality of float point values + eps:float = 0.0001; +} + +/// compute a logical function over a set of PPVs +table CondExecPredLogical { + mode:CondExecPredLogicalMode = OR; +} + +/// add ExecutionMask of the input PPV to this opr and readers of the outputs of +/// this opr +table CondExecMark { + /// mode for computing the gradient + grad_mode:CondExecMarkGradMode = SUM; + /// static inference option. **Note:** This is a workaround: since + /// currently static inference in MegBrain does not take conditional + /// execution into account, this option can be used to bypass static + /// inference errors. This is currently only used by automatically + /// generated gradient oprs. + static_infer:CondExecMarkStaticInfer = SHAPE_VALUE; +} + +/// merge multiple conditional execution branches +table CondExecMerge { + /// number of output vars (i.e. vars per branch) + nr_output:uint = 1; + mode:CondExecMergeMode = EXACT_ONE; +} + +/// opr Implements NVIDIA Optical Flow SDK. +table NvOf { + precision:uint = 1; +} + + diff --git a/ci/compatibility/fbs/V2-backup/opr_param_defs.fbs b/ci/compatibility/fbs/V2-backup/opr_param_defs.fbs new file mode 100644 index 00000000..1ef3ac1a --- /dev/null +++ b/ci/compatibility/fbs/V2-backup/opr_param_defs.fbs @@ -0,0 +1,1912 @@ +// generated by gen_param_defs.py for 53ca6252b5b9568f67b9767fb4fd0d2ef6b717b28a861692e9105d5f796a9472 +include "dtype.fbs"; +namespace mgb.serialization.fbs.param; + +enum ArgsortOrder : uint { + ASCENDING = 0, + DESCENDING = 1, +} + +enum BNFwdMode : uint { + /// Training phase. + TRAINING = 0, + /// Inference phase. 
+ INFERENCE = 1, +} + +enum BNParamDim : uint { + /// Dim of params (Sigma, Mu) is 1 x 1 x H x W + DIM_11HW = 0, + /// Dim of params (Sigma, Mu) is 1 x C x H x W + DIM_1CHW = 1, + /// Dim of params (Sigma, Mu) is 1 x C x 1 x 1 + DIM_1C11 = 2, + /// Dim of params (Sigma, Mu) is 1 x 1 x 1 x C + DIM_111C = 3, +} + +enum CondTakeMode : uint { + /// take if ``abs(data-val)<eps`` + EQ = 0, + /// take if ``abs(data-val)>=eps`` + NEQ = 1, + /// take if ``data<val`` + LT = 2, + /// take if ``data<=val`` + LEQ = 3, + /// take if ``data>val`` + GT = 4, + /// take if ``data>=val`` + GEQ = 5, +} + +enum Conv3DBiasNonlineMode : uint { + IDENTITY = 0, + RELU = 1, + SIGMOID = 2, +} + +enum ConvBiasV0NonlineMode : uint { + IDENTITY = 0, + RELU = 1, + SIGMOID = 2, + H_SWISH = 3, +} + +enum ConvPoolingMethod : uint { + WITH_TEXTURE_OBJ = 0, + WITH_SHARED_MEM = 1, +} + +enum ConvPoolingNonlineMode : uint { + IDENTITY = 0, + RELU = 1, + SIGMOID = 2, +} + +enum ConvPoolingPoolMode : uint { + AVERAGE = 0, + MAX_ = 1, +} + +/// convolution data/filter/output format; see :class:`RelayoutFormat` for more +/// details +enum ConvolutionFormat : uint { + NCHW = 0, + NHWC = 1, + NHWCD4 = 2, + NCHW4 = 3, + NCHW8 = 4, + NCHW32 = 5, + NCHW88 = 6, + NCHW44 = 7, + NCHW44_DOT = 8, + /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is + /// nchw32 layout + NCHW4_NCHW32 = 9, + /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is + /// nchw4 layout + NCHW32_NCHW4 = 10, + /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw + /// layout + NCHW4_NCHW = 11, + /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw + /// layout + NHWC_NCHW = 12, + /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, output + /// tensor is nchw4 layout, padding c=4 + NHWC_NCHW4_IC_SMALL = 13, + /// NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, output + /// tensor is nchw4 layout, padding c=4 + NCHW_NCHW4_IC_SMALL = 14, + /// CHWN4 is currently only used on Nvidia platform for fast implementation + /// of convolution using CUDA/SASS. The channels are splitted to groups of 4 + /// channels. + CHWN4 = 15, + /// NCHW64 is designed for convolution implementation to utilizing + /// TensorCore instructions for 4-bit integers on Nvidia platforms + NCHW64 = 16, + /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc + /// layout + NCHW4_NHWC = 17, +} + +enum Convolution3DDataType : uint { + /// input/output both float32/float16 + FLOAT = 0, + /// input/output both float16, the internal compute is float32 + FLOAT_IO16xC32 = 1, +} + +enum Convolution3DFormat : uint { + NCDHW = 0, + NDHWC = 1, +} + +enum Convolution3DMode : uint { + CROSS_CORRELATION = 0, + CONVOLUTION = 1, +} + +enum Convolution3DSparse : uint { + /// dense convolution: filter shape should be [oc, ic, spatial...] if format + /// is NCDHW, [oc, spatial..., ic] if format is NDHWC + DENSE = 0, + /// group convolution: filter shape should be [group, oc_per_group, + /// ic_per_group, spatial...]
if format is NCDHW, [group, oc_per_group, + /// spatial..., ic_per_group] if format is NDHWC + GROUP = 1, +} + +enum ConvolutionV0DataType : uint { + /// input/output both float32/float16 + FLOAT = 0, + INT8x8x16 = 1, + INT8x8x32 = 2, + /// input/output both float16, the internal compute is float32 + FLOAT_IO16xC32 = 3, + /// input QuantizedAsymm8, output QuantizedS32 + QUINT8x8x32 = 4, + /// input int8, output specified by tensor DType + INT8x8xX = 5, + /// input QuantizedAsymm4, output QuantizedS32 + QUINT4x4x32 = 6, +} + +/// convolution data/filter/output format; see :class:`RelayoutFormat` for more +/// details +enum ConvolutionV0Format : uint { + NCHW = 0, + NHWC = 1, + NHWCD4 = 2, + NCHW4 = 3, + NCHW8 = 4, + NCHW32 = 5, + NCHW88 = 6, + NCHW44 = 7, + NCHW44_DOT = 8, + /// NCHW layout with weights tranformed by winograd + NCHW_WINOGRAD = 9, + /// NCHW88 layout with weights tranformed by winograd + NCHW88_WINOGRAD = 10, + /// NCHW44 layout with weights tranformed by winograd + NCHW44_WINOGRAD = 11, + /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is + /// nchw32 layout + NCHW4_NCHW32 = 12, + /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is + /// nchw4 layout + NCHW32_NCHW4 = 13, + /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw + /// layout + NCHW4_NCHW = 14, + /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw + /// layout + NHWC_NCHW = 15, + /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, output + /// tensor is nchw4 layout, padding c=4 + NHWC_NCHW4_IC_SMALL = 16, + /// NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, output + /// tensor is nchw4 layout, padding c=4 + NCHW_NCHW4_IC_SMALL = 17, + /// CHWN4 is currently only used on Nvidia platform for fast implementation + /// of convolution using CUDA/SASS. The channels are splitted to groups of 4 + /// channels. + CHWN4 = 18, + /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc + /// layout + NCHW4_NHWC = 19, +} + +enum ConvolutionV0Mode : uint { + CROSS_CORRELATION = 0, + CONVOLUTION = 1, +} + +enum ConvolutionV0Sparse : uint { + /// dense convolution: filter shape should be [oc, ic, spatial...] if format + /// is NCHW, [oc, spatial..., ic] if format is NHWC + DENSE = 0, + /// group convolution: filter shape should be [group, oc_per_group, + /// ic_per_group, spatial...] if format is NCHW, [group, oc_per_group, + /// spatial..., ic_per_group] if format is NHWC + GROUP = 1, +} + +/// Specifies special computation modes, e.g. different combinations of +/// intermediate result data types. +enum ConvolutionV1ComputeMode : uint { + /// No special requirements on the precision of intermediate results. + DEFAULT = 0, + /// Use Float32 accumulator and intermediate result. Only supported when + /// input and output is Float16. 
+ FLOAT32 = 1, +} + +enum CvtColorMode : uint { + RGB2GRAY = 0, + RGB2YUV = 1, + YUV2RGB = 2, + GRAY2RGB = 3, + RGBA2RGB = 4, + RGBA2BGR = 5, + RGBA2GRAY = 6, + RGB2BGR = 7, + BGR2GRAY = 8, + BGR2RGB = 9, + /// For historical reasons, referred to as YCC by opencv + YUV2GRAY_NV21 = 10, + YUV2RGB_NV21 = 11, + YUV2BGR_NV21 = 12, + YUV2GRAY_NV12 = 13, + YUV2RGB_NV12 = 14, + YUV2BGR_NV12 = 15, + YUV2GRAY_YV12 = 16, + YUV2RGB_YV12 = 17, + YUV2BGR_YV12 = 18, + YUV2GRAY_YU12 = 19, + YUV2RGB_YU12 = 20, + YUV2BGR_YU12 = 21, + YCrCb2RGB = 22, + YCrCb2BGR = 23, + /// BT601 yuv format, referred to as YUV by opencv + BT601_YUV2RGB_NV21 = 24, + BT601_YUV2BGR_NV21 = 25, + BT601_YUV2RGB_NV12 = 26, + BT601_YUV2BGR_NV12 = 27, + BT601_YUV2RGB_YV12 = 28, + BT601_YUV2BGR_YV12 = 29, + BT601_YUV2RGB_YU12 = 30, + BT601_YUV2BGR_YU12 = 31, +} + +enum DctChannelSelectV0FastImpl : uint { + NONE = 0, + FIX_32_MASK = 1, +} + +enum ElemwiseMode : uint { + /// unary: max(x, 0) + RELU = 0, + /// unary: abs(x) + ABS = 1, + /// unary: acos(x) + ACOS = 2, + /// unary: asin(x) + ASIN = 3, + /// unary: ceil(x) + CEIL = 4, + /// unary: cos(x) + COS = 5, + /// unary: exp(x) + EXP = 6, + /// unary: numerically stable exp(x)-1 + EXPM1 = 7, + /// unary: floor(x) + FLOOR = 8, + /// unary: natural logarithm, log(x) + LOG = 9, + /// unary: numerically stable log(x+1) + LOG1P = 10, + /// unary: -x + NEGATE = 11, + /// unary: 1/(1+exp(-x)) + SIGMOID = 12, + /// unary: sin(x) + SIN = 13, + /// unary: tanh(x) + TANH = 14, + /// binary: x > 0 ? y : -y + ABS_GRAD = 15, + /// binary: x + y + ADD = 16, + /// binary: floor(x / y) + FLOOR_DIV = 17, + /// binary: max(x, y) + MAX_ = 18, + /// binary: min(x, y) + MIN_ = 19, + /// binary: x % y or fmodf(x, y) + MOD = 20, + /// binary: x * y + MUL = 21, + /// binary: pow(x, y) + POW = 22, + /// binary: x * (1 - x) * y + SIGMOID_GRAD = 23, + /// binary: x - y + SUB = 24, + /// binary: (x > 0) * y + SWITCH_GT0 = 25, + /// binary: (1 - x * x) * y + TANH_GRAD = 26, + /// binary: x / y + TRUE_DIV = 27, + /// binary: numerically stable log(exp(x) + exp(y)) + LOG_SUM_EXP = 28, + /// binary: x < y + LT = 29, + /// binary: x <= y + LEQ = 30, + /// binary: x == y + EQ = 31, + /// bitwise binary: x << y. Note that result is undefined if y < 0 or y >= + /// bitwidth. Logical shift is performed for unsigned intergers, and + /// arithmetic shift for signed ones. + SHL = 32, + /// bitwise binary: x >> y; see SHL mode for more details + SHR = 33, + /// ternary: x <= y ? z : 0 + COND_LEQ_MOV = 34, + /// compute ``a * b + c`` where c must either have same layout as a or b, or + /// be a scalar + FUSE_MUL_ADD3 = 35, + /// compute ``a * A + b * B`` where a and b must have equal layout, and A + /// and B must have equal layout. In the inputs ``b`` and ``B`` can be + /// swapped + FUSE_MUL_ADD4 = 36, + /// binary: max(x+y, 0) + FUSE_ADD_RELU = 37, + /// binary: 1/(1+exp(-(x+y))) + FUSE_ADD_SIGMOID = 38, + /// binary: tanh(x+y) + FUSE_ADD_TANH = 39, + /// unary: rational approximation of tanh(x) + FAST_TANH = 40, + /// binary: grad of the rational approximation of tanh(x) + FAST_TANH_GRAD = 41, + /// unary: round(x), the nearest integer value to x, rounding halfway cases + /// away from zero. Float only. + ROUND = 42, + /// binary: rounded higher l bits of x * y, where l is the bit length of x. 
+ RMULH = 43, + /// binary: atan2(y,x) + ATAN2 = 44, + /// unary: erf(x) + ERF = 45, + /// unary: inverse function of erf(x) + ERFINV = 46, + /// unary: erfc(x) + ERFC = 47, + /// unary: inverse function of erfc(x) + ERFCINV = 48, + /// unary: x * clip(x + 3, 0, 6) / 6 + H_SWISH = 49, + /// binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y) + H_SWISH_GRAD = 50, + /// binary: hswish(x+y) + FUSE_ADD_H_SWISH = 51, + /// unary: !x + NOT = 52, + /// binary: x && y + AND = 53, + /// binary: x || y + OR = 54, + /// binary: x ^ y + XOR = 55, + /// unary: x / (1 + exp(-x)) + SILU = 56, + /// binary: grad(x / (1 + exp(-x)) + SILU_GRAD = 57, + /// unary: x Phi(x) + GELU = 58, + /// binary: grad(x Phi(x)) + GELU_GRAD = 59, +} + +enum ElemwiseMultiTypeMode : uint { + /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c`` + /// int32, and the result is int32. This mode is optimized for the channel- + /// broadacsted case, i.e. ``a`` has shape (A, B, C) and ``b`` and ``c`` + /// have shape (1, C, 1) + FUSE_MUL_ADD3_INT16x32x32x32 = 0, + /// compuate ``a * b + c`` where the inputs ``a`` is an integer type ``b`` + /// and ``c`` are both ``float32``, the result is ``int8``. This is + /// currently only optimized for ``(1, x)`` broadcast for ``b`` and ``c``. + /// Computation is carried in floating points and results are rounded + /// towards zero with saturated cast to int. + FUSE_MUL_ADD3_IXxF32xF32xI8 = 1, + /// Compute ``a >> b``, round the result according to lower ``b`` bits of + /// ``a``` and make a saturating conversion to int8. Where ``a`` should be + /// an integer tensor and ``b`` should be an int8 scalar. + ROUND_SHR_SATURATE_IXxI8xI8 = 2, + /// Fused operation of an int16 elemwise add, an int16 rounding multiply + /// high and an int16 to int8 rounding right shift with saturation. + FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8 = 3, + /// Fused operation of an int32 elemwise add, an int32 rounding multiply + /// high and an int32 to int8 rounding right shift with saturation. + FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8 = 4, + /// Compute ``a >> b``, round the result according to lower ``b`` bits of + /// ``a``` and make a saturating conversion to int16. Where ``a`` should be + /// an integer tensor and ``b`` should be an int8 scalar. + ROUND_SHR_SATURATE_IXxI8xI16 = 5, + /// Fused elemwise add two quantized int8 with specifiedoutput quantized + /// dtype + QADD = 6, + /// Fused elemwise add two quantized int8 followed by ReLU and typecvt to + /// specified dtype + QFUSE_ADD_RELU = 7, + /// Fused elemwise multiply two quantized int8 with specifiedoutput + /// quantized dtype + QMUL = 8, + /// Fused elemwise min two quantized int8 with specifiedoutput quantized + /// dtype + QMIN = 9, + /// quantized: max(x, y), with specified output quantized dtype + QMAX = 10, + /// quantized: x - y + QSUB = 11, + /// quantized: x / y + QTRUE_DIV = 12, + /// quantized: sigmoid(x + y) + QFUSE_ADD_SIGMOID = 13, + /// quantized: tanh(x + y) + QFUSE_ADD_TANH = 14, + /// quantized: x > 0 ? x : 0 + QRELU = 15, + /// quantized: x > 0 ? 
x : -x + QABS = 16, + /// quantized: sigmoid(x) + QSIGMOID = 17, + /// quantized: exp(x) + QEXP = 18, + /// quantized: tanh(x) + QTANH = 19, + /// quantized: x * y + z + QFUSE_MUL_ADD3 = 20, + /// quantized: fast_tanh(x) + QFAST_TANH = 21, + /// quantized: -x + QNEGATE = 22, + /// quantized: acos(x) + QACOS = 23, + /// quantized: asin(x) + QASIN = 24, + /// quantized: ceil(x) + QCEIL = 25, + /// quantized: cos(x) + QCOS = 26, + /// quantized: expm1(x) + QEXPM1 = 27, + /// quantized: floor(x) + QFLOOR = 28, + /// quantized: log(x) + QLOG = 29, + /// quantized: log1p(x) + QLOG1P = 30, + /// quantized: sin(x) + QSIN = 31, + /// quantized: round(x) + QROUND = 32, + /// quantized: erf(x) + QERF = 33, + /// quantized: erfinv(x) + QERFINV = 34, + /// quantized: erfc(x) + QERFC = 35, + /// quantized: erfcinv(x) + QERFCINV = 36, + /// quantized: abs_grad + QABS_GRAD = 37, + /// quantized floor_div + QFLOOR_DIV = 38, + /// quantized mod + QMOD = 39, + /// quantized sigmoid_grad + QSIGMOID_GRAD = 40, + /// quantized switch_gt0 + QSWITCH_GT0 = 41, + /// quantized tanh_grad + QTANH_GRAD = 42, + /// quantized lt + QLT = 43, + /// quantized leq + QLEQ = 44, + /// quantized eq + QEQ = 45, + /// quantized pow + QPOW = 46, + /// quantized log_sum_exp + QLOG_SUM_EXP = 47, + /// quantized fast_tanh_grad + QFAST_TANH_GRAD = 48, + /// quantized atan2 + QATAN2 = 49, + /// quantized cond_leq_mov + QCOND_LEQ_MOV = 50, + /// quantized h_swish + QH_SWISH = 51, + /// quantized h_swish(x+y) + QFUSE_ADD_H_SWISH = 52, + /// quantized h_swish_grad + QH_SWISH_GRAD = 53, + /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c`` + /// float32, and the result is float32. + FUSE_MUL_ADD3_INT16xF32xF32xF32 = 54, + /// compute ``a * b `` requiring that ``a`` be int16 and ``b`` float32, and + /// the result is float32. + MUL_INT16xF32xF32 = 55, + /// compute ``a * b + c`` requiring that ``a`` be uint8 and ``b`` and ``c`` + /// float32, and the result is float32. + FUSE_MUL_ADD3_UINT8xF32xF32xF32 = 56, +} + +enum MatrixMulFormat : uint { + /// Normal matrix mul: (M, K) x (K, N) = (M, N) + DEFAULT = 0, + /// Split 4 from M and K, better for neon compute:(M/4, K/4, 4(k), 4(m)) x + /// (K/4, N, 4(k)). if transposeA the layout is (K/4, M/4, 4(k), 4(m)) x + /// (K/4, N, 4(k)) + MK4 = 1, + /// Split 8 from M and K, better for neon compute:(M/8, K/8, 8(k), 8(m)) x + /// (K/8, N, 8(k)). if transposeA the layout is (K/8, M/8, 8(k), 8(m)) x + /// (K/8, N, 8(k)) + MK8 = 2, + /// Split 4 from M and K, better for neon dotprod:M/4, K/4, 4(m), 4(k)) x + /// (K/4, N, 4(k)). if transposeA the layout is (K/4, M/4, 4(m), 4(k)) x + /// (K/4, N, 4(k)) + MK4_DOT = 3, +} + +enum MatrixMulV0DataType : uint { + /// input/output both float32/float16 + FLOAT = 0, + INT8x8x16 = 1, + INT8x8x32 = 2, + /// input/output both float16, the internal compute is float32 + FLOAT_IO16xC32 = 3, + /// input QuantizedAsymm8, output QuantizedS32 + QUINT8x8x32 = 4, + /// input QuantizedAsymm4, output QuantizedS32 + QUINT4x4x32 = 5, +} + +/// Specifies special computation modes, e.g. different combinations of +/// intermediate result data types. +enum MatrixMulV1ComputeMode : uint { + /// No special requirements on the precision of intermediate results. + DEFAULT = 0, + /// Use Float32 accumulator and intermediate result. Only supported when + /// input and output is Float16. 
+ FLOAT32 = 1, +} + +enum PaddingPaddingMode : uint { + /// aaaaaa|abcdefgh|hhhhhhh + REPLICATE = 0, + /// fedcba|abcdefgh|hgfedcb + REFLECT = 1, + /// iiiiii|abcdefgh|iiiiiii + CONSTANT = 2, +} + +enum PoolingV0Mode : uint { + /// maximum value inside pooling window + MAX_ = 0, + /// arithmetic mean of all values inside pooling window. Padding values are + /// taken into account and are viewed as zero + AVERAGE = 1, + /// arithmetic mean of all values inside pooling window. No padding isused. + AVERAGE_COUNT_EXCLUDE_PADDING = 2, +} + +enum RNNCellNonlineMode : uint { + IDENTITY = 0, + RELU = 1, + TANH = 2, +} + +enum ROIAlignV0Mode : uint { + MAX_ = 0, + AVERAGE = 1, +} + +enum ROIPoolingMode : uint { + /// maximum value inside pooling window; pooling result would be 0 if + /// pooling window is empty + MAX_ = 0, + /// arithmetic mean of all values inside pooling window; pooling result + /// would be 0 if pooling window is empty + AVERAGE = 1, +} + +enum ReduceDataType : uint { + /// input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode. + /// Currently, ```DEFAULT``` mode means: + /// + /// +--------------------+-----------------------------------+-------------------+ + /// | Input/Output DType | Mode | Computation DType | + /// +====================+===================================+===================+ + /// | FLOAT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | FLOAT16 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT16 | + /// +--------------------+-----------------------------------+-------------------+ + /// | INT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | INT8 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT8 | + /// +--------------------+-----------------------------------+-------------------+ + /// | QuantizedS8 | MIN/MAX | QuantizedS8 | + /// +--------------------+-----------------------------------+-------------------+ + /// | QuantizedS8 | MEAN/SUM | QuantizedS32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | Quantized8Asymm | MIN/MAX | Quantized8Asymm | + /// +--------------------+-----------------------------------+-------------------+ + /// | Quantized8Asymm | MEAN/SUM | QuantizedS32 | + /// +--------------------+-----------------------------------+-------------------+ + /// + /// + DEFAULT = 0, + /// Deprecated. This was replaced by FLOAT_O16xC32, and input's dtype + /// decided by actual input tensor. + FLOAT_IO16xC32 = 1, + /// compute/output both are float32 + FLOAT_O32xC32 = 2, + /// compute are float32, output float16 + FLOAT_O16xC32 = 3, + /// input quint8, compute and output are qint32 + QUINT_I8xO32 = 4, + /// input qint8, compute and output are qint32 + QINT_I8xO32 = 5, +} + +enum ReduceMode : uint { + SUM = 0, + /// sum of x * x for each element x + SUM_SQR = 1, + PRODUCT = 2, + MIN_ = 3, + MAX_ = 4, + MEAN = 5, +} + +enum ReduceV0Mode : uint { + SUM = 0, + /// sum of x * x for each element x + SUM_SQR = 1, + PRODUCT = 2, + MIN_ = 3, + MAX_ = 4, +} + +enum ReduceV1DataType : uint { + /// input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode. 
+ /// Currently, ```DEFAULT``` mode means: + /// + /// +--------------------+-----------------------------------+-------------------+ + /// | Input/Output DType | Mode | Computation DType | + /// +====================+===================================+===================+ + /// | FLOAT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | FLOAT16 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT16 | + /// +--------------------+-----------------------------------+-------------------+ + /// | INT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | INT8 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT8 | + /// +--------------------+-----------------------------------+-------------------+ + /// | QuantizedS8 | MIN/MAX | QuantizedS8 | + /// +--------------------+-----------------------------------+-------------------+ + /// | QuantizedS8 | MEAN/SUM | QuantizedS32 | + /// +--------------------+-----------------------------------+-------------------+ + /// | Quantized8Asymm | MIN/MAX | Quantized8Asymm | + /// +--------------------+-----------------------------------+-------------------+ + /// | Quantized8Asymm | MEAN/SUM | QuantizedS32 | + /// +--------------------+-----------------------------------+-------------------+ + /// + /// + DEFAULT = 0, + /// Deprecated. This was replaced by FLOAT_O16xC32, and input's dtype + /// decided by actual input tensor. + FLOAT_IO16xC32 = 1, + /// compute/output both are float32 + FLOAT_O32xC32 = 2, + /// compute are float32, output float16 + FLOAT_O16xC32 = 3, + /// input quint8, compute and output are qint32 + QUINT_I8xO32 = 4, + /// input qint8, compute and output are qint32 + QINT_I8xO32 = 5, +} + +enum ReduceV1Mode : uint { + SUM = 0, + /// sum of x * x for each element x + SUM_SQR = 1, + PRODUCT = 2, + MIN_ = 3, + MAX_ = 4, + MEAN = 5, +} + +/// Relayout mode. +/// +/// **Naming conventions** +/// +/// 1. ``A_B`` means change from layout format ``A`` to ``B``. +/// 2. ``INTER_WEIGHT_xx`` means relayout the weight for faster processing by +/// :attr:`Convolution.Format.NHWCD4` convolutions. +/// 3. A suffix of ``I`` means ``Image2DPack4TensorFormat`` tensor format is used +/// for faster processing on GPUs. 
+/// +/// **Layout definitions** +/// +/// * ``NCHW`` layout: ``{N, C, H, W}`` +/// * ``NHWC`` layout: ``{N, H, W, C}`` +/// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}`` +/// * ``NHWCD4I`` layout: with ``align_axis = 2`` +/// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}`` +/// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}`` +/// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}`` +/// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}`` +/// +/// **Float weight transformation definitions** +/// +/// +---------------+---------------------------------+--------------------+--------------------------------------+------+ +/// | Sparsity Type | Input Layout | Input Req | Output Layout | Axis | +/// +===============+=================================+====================+======================================+======+ +/// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC, 4}`` | 3 | +/// +---------------+---------------------------------+--------------------+--------------------------------------+------+ +/// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4 | +/// | | | ``ICPG % 4 == 0`` | | | +/// +---------------+---------------------------------+--------------------+--------------------------------------+------+ +/// | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 4 == 0`` | ``{GROUP / 4, 1, FH ,FW, 4}`` | 1 | +/// +---------------+---------------------------------+--------------------+--------------------------------------+------+ +/// +/// **Float weight transformation nchw88 definitions** +/// +/// +---------------+---------------------------------+--------------------+--------------------------------------+ +/// | Sparsity Type | Input Layout | Input Req | Output Layout | +/// +===============+=================================+====================+======================================+ +/// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 8 == 0`` |``{OC/8, IC/8 ,FH, FW, 8(IC), 8(OC)}``| +/// | | | ``IC % 8 == 0`` | | +/// +---------------+---------------------------------+--------------------+--------------------------------------+ +/// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0`` | ``{GROUP, OCPG/8, ICPG/8 FH, FW, | +/// | | | ``ICPG % 8 == 0`` | 8(ICPG), 8(OCPG)} `` | +/// +---------------+---------------------------------+--------------------+--------------------------------------+ +/// | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 8 == 0`` | ``{GROUP / 8, 1, FH ,FW, 8}`` | +/// +---------------+---------------------------------+--------------------+--------------------------------------+ +/// +/// **Int8(DOT) weight transformation definitions** +/// +/// +---------------+---------------------------------+--------------------+------------------------------------------+------+ +/// | Sparsity Type | Input Layout | Input Req | Output Layout | Axis | +/// +===============+=================================+====================+==========================================+======+ +/// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC/4, 4, 4}` | 3 | +/// +---------------+---------------------------------+--------------------+------------------------------------------+------+ +/// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}``| 4 | +/// | | | ``ICPG % 4 == 0`` | | | +/// +---------------+---------------------------------+--------------------+------------------------------------------+------+ +/// +/// Note: the axis column means the corresponding 
``align_axis`` for image format +/// when the ``I`` suffix is present. +/// +/// Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later opr by seting group and oc param with NCHW4_NCHW +/// +enum RelayoutFormatV0Mode : uint { + NHWC_NHWCD4 = 0, + NHWCD4_NHWC = 1, + NHWC_NHWCD4I = 2, + NCHW_NHWCD4 = 3, + NCHW_NHWCD4I = 4, + NHWCD4I_NCHW = 5, + NHWCD4_NCHW = 6, + INTER_WEIGHT_DENSE = 7, + INTER_WEIGHT_DENSEI = 8, + INTER_WEIGHT_GROUP = 9, + INTER_WEIGHT_GROUPI = 10, + INTER_WEIGHT_CHAN = 11, + INTER_WEIGHT_CHANI = 12, + INTER_WEIGHT_DENSEI_DOT = 13, + INTER_WEIGHT_GROUPI_DOT = 14, + NCHW4_CHWN4 = 15, + CHWN4_NCHW4 = 16, + NCHW_NCHW88_CONV_DENSE_WEIGHT = 17, + NCHW_NCHW88_CONV_CHAN_WEIGHT = 18, + NCHW_NCHW88_CONV_GROUP_WEIGHT = 19, + NCHW_NCHW88 = 20, + NCHW88_NCHW = 21, + NCHW_NCHW4_IC_SMALL = 22, + NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT = 23, + NCHW_NCHW4 = 24, + NCHW4_NCHW = 25, + NCHW_NCHW4_WEIGHT = 26, + NCHW_NCHW64 = 27, + NCHW64_NCHW = 28, + NCHW_NHWC = 29, + NHWC_NCHW = 30, + NHWCD4I_NHWC = 31, +} + +enum SeparableConvBorderMode : uint { + BORDER_REPLICATE = 0, + BORDER_REFLECT = 1, + BORDER_REFLECT_101 = 2, + BORDER_WRAP = 3, + BORDER_CONSTANT = 4, + BORDER_TRANSPARENT = 5, + BORDER_ISOLATED = 6, +} + +enum SeparableConv3DBorderMode : uint { + BORDER_REPLICATE = 0, + BORDER_REFLECT = 1, + BORDER_REFLECT_101 = 2, + BORDER_WRAP = 3, + BORDER_CONSTANT = 4, + BORDER_TRANSPARENT = 5, + BORDER_ISOLATED = 6, +} + +enum SpatialTfGridGeneratorMode : uint { + AFFINE = 0, +} + +enum SpatialTfSamplerMode : uint { + BILINEAR = 0, +} + +enum TopKMode : uint { + /// only the value of the k'th element would be computed + KTH_ONLY = 0, + /// all the top-k values and corresponding indices would be computed; no + /// order is guaranteed + VALUE_IDX_NOSORT = 1, + /// all the top-k values and corresponding indices sorted + VALUE_IDX_SORTED = 2, +} + +enum WarpPerspectiveV1BorderMode : uint { + /// aaaaaa|abcdefgh|hhhhhhh + REPLICATE = 0, + /// fedcba|abcdefgh|hgfedcb + REFLECT = 1, + /// gfedcb|abcdefgh|gfedcba + REFLECT_101 = 2, + /// cdefgh|abcdefgh|abcdefg + WRAP = 3, + /// iiiiii|abcdefgh|iiiiiii + CONSTANT = 4, + TRANSPARENT = 5, + ISOLATED = 6, +} + +enum WarpPerspectiveV1InterpolationMode : uint { + NEAREST = 0, + LINEAR = 1, + AREA = 2, + CUBIC = 3, + LANCZOS4 = 4, +} + +table Empty { +} + +table Axis { + axis:int = 0; +} + +table ConvolutionV0 { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + data_type:ConvolutionV0DataType = FLOAT; + sparse:ConvolutionV0Sparse = DENSE; + /// convolution data/filter/output format; see :class:`RelayoutFormat` for + /// more details + format:ConvolutionV0Format = NCHW; +} + +table ConvolutionV1 { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + /// Specifies special computation modes, e.g. different combinations of + /// intermediate result data types. + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +table Convolution { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + /// convolution data/filter/output format; see :class:`RelayoutFormat` for + /// more details + format:ConvolutionFormat = NCHW; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +table MaskPropagate { + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// kernel height + kernel_h:uint = 1; + /// kernel width + kernel_w:uint = 1; + /// dilate height + dilate_h:uint = 1; + /// dilate width + dilate_w:uint = 1; +} + +table ConvPooling { + method:ConvPoolingMethod = WITH_TEXTURE_OBJ; + convMode:ConvolutionV0Mode = CROSS_CORRELATION; + poolMode:ConvPoolingPoolMode = AVERAGE; + nonlineMode:ConvPoolingNonlineMode = IDENTITY; + pool_shape_h:uint = 1; + pool_shape_w:uint = 1; + pool_stride_h:uint = 1; + pool_stride_w:uint = 1; + pool_pad_h:uint = 0; + pool_pad_w:uint = 0; + conv_stride_h:uint = 1; + conv_stride_w:uint = 1; + conv_pad_h:uint = 0; + conv_pad_w:uint = 0; +} + +/// legacy conv_bias +table ConvBiasV0 { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 1; + stride_w:uint = 1; +} + +/// active(conv(x, w) + bias) +table ConvBiasV1 { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + data_type:ConvolutionV0DataType = FLOAT; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; +} + +/// active(conv(x, w) + bias) +table ConvBiasV2 { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +/// active(conv(x, w) + bias) +table ConvBiasV3 { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + /// detail meaning \see winograd in conv bias + output_block_size:uint = 0; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +/// active(conv(x, w) + bias) +table ConvBias { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionFormat = NCHW; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +table SeparableConv { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + borderMode:SeparableConvBorderMode = BORDER_REPLICATE; + is_symm_kernel:bool = true; + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 1; + stride_w:uint = 1; + ksize_h:uint = 3; + ksize_w:uint = 3; + anchor_h:uint = 1; + anchor_w:uint = 1; +} + +table Images2Neibs { + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 1; + stride_w:uint = 1; + dilate_h:uint = 1; + dilate_w:uint = 1; + window_h:uint = 3; + window_w:uint = 3; +} + +table SlidingWindowTranspose { + out_h:uint = 0; + out_w:uint = 0; + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 1; + stride_w:uint = 1; + dilate_h:uint = 1; + dilate_w:uint = 1; + window_h:uint = 3; + window_w:uint = 3; +} + +table PoolingV0 { + mode:PoolingV0Mode = MAX_; + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 2; + stride_w:uint = 2; + window_h:uint = 2; + window_w:uint = 2; + format:ConvolutionV0Format = NCHW; +} + +table Pooling { + mode:PoolingV0Mode = MAX_; + pad_h:uint = 0; + pad_w:uint = 0; + stride_h:uint = 2; + stride_w:uint = 2; + window_h:uint = 2; + window_w:uint = 2; + format:ConvolutionFormat = NCHW; +} + +table Softmax { + axis:int = -1; +} + +table AdaptivePoolingV0 { + mode:PoolingV0Mode = MAX_; + format:ConvolutionV0Format = NCHW; +} + +table AdaptivePooling { + mode:PoolingV0Mode = MAX_; + format:ConvolutionFormat = NCHW; +} + +/// see ImageNet Classification with Deep Convolutional Neural Networks for +/// meaning of the fields +table LRN { + /// must be odd + n:uint = 5; + k:float = 2.; + alpha:float = 1e-4; + beta:float = 0.75; +} + +table BN { + param_dim:BNParamDim = DIM_11HW; + fwd_mode:BNFwdMode = TRAINING; + epsilon:double = 1e-4; + avg_factor:double = 1.; + scale:float = 1.; + bias:float = 0.; +} + +table ROIPooling { + mode:ROIPoolingMode = MAX_; + scale:float = 1.; +} + +table WarpPerspectiveV1 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + bmode:WarpPerspectiveV1BorderMode = REPLICATE; + format:ConvolutionV0Format = NCHW; + /// used for CONSTANT bmode + border_val:float = .0; +} + +table WarpPerspective { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + bmode:WarpPerspectiveV1BorderMode = REPLICATE; + format:ConvolutionFormat = NCHW; + /// used for CONSTANT bmode + border_val:float = .0; +} + +table SpatialTfGridGenerator { + mode:SpatialTfGridGeneratorMode = AFFINE; +} + +table SpatialTfSampler { + mode:SpatialTfSamplerMode = BILINEAR; +} + +table AddUpdate { + alpha:float = 1.; + beta:float = 1.; + bias:float = 0.; +} + +table Elemwise { + mode:ElemwiseMode = RELU; +} + +table ElemwiseMultiType { + mode:ElemwiseMultiTypeMode = FUSE_MUL_ADD3_INT16x32x32x32; +} + +/// power with constant exponent +table PowC { + exp:float = 0; +} + +/// 2d discrete cosine transform +table DctChannelSelectV0 { + format:ConvolutionV0Format = NCHW; + fastImpl:DctChannelSelectV0FastImpl = NONE; + dct_block_size:int = 8; +} + +/// 2d discrete cosine transform +table DctChannelSelect { + format:ConvolutionFormat = NCHW; + fastImpl:DctChannelSelectV0FastImpl = NONE; + dct_block_size:int = 8; +} + +table MatrixMulV0 { + transposeA:bool = false; + transposeB:bool = false; + data_type:MatrixMulV0DataType = FLOAT; +} + +table MatrixMulV1 { + transposeA:bool = false; + transposeB:bool = false; + /// Specifies special computation modes, e.g. different combinations of + /// intermediate result data types. 
+ compute_mode:MatrixMulV1ComputeMode = DEFAULT; +} + +table MatrixMul { + transposeA:bool = false; + transposeB:bool = false; + compute_mode:MatrixMulV1ComputeMode = DEFAULT; + format:MatrixMulFormat = DEFAULT; +} + +table SVD { + /// Whether to compute the full-sized u and v or only the leading min(m, n) + /// singular vectors. Ignored if compute_uv is false. + full_matrices:bool = false; + /// Whether the left (u) and right (v) singular vectors will be computed and + /// outputted. + compute_uv:bool = true; +} + +/// legacy reduce +table ReduceV0 { + mode:ReduceV0Mode = SUM; + /// axis along which reduction is performed; if -1 is given, reduce to given + /// target shape (only used in megbrain) + axis:int = -1; +} + +/// reduce along given axis +table ReduceV1 { + mode:ReduceV1Mode = SUM; + /// axis along which reduction is performed; if -1 is given, reduce to given + /// target shape (only used in megbrain) + axis:int = -1; + data_type:ReduceV1DataType = DEFAULT; +} + +/// reduce along given axis +table Reduce { + mode:ReduceMode = SUM; + /// axis along which reduction is performed; if INT_MAX is given, reduce to + /// given target shape (only used in megbrain) + axis:int = 2147483647; + data_type:ReduceDataType = DEFAULT; +} + +/// calculate accumulated sum along given axis +table CumsumV0 { + /// axis along which cumsum is performed + axis:int = -1; + /// whether the current element is taken into account + exclusive:bool = true; + /// whether the cumsum is forward or backward + reverse:bool = false; +} + +/// calculate accumulated sum along given axis +table Cumsum { + /// axis along which cumsum is performed, default with INT_MAX + axis:int = 2147483647; + /// whether the current element is taken into account + exclusive:bool = true; + /// whether the cumsum is forward or backward + reverse:bool = false; +} + +table CondTake { + mode:CondTakeMode = EQ; + /// the value to be compared with; note that for integer data, val is also + /// converted to int + val:float = 0; + /// used for float equality comparison + eps:float = 1e-06; +} + +table Argsort { + order:ArgsortOrder = ASCENDING; +} + +table IndexingRemap { + /// Whether no two dst element maps to the same src element. Enabling this + /// option can accelerate gradient operator since atomic adding operations + /// could be avoided. + is_non_overlapping:bool = false; +} + +table Sleep { + /// time to sleep in seconds + time:float = 0; +} + +table Linspace { + /// Whether stop is included in the generated tensor + endpoint:bool = true; +} + +table LinspaceFull { + /// The first val. + start:double = 0; + /// The last val. + stop:double = 1; + /// Whether stop is included in the generated tensor + endpoint:bool = true; +} + +table Eye { + /// Index of the diagonal: 0 (the default) refers to the main diagonal, a + /// positive value refers to an upper diagonal, and a negative value to a + /// lower diagonal. + k:int = 0; + /// data type of output value + dtype:DTypeEnum = Float32; +} + +table Diag { + /// Index of the diagonal: 0 (the default) refers to the main diagonal, a + /// positive value refers to an upper diagonal, and a negative value to a + /// lower diagonal. + k:int = 0; +} + +table UniformRNGV0 { + seed:ulong = 0; +} + +table UniformRNG { + seed:ulong = 0; + /// The dtype of output Tensor. Only support Float32. 
+ dtype:DTypeEnum = Float32; +} + +table GaussianRNGV0 { + seed:ulong = 0; + mean:float = 0; + std:float = 1; +} + +table GaussianRNG { + seed:ulong = 0; + mean:float = 0; + std:float = 1; + /// The dtype of output Tensor. Only support Float32. + dtype:DTypeEnum = Float32; +} + +table GammaRNG { + seed:ulong = 0; +} + +table BetaRNG { + seed:ulong = 0; +} + +table PoissonRNG { + seed:ulong = 0; +} + +table PermutationRNG { + seed:ulong = 0; + /// The dtype of output Tensor. Int32, Int16 and Float32 are supported. + dtype:DTypeEnum = Int32; +} + +table ShuffleRNG { + seed:ulong = 0; +} + +table Flip { + vertical:bool = false; + horizontal:bool = false; +} + +table Rotate { + clockwise:bool = true; +} + +table ROICopy { + row_from:uint = 0; + row_to:uint = 0; + col_from:uint = 0; + col_to:uint = 0; +} + +table CvtColor { + mode:CvtColorMode = RGB2GRAY; +} + +table WarpAffineV0 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + border_mode:WarpPerspectiveV1BorderMode = REPLICATE; + /// used for CONSTANT bmode + border_val:float = .0; +} + +table WarpAffineV1 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + border_mode:WarpPerspectiveV1BorderMode = REPLICATE; + /// used for CONSTANT bmode + border_val:float = .0; + format:ConvolutionV0Format = NHWC; +} + +table WarpAffine { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + border_mode:WarpPerspectiveV1BorderMode = REPLICATE; + /// used for CONSTANT bmode + border_val:float = .0; + format:ConvolutionFormat = NHWC; +} + +table GaussianBlur { + border_mode:WarpPerspectiveV1BorderMode = REPLICATE; + kernel_height:uint = 0; + kernel_width:uint = 0; + sigma_x:float = 0.; + sigma_y:float = 0.; +} + +table ResizeV0 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; +} + +table ResizeV1 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + format:ConvolutionV0Format = NHWC; +} + +table Resize { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + format:ConvolutionFormat = NHWC; +} + +table RemapV0 { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + border_type:WarpPerspectiveV1BorderMode = REPLICATE; + format:ConvolutionV0Format = NHWC; + scalar:float = 0.; +} + +table Remap { + imode:WarpPerspectiveV1InterpolationMode = LINEAR; + border_type:WarpPerspectiveV1BorderMode = REPLICATE; + format:ConvolutionFormat = NHWC; + scalar:float = 0.; +} + +table Convolution3D { + mode:Convolution3DMode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_d:uint = 0; + /// padding on one side on the second dimension + pad_h:uint = 0; + /// padding on one side on the third dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_d:uint = 1; + /// kernel stride on the second dimension + stride_h:uint = 1; + /// kernel stride on the third dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the first + /// dimension + dilate_d:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the third + /// dimension + dilate_w:uint = 1; + sparse:Convolution3DSparse = DENSE; + data_type:Convolution3DDataType = FLOAT; + format:Convolution3DFormat = NCDHW; +} + +table Conv3DBias { + nonlineMode:Conv3DBiasNonlineMode = IDENTITY; + mode:Convolution3DMode = CROSS_CORRELATION; + pad_d:uint = 0; + pad_h:uint = 0; + pad_w:uint = 0; + stride_d:uint = 1; + stride_h:uint = 1; + stride_w:uint = 0; +} + +table SeparableConv3D { + mode:Convolution3DMode = CROSS_CORRELATION; + borderMode:SeparableConv3DBorderMode = BORDER_REPLICATE; + is_symm_kernel:bool = true; + pad_d:uint = 0; + pad_h:uint = 0; + pad_w:uint = 0; + stride_d:uint = 0; + stride_h:uint = 1; + stride_w:uint = 1; + ksize_d:uint = 0; + ksize_h:uint = 3; + ksize_w:uint = 3; + anchor_d:uint = 0; + anchor_h:uint = 1; + anchor_w:uint = 1; +} + +table TopK { + mode:TopKMode = KTH_ONLY; +} + +/// Change the tensor layout format +table RelayoutFormatV0 { + /// Relayout mode. + /// + /// **Naming conventions** + /// + /// 1. ``A_B`` means change from layout format ``A`` to ``B``. + /// 2. ``INTER_WEIGHT_xx`` means relayout the weight for faster processing by + /// :attr:`Convolution.Format.NHWCD4` convolutions. + /// 3. A suffix of ``I`` means ``Image2DPack4TensorFormat`` tensor format is used + /// for faster processing on GPUs. + /// + /// **Layout definitions** + /// + /// * ``NCHW`` layout: ``{N, C, H, W}`` + /// * ``NHWC`` layout: ``{N, H, W, C}`` + /// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}`` + /// * ``NHWCD4I`` layout: with ``align_axis = 2`` + /// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}`` + /// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}`` + /// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}`` + /// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}`` + /// + /// **Float weight transformation definitions** + /// + /// +---------------+---------------------------------+--------------------+--------------------------------------+------+ + /// | Sparsity Type | Input Layout | Input Req | Output Layout | Axis | + /// +===============+=================================+====================+======================================+======+ + /// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC, 4}`` | 3 | + /// +---------------+---------------------------------+--------------------+--------------------------------------+------+ + /// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4 | + /// | | | ``ICPG % 4 == 0`` | | | + /// +---------------+---------------------------------+--------------------+--------------------------------------+------+ + /// | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 4 == 0`` | ``{GROUP / 4, 1, FH ,FW, 4}`` | 1 | + /// +---------------+---------------------------------+--------------------+--------------------------------------+------+ + /// + /// **Float weight transformation nchw88 definitions** + /// + /// +---------------+---------------------------------+--------------------+--------------------------------------+ + /// | Sparsity Type | Input Layout | Input Req | Output Layout | + /// +===============+=================================+====================+======================================+ + /// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 8 == 0`` |``{OC/8, IC/8 ,FH, FW, 8(IC), 8(OC)}``| + /// | | | ``IC % 8 == 0`` | | + /// +---------------+---------------------------------+--------------------+--------------------------------------+ + /// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 
== 0`` | ``{GROUP, OCPG/8, ICPG/8 FH, FW, | + /// | | | ``ICPG % 8 == 0`` | 8(ICPG), 8(OCPG)} `` | + /// +---------------+---------------------------------+--------------------+--------------------------------------+ + /// | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 8 == 0`` | ``{GROUP / 8, 1, FH ,FW, 8}`` | + /// +---------------+---------------------------------+--------------------+--------------------------------------+ + /// + /// **Int8(DOT) weight transformation definitions** + /// + /// +---------------+---------------------------------+--------------------+------------------------------------------+------+ + /// | Sparsity Type | Input Layout | Input Req | Output Layout | Axis | + /// +===============+=================================+====================+==========================================+======+ + /// | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC/4, 4, 4}` | 3 | + /// +---------------+---------------------------------+--------------------+------------------------------------------+------+ + /// | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}``| 4 | + /// | | | ``ICPG % 4 == 0`` | | | + /// +---------------+---------------------------------+--------------------+------------------------------------------+------+ + /// + /// Note: the axis column means the corresponding ``align_axis`` for image format + /// when the ``I`` suffix is present. + /// + /// Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later opr by seting group and oc param with NCHW4_NCHW + /// + mode:RelayoutFormatV0Mode = NHWC_NHWCD4; +} + +/// Change the tensor layout format +table RelayoutFormat { + mode:RelayoutFormatV0Mode = NHWC_NHWCD4; + oc:uint = 0; + group:uint = 1; +} + +table SeparableFilterV0 { + format:ConvolutionV0Format = NCHW; + borderMode:WarpPerspectiveV1BorderMode = REPLICATE; + is_symm_kernel:bool = true; + ksize_h:uint = 3; + ksize_w:uint = 3; + anchor_h:uint = 1; + anchor_w:uint = 1; +} + +table SeparableFilter { + format:ConvolutionFormat = NCHW; + borderMode:WarpPerspectiveV1BorderMode = REPLICATE; + is_symm_kernel:bool = true; + ksize_h:uint = 3; + ksize_w:uint = 3; + anchor_h:uint = 1; + anchor_w:uint = 1; +} + +/// Local share convolution +table LocalShareV0 { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + /// spatial groups on the first dimension + spatial_groups_h:uint = 1; + /// spatial groups on the second dimension + spatial_groups_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + computeMode:ConvolutionV1ComputeMode = DEFAULT; +} + +/// Local share convolution +table LocalShare { + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + /// spatial groups on the first dimension + spatial_groups_h:uint = 1; + /// spatial groups on the second dimension + spatial_groups_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionFormat = NCHW; + computeMode:ConvolutionV1ComputeMode = DEFAULT; +} + +table ROIAlignV0 { + mode:ROIAlignV0Mode = MAX_; + format:ConvolutionV0Format = NCHW; + spatial_scale:float = 1.0; + offset:float = 0.0; + pooled_height:uint = 1; + pooled_width:uint = 1; + sample_height:uint = 2; + sample_width:uint = 2; +} + +table ROIAlign { + mode:ROIAlignV0Mode = MAX_; + format:ConvolutionFormat = NCHW; + spatial_scale:float = 1.0; + offset:float = 0.0; + pooled_height:uint = 1; + pooled_width:uint = 1; + sample_height:uint = 2; + sample_width:uint = 2; +} + +table Correlation { + format:ConvolutionV0Format = NCHW; + kernel_size:uint = 1; + max_displacement:uint = 1; + stride1:uint = 1; + stride2:uint = 1; + pad_size:uint = 0; + is_multiply:bool = true; +} + +table DeformablePSROIPooling { + no_trans:bool = true; + spatial_scale:float = 1; + trans_std:float = 1; + /// height of pooling output + pooled_h:uint = 1; + /// width of pooling output + pooled_w:uint = 1; + /// size of each deformable part + part_size:uint = 1; + /// sample count of each bbox + sample_per_part:uint = 1; +} + +/// Batch convolution (unshare weights on the batch dimension) +table BatchConvBiasV0 { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionV0Format = NCHW; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +/// Batch convolution (unshare weights on the batch dimension) +table BatchConvBias { + nonlineMode:ConvBiasV0NonlineMode = IDENTITY; + mode:ConvolutionV0Mode = CROSS_CORRELATION; + /// padding on one side on the first dimension + pad_h:uint = 0; + /// padding on one side on the second dimension + pad_w:uint = 0; + /// kernel stride on the first dimension + stride_h:uint = 1; + /// kernel stride on the second dimension + stride_w:uint = 1; + /// dilation (i.e. size of each zero-padded kernel block) on the second + /// dimension + dilate_h:uint = 1; + /// dilation (i.e. 
size of each zero-padded kernel block) on the second + /// dimension + dilate_w:uint = 1; + sparse:ConvolutionV0Sparse = DENSE; + format:ConvolutionFormat = NCHW; + compute_mode:ConvolutionV1ComputeMode = DEFAULT; +} + +table FakeQuant { + qmin:int = -2147483648; + qmax:int = 2147483647; +} + +table TQT { + qmin:int = -2147483648; + qmax:int = 2147483647; +} + +table LSQ { + qmin:int = -2147483648; + qmax:int = 2147483647; +} + +table Fill { + value:float = 0; +} + +table CheckNonFinite { + scale:float = 1.0; +} + +table Padding { + /// offset in dim 0 + front_offset_dim0:uint = 0; + /// offset in dim 1 + front_offset_dim1:uint = 0; + /// offset in dim 2 + front_offset_dim2:uint = 0; + /// offset in dim 3 + front_offset_dim3:uint = 0; + /// offset in dim 4 + front_offset_dim4:uint = 0; + /// offset in dim 5 + front_offset_dim5:uint = 0; + /// offset in dim 6 + front_offset_dim6:uint = 0; + /// back offset in dim0 + back_offset_dim0:uint = 0; + /// back offset in dim1 + back_offset_dim1:uint = 0; + /// back offset in dim2 + back_offset_dim2:uint = 0; + /// back offset in dim3 + back_offset_dim3:uint = 0; + /// back offset in dim4 + back_offset_dim4:uint = 0; + /// back offset in dim5 + back_offset_dim5:uint = 0; + /// back offset in dim6 + back_offset_dim6:uint = 0; + /// param of padding opr + padding_val:float = 0; + padding_mode:PaddingPaddingMode = CONSTANT; +} + +table LayerNorm { + affine:bool = true; + eps:float = 1e-5; + normalized_dim:ulong = 1; + normalized_size:ulong = 1; +} + +table Dropout { + drop_prob:float = 0; + seed:ulong = 0; +} + +table RNNCell { + nonlineMode:RNNCellNonlineMode = IDENTITY; +} + +table RNN { + /// Number of recurrent layers + num_layers:uint = 1; + /// If becomes a bidirectional RNN + bidirectional:bool = false; + /// If the layer use bias weights b_ih and b_hh + bias:bool = true; + /// The number of features in the hidden state + hidden_size:uint = 128; + /// If introduce a Dropout layer on the outputs of each RNN layer + dropout:float = 0.; + nonlineMode:RNNCellNonlineMode = IDENTITY; + fwd_mode:BNFwdMode = TRAINING; +} + +table LSTM { + /// Number of recurrent layers + num_layers:uint = 1; + /// If becomes a bidirectional LSTM + bidirectional:bool = false; + /// If the layer use bias weights b_ih and b_hh + bias:bool = true; + /// The number of features in the hidden state + hidden_size:uint = 128; + /// If use LSTM with projections of corresponding size + proj_size:uint = 0; + /// If introduce a Dropout layer on the outputs of each LSTM layer + dropout:float = 0.; + fwd_mode:BNFwdMode = TRAINING; +} + + diff --git a/ci/compatibility/fbs/V2-backup/schema_v2.fbs b/ci/compatibility/fbs/V2-backup/schema_v2.fbs new file mode 100644 index 00000000..7bbb847e --- /dev/null +++ b/ci/compatibility/fbs/V2-backup/schema_v2.fbs @@ -0,0 +1,228 @@ +include "dtype.fbs"; +include "opr_param_defs.fbs"; +include "mgb_opr_param_defs.fbs"; +include "mgb_cpp_opr.fbs"; + +namespace mgb.serialization.fbs.v2; + +file_identifier "mge2"; + +table CompNode { + logical_locator:string; +} + +table DefaultTensorFormat{} + +table Image2DPackedTensorFormat{ + align_axis: ubyte; +} + +table LowbitsAlignedTensorFormat{ + size_nbits: ubyte; + align_size_in_bits: ubyte; +} + +/// The Tensor Format +union TensorFormat { + DefaultTensorFormat = 1, + Image2DPackedTensorFormat = 2, + LowbitsAlignedTensorFormat = 3, +} + +/// Opaque byte buffer defined by operator implementation +table Blob { + data:[ubyte]; +} + +table Tensor { + name:string; + shape:[uint]; + comp_node:CompNode; + 
dtype:DType; + format:TensorFormat; + /// The tensor raw data + data:[ubyte]; +} + +table Reserved0 {} +table DeprecatedParam {} + +union OperatorParam { + param.Empty = 1, + param.Axis = 2, + param.Convolution = 3, + param.MaskPropagate = 4, + param.ConvPooling = 5, + param.ConvBias = 6, + param.SeparableConv = 7, + param.Images2Neibs = 8, + param.Pooling = 9, + param.LRN = 10, + param.BN = 11, + param.ROIPooling = 12, + param.WarpPerspective = 13, + param.SpatialTfGridGenerator = 14, + param.SpatialTfSampler = 15, + param.MGBAddUpdate = 16, + param.Elemwise = 17, + param.ElemwiseMultiType = 18, + param.PowC = 19, + param.MatrixMul = 20, + //Reserved for param.Winograd = 21, + DeprecatedParam = 21, + param.SVD = 22, + param.Reduce = 23, + param.Cumsum = 24, + param.CondTake = 25, + param.Argsort = 26, + param.IndexingRemap = 27, + param.MGBSleep = 28, + param.Linspace = 29, + param.LinspaceFull = 30, + param.Eye = 31, + param.UniformRNG = 32, + param.GaussianRNG = 33, + param.Flip = 34, + param.Rotate = 35, + param.ROICopy = 36, + param.CvtColor = 37, + param.WarpAffine = 38, + param.GaussianBlur = 39, + param.Resize = 40, + param.Convolution3D = 41, + param.Conv3DBias = 42, + param.SeparableConv3D = 43, + param.TopK = 44, + param.RelayoutFormat = 45, + param.SeparableFilter = 46, + param.LocalShare = 47, + param.ROIAlign = 48, + param.DeformablePSROIPooling = 49, + param.BatchConvBias = 50, + param.DType = 51, + param.PersistentOutputStorage = 52, + param.OptionalAxis = 53, + param.OptionalAxisV1 = 54, + param.ExecutionPolicy = 55, + param.AssertEqual = 56, + param.FpgaConv = 57, + param.CollectiveComm = 58, + param.CondExecPred = 59, + param.CondExecPredLogical = 60, + param.CondExecMark = 61, + param.CondExecMerge = 62, + param.Host2DeviceCopy = 63, + param.Dimshuffle = 64, + param.AxisAddRemove = 65, + param.IndexDescMaskDump = 66, + DType = 67, + param.Remap = 68, + param.NMSKeep = 69, + param.AdaptivePooling = 70, + param.NvOf = 71, + param.DctChannelSelect = 72, + param.FakeQuant = 73, + param.TQT = 74, + param.Correlation = 75, + param.LSQ = 76, + param.GammaRNG = 77, + param.PoissonRNG = 78, + param.PermutationRNG = 79, + param.BetaRNG = 80, + param.SlidingWindowTranspose = 81, + param.Padding = 82, + param.ShuffleRNG = 83, + param.CheckNonFinite = 84, + param.LayerNorm = 85, + param.Dropout = 86, + param.RNNCell = 87, + param.RNN = 88, + param.LSTM = 89, + param.Softmax = 90, + param.Diag = 91, +} + +table Operator { + /// the Operator type id + type:string; + /// sometimes the type string may not exist, so a type_id is stored as well + type_id:ulong; + name:string; + + /// Operator parameter + param:OperatorParam; + /// an Operator may need to save more than one OperatorParam + additional_params:[OperatorParam]; + + /// IDs of the input tensors in the middle_tensors of a model + inputs:[uint]; + + /// IDs of the output tensors in the middle_tensors of a model + outputs:[uint]; + + comp_node:[CompNode]; + output_dtype:DType; + + /// the constant values of the Operator, stored in tensor format + tensors:[Tensor]; + + /// operator version; as MegEngine develops, an operator may gain multiple versions + opr_version:uint; + + /// the order of the Operator in the graph + priority:int = 0; + + /// custom data; an Operator may want to save big, opaque byte buffers here.
+ custom_data:[Blob]; +} + +table Metadata { + is_valid:bool; + graph_modified:bool; + optimize_options:ulong; + user_info:string; +} + +table MiddleTensor { + name:string; + shape:[uint]; + comp_node:CompNode; + dtype:DType; + format:TensorFormat; +} + +table OutputVar { + /// the ID of the middle tensor in the graph, in the same ID space as Operator inputs + compact_id:uint; + original_id:uint; +} + +table OutputAlias { + id:uint; + name:string; +} + +table Model { + /// the MegEngine version used when serializing the model + mge_version:uint; + + /// model version; currently supported versions: + /// version v1: the original fbs serialization version + /// version v2: supports backward compatibility and limited forward compatibility + model_version:uint; + + oprs:[Operator]; + + /// the tensors produced and consumed by the Operators, excluding the model + /// input and output tensors + middle_tensors:[MiddleTensor]; + + output_vars_idx:[OutputVar]; + output_alias:[OutputAlias]; + + nr_shared_tensor:uint; + /// the Metadata that stores custom data and some flags + metadata:Metadata; +} + +root_type Model; diff --git a/imperative/CMakeLists.txt b/imperative/CMakeLists.txt index 37e6b86e..c943e6fb 100644 --- a/imperative/CMakeLists.txt +++ b/imperative/CMakeLists.txt @@ -66,7 +66,8 @@ target_link_libraries(${MODULE_NAME} PRIVATE nlohmann_json::nlohmann_json) target_include_directories( ${MODULE_NAME} PUBLIC src/include - PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${CPP_REDIS_INCLUDES}) + PRIVATE ${PROJECT_SOURCE_DIR}/third_party/boost_subset/boost ${PYTHON_INCLUDE_DIRS} + ${NUMPY_INCLUDE_DIR} ${CPP_REDIS_INCLUDES}) target_link_libraries(${MODULE_NAME} PRIVATE mgb_opdef_inc) target_compile_definitions(${MODULE_NAME} PRIVATE MODULE_NAME=${MODULE_NAME}) target_compile_options(${MODULE_NAME} PRIVATE -Wno-unused-parameter)
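
The RelayoutFormatV0 docs in mgb_opr_param_defs.fbs above define the NHWCD4 layout as ``{N, H, (C + 3) / 4, W, 4}``. A minimal sketch of that shape formula (plain C++17, not tied to any MegEngine API; only the arithmetic from the doc comment is assumed):

#include <array>
#include <cstddef>
#include <cstdio>

// Map an NCHW shape to the NHWCD4 layout {N, H, (C + 3) / 4, W, 4}
// described in the RelayoutFormatV0 doc comment.
std::array<std::size_t, 5> nchw_to_nhwcd4(const std::array<std::size_t, 4>& nchw) {
    const std::size_t n = nchw[0], c = nchw[1], h = nchw[2], w = nchw[3];
    return {n, h, (c + 3) / 4, w, 4};
}

int main() {
    auto shape = nchw_to_nhwcd4({1, 6, 32, 32});
    // prints "1 32 2 32 4": the 6 channels are padded up to 2 groups of 4
    std::printf("%zu %zu %zu %zu %zu\n", shape[0], shape[1], shape[2], shape[3], shape[4]);
}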
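
To show how the schema_v2.fbs tables fit together when consumed, here is a rough C++ sketch that verifies a buffer against the "mge2" file identifier and resolves Operator inputs through Model.middle_tensors. It assumes a header generated by flatc from schema_v2.fbs; the header name schema_v2_generated.h and the GetModel/VerifyModelBuffer/ModelBufferHasIdentifier helpers follow flatc's code-generation conventions and are not part of this diff:

#include <cstdint>
#include <cstdio>
#include <flatbuffers/flatbuffers.h>
#include "schema_v2_generated.h"  // hypothetical: produced by `flatc --cpp schema_v2.fbs`

using namespace mgb::serialization::fbs::v2;

// Walk a serialized Model buffer and print each Operator together with the
// names of its inputs, looked up in Model.middle_tensors as the schema describes.
bool dump_model(const uint8_t* buf, size_t size) {
    flatbuffers::Verifier verifier(buf, size);
    if (!VerifyModelBuffer(verifier) || !ModelBufferHasIdentifier(buf))
        return false;  // not a valid "mge2" V2 model buffer

    const Model* model = GetModel(buf);
    std::printf("mge_version=%u model_version=%u\n", model->mge_version(),
                model->model_version());
    if (!model->oprs())
        return true;
    for (const Operator* opr : *model->oprs()) {
        std::printf("opr %s\n", opr->type() ? opr->type()->c_str() : "<unknown>");
        if (!opr->inputs() || !model->middle_tensors())
            continue;
        for (uint32_t idx : *opr->inputs()) {
            // Operator.inputs holds indices into Model.middle_tensors.
            const MiddleTensor* t = model->middle_tensors()->Get(idx);
            std::printf("  input: %s\n", t->name() ? t->name()->c_str() : "<anonymous>");
        }
    }
    return true;
}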