|
|
@@ -0,0 +1,237 @@ |
|
|
|
|
|
// generated by gen_param_defs.py for c23d51f3c4f33119fd74f58f04d112ccea8f64f1249ab372300975ab7e710e9a |
|
|
|
|
|
include "dtype.fbs"; |
|
|
|
|
|
namespace mgb.serialization.fbs.param; |
|
|
|
|
|
|
|
|
|
|
|
/// Mode of collective communication between computing nodes.
enum CollectiveCommMode : uint {
    /// reduce by sum to output computing node
    REDUCE_SUM = 0,
    /// copy input value to each output computing node
    BROADCAST = 1,
    /// each output comp node gets the concatenated value of all inputs
    ALL_GATHER = 2,
    /// reduce inputs by sum and each output gets one part of it
    REDUCE_SCATTER_SUM = 3,
    /// every output gets the sum of all inputs
    ALL_REDUCE_SUM = 4,
    /// every output gets the max of all inputs
    ALL_REDUCE_MAX = 5,
    /// every output gets the min of all inputs
    ALL_REDUCE_MIN = 6,
    /// every output gets the prod of all inputs
    ALL_REDUCE_PROD = 7,
    /// concat inputs to one node
    GATHER = 8,
    /// scatter input to each output computing node
    SCATTER = 9,
    /// scatter inputs and gather them on each computing node
    ALL_TO_ALL = 10,
}
|
|
|
|
|
|
|
|
|
|
|
/// Mode for computing the gradient of a conditionally-executed var.
enum CondExecMarkGradMode : uint {
    /// normal gradient mode: sum all the activated components
    SUM = 0,
    /// use :attr:`CondExecMerge.SUM_COND_OUT` mode so oprs that depend on the
    /// gradient opr would not be executed if the forward var is not used.
    SUM_COND_OUT = 1,
}
|
|
|
|
|
|
|
|
|
|
|
/// Static inference option. **Note:** This is a workaround: since
/// currently static inference in MegBrain does not take conditional
/// execution into account, this option can be used to bypass static
/// inference errors. This is currently only used by automatically
/// generated gradient oprs.
enum CondExecMarkStaticInfer : uint {
    /// enable both shape and value inference
    SHAPE_VALUE = 0,
    /// only enable shape inference (disable value inference)
    SHAPE_ONLY = 1,
    /// disable both shape and value inference
    NONE = 2,
}
|
|
|
|
|
|
|
|
|
|
|
/// How multiple conditional-execution branches are merged into output vars
/// (see :attr:`CondExecMerge`).
enum CondExecMergeMode : uint {
    /// copy the var whose mask is activated to the output, requiring that
    /// exactly one branch is active
    EXACT_ONE = 0,
    /// like :attr:`EXACT_ONE` with the requirement that all branches have the
    /// same shape, so shape inference can be easier
    EXACT_ONE_SAME_SHAPE = 1,
    /// sum all the active branches into output var; require all branches to
    /// have the same shape. Extra shape vars are needed in this mode, so the
    /// outputs can be initialized to zero when no input is active (and their
    /// shapes are probably unknown).
    SUM = 2,
    /// like :attr:`SUM` but also add an ExecutionMask to the readers of output
    /// vars, so they would be skipped if no branch is taken
    SUM_COND_OUT = 3,
}
|
|
|
|
|
|
|
|
|
|
|
/// How to compare the predicate var with branch keys.
enum CondExecPredMode : uint {
    /// The outputs correspond to branch keys, and the one which equals
    /// predicate would be activated. This behaves like a case-statement in many
    /// languages.
    CASE = 0,
    /// like :attr:`CASE`, but add an extra output that would be activated if no
    /// branch is matched
    CASE_FALLBACK = 1,
    /// One more output would be produced than the number of branch keys,
    /// representing the interval in which the predicate var fits in. The
    /// intervals are defined as :math:`(-\infty, k_0), [k_0, k_1), \ldots,
    /// [k_{n-2}, k_{n-1}), [k_{n-1}, \infty)`. The keys must be given in
    /// ascending order.
    PIECEWISE = 2,
}
|
|
|
|
|
|
|
|
|
|
|
/// Logical function computed over a set of predicate proxy vars
/// (see :attr:`CondExecPredLogical`).
enum CondExecPredLogicalMode : uint {
    /// logical or
    OR = 0,
    /// logical and
    AND = 1,
    /// exclusive-or
    XOR = 2,
    /// not or(inputs)
    NOR = 3,
    /// not and(inputs)
    NAND = 4,
    /// not xor(inputs)
    XNOR = 5,
}
|
|
|
|
|
|
|
|
|
|
|
/// Strategy flags for algorithm selection. NOTE(review): `bit_flags` makes
/// each member a bit index (HEURISTIC = 1<<0, PROFILE = 1<<1, ...), so
/// members may be combined — confirm against FlatBuffers bit_flags semantics.
enum ExecutionPolicyStrategy : uint (bit_flags) {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// run possible algorithms on real device to find the best
    PROFILE = 1,
    /// when doing profile or heuristic algo selection, require the algorithms
    /// to be reproducible
    REPRODUCIBLE = 2,
    /// during profiling, require algorithms optimized for fast profiling
    OPTIMIZED = 3,
}
|
|
|
|
|
|
|
|
|
|
|
/// Legacy (V0) algorithm-selection strategy: a single exclusive choice,
/// unlike the bit-flag based :attr:`ExecutionPolicyStrategy`.
enum ExecutionPolicyV0Strategy : uint {
    /// use heuristic to choose the fastest algorithm
    HEURISTIC = 0,
    /// use heuristic to choose the fastest algorithm, and the chosen algorithm
    /// is reproducible
    HEURISTIC_REPRODUCIBLE = 1,
    /// run possible algorithms on real device to find the best
    PROFILE = 2,
    /// the fastest of profile result that is also reproducible
    PROFILE_REPRODUCIBLE = 3,
    /// use profile result and heuristic to choose the fastest algorithm
    PROFILE_HEURISTIC = 4,
}
|
|
|
|
|
|
|
|
|
|
|
/// Wraps a single :attr:`DTypeEnum` value (declared in the included
/// ``dtype.fbs``) as an opr parameter.
table DType {
    dtype:DTypeEnum = Byte;
}
|
|
|
|
|
|
|
|
|
|
|
table PersistentOutputStorage {
    /// This is used for controlling memory sharing. Multiple
    /// ``PersistentOutputStorage`` oprs with the same ``share_key`` would share
    /// underlying tensor storage. Note that the value ``-1`` is treated
    /// specially: storage of oprs with this key would be private and would not
    /// be shared with any other opr.
    share_key:int = -1;
}
|
|
|
|
|
|
|
|
|
|
|
/// optional axis: axis == -1 means no axis
table OptionalAxis {
    axis:int = -1;
}
|
|
|
|
|
|
|
|
|
|
|
/// optional axis: axis == MAX_NDIM means no axis
table OptionalAxisV1 {
    /// default 7 is MAX_NDIM, i.e. "no axis" — TODO confirm MAX_NDIM == 7
    /// against the generator's definition
    axis:int = 7;
}
|
|
|
|
|
|
|
|
|
|
|
/// Legacy (V0) algorithm-selection policy; superseded by
/// :attr:`ExecutionPolicy`.
table ExecutionPolicyV0 {
    strategy:ExecutionPolicyV0Strategy = HEURISTIC;
    /// workspace limit in bytes; default is UINT64_MAX, i.e. effectively
    /// unlimited
    workspace_limit:ulong = 18446744073709551615;
}
|
|
|
|
|
|
|
|
|
|
|
/// specify how to select an algorithm for an operator
table ExecutionPolicy {
    /// default 1 is the HEURISTIC bit of the bit_flags enum
    /// :attr:`ExecutionPolicyStrategy`
    strategy:ExecutionPolicyStrategy = 1;
    /// workspace limit in bytes; default is UINT64_MAX, i.e. effectively
    /// unlimited
    workspace_limit:ulong = 18446744073709551615;
}
|
|
|
|
|
|
|
|
|
|
|
table AssertEqual {
    /// max allowed error; error is defined as the minimal of absolute and
    /// relative error
    maxerr:float = 0.0001;
    /// whether to print maxerr to stdout during opr exec
    verbose:bool = false;
}
|
|
|
|
|
|
|
|
|
|
|
/// Parameters of an FPGA convolution opr. NOTE(review): field semantics are
/// not documented by the generator; descriptions below are inferred from the
/// names — confirm against the opr implementation.
table FpgaConv {
    need_output_quantize:bool = false;
    need_output_threshold:bool = false;
    /// presumably the convolution stride
    stride:int = 1;
    input_bit_width:int = 2;
    output_bit_width:int = 2;
    weight_bit_width:int = 2;
    /// thres0/thres1: presumably thresholds used when need_output_threshold
    /// is set — verify against the opr implementation
    thres0:int = 0;
    thres1:int = 1;
    unpool_size:uint = 4;
    direct_size:uint = 4;
}
|
|
|
|
|
|
|
|
|
|
|
/// collective communication between multiple computing nodes on localhost
table CollectiveComm {
    /// mode of collective communication
    mode:CollectiveCommMode = REDUCE_SUM;
}
|
|
|
|
|
|
|
|
|
|
|
/// HACK: The tag of this param def is actually used for another non-generated
/// param def SerializedDType; the sole purpose of this param def is to provide
/// a spare tag. Do not use.
table FakeSerializedDType {
}
|
|
|
|
|
|
|
|
|
|
|
/// evaluate a predicate and branch keys to setup ExecutionMask objects with
/// associated predicate proxy vars (PPVs)
table CondExecPred {
    /// how to compare predicate var with branch keys
    mode:CondExecPredMode = CASE;
    /// threshold for checking equality of floating-point values
    eps:float = 0.0001;
}
|
|
|
|
|
|
|
|
|
|
|
/// compute a logical function over a set of PPVs
table CondExecPredLogical {
    /// the logical function to apply (or/and/xor and their negations)
    mode:CondExecPredLogicalMode = OR;
}
|
|
|
|
|
|
|
|
|
|
|
/// add ExecutionMask of the input PPV to this opr and readers of the outputs of
/// this opr
table CondExecMark {
    /// mode for computing the gradient
    grad_mode:CondExecMarkGradMode = SUM;
    /// static inference option. **Note:** This is a workaround: since
    /// currently static inference in MegBrain does not take conditional
    /// execution into account, this option can be used to bypass static
    /// inference errors. This is currently only used by automatically
    /// generated gradient oprs.
    static_infer:CondExecMarkStaticInfer = SHAPE_VALUE;
}
|
|
|
|
|
|
|
|
|
|
|
/// merge multiple conditional execution branches
table CondExecMerge {
    /// number of output vars (i.e. vars per branch)
    nr_output:uint = 1;
    /// how the active branches are combined into the outputs
    mode:CondExecMergeMode = EXACT_ONE;
}
|
|
|
|
|
|
|
|
|
|
|
/// opr implements NVIDIA Optical Flow SDK
table NvOf {
    /// NOTE(review): presumably selects the SDK's output grid/perf precision
    /// level; meaning of the value 1 is not shown here — confirm against the
    /// NvOf opr implementation
    precision:uint = 1;
}
|
|
|
|
|
|
|
|
|
|
|
|