|
|
|
@@ -46,16 +46,16 @@ void IndirectGemmFp32(float *output, const float *input, const float *weight, co |
|
|
|
// Declaration only; the definition is not visible in this part of the file.
// Takes a shape array plus four int index values and returns an int.
// NOTE(review): presumably the flat element offset of (dim0, dim1, dim2, dim3)
// within a 4-D tensor described by `shape` - confirm against the definition.
int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); |
|
|
|
// Declaration only. Same parameter pattern as offset() but with three index
// values instead of four.
// NOTE(review): presumably the offset with the last index omitted (i.e. the
// start of the innermost run) - confirm against the definition.
int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2); |
|
|
|
// Declaration only. Takes the index values as an array (`dims`) rather than as
// separate arguments.
// NOTE(review): the name suggests `dims` holds four entries - confirm the
// expected length against the definition before calling.
int offset4d(const int *shape, const int *dims); |
|
|
|
// Reports whether x + y would overflow the int32_t range.
//
// The check is performed BEFORE any addition takes place: evaluating x + y on
// overflowing operands is undefined behaviour for signed integers, so the
// result of such a sum cannot be inspected safely (and a wrapped-sum test
// also misses INT32_MIN + INT32_MIN, which wraps to exactly 0).
//
// Returns true if the mathematical sum lies outside [INT32_MIN, INT32_MAX],
// false otherwise. Requires <stdint.h> for INT32_MAX / INT32_MIN.
static inline bool isAddOverflow(int32_t x, int32_t y) {
  if (y > 0) {
    // INT32_MAX - y cannot overflow because y is positive.
    return x > INT32_MAX - y;
  }
  if (y < 0) {
    // INT32_MIN - y cannot overflow because y is negative.
    return x < INT32_MIN - y;
  }
  // Adding zero never overflows.
  return false;
}
|
|
|
|
|
|
|
// Reports whether x * y would overflow the int32_t range.
//
// The product is formed in 64-bit arithmetic, where two 32-bit operands can
// never overflow, and then range-checked. This avoids both the undefined
// behaviour of an overflowing 32-bit signed multiply and the undefined
// INT32_MIN / -1 division that a "multiply, then divide back" check can hit.
//
// Returns true if the mathematical product lies outside
// [INT32_MIN, INT32_MAX], false otherwise. Requires <stdint.h>.
static inline bool isMulOverflow(int32_t x, int32_t y) {
  const int64_t product = (int64_t)x * (int64_t)y;
  return product > INT32_MAX || product < INT32_MIN;
}
|
|
|
|
|
|
|
#ifdef ENABLE_ARM64 |
|
|
|
// Declared only when ENABLE_ARM64 is defined; the definition is not visible
// in this part of the file. `bias` is read-only (const), `data` is writable.
// NOTE(review): presumably adds `bias` to `data` in place, with `oc4` an
// output-channel count in units of 4 and `plan_size` the per-channel plane
// size - confirm against the implementation.
void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size); |
|
|
|
// Same signature as BiasAdd; also declared only when ENABLE_ARM64 is defined.
// NOTE(review): presumably the bias add followed by a ReLU6 clamp - confirm
// against the implementation.
void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size); |
|
|
|
|