Merge pull request !4415 from songhonglei413/roitags/v0.7.0-beta
| @@ -47,9 +47,9 @@ int ActivationCPUKernel::DoActivation(int task_id) { | |||||
| auto error_code = RET_OK; | auto error_code = RET_OK; | ||||
| if (type_ == schema::ActivationType_RELU) { | if (type_ == schema::ActivationType_RELU) { | ||||
| error_code = Relu(input_addr + stride * task_id, count, output_addr + stride * task_id); | |||||
| error_code = Fp32Relu(input_addr + stride * task_id, count, output_addr + stride * task_id); | |||||
| } else if (type_ == schema::ActivationType_RELU6) { | } else if (type_ == schema::ActivationType_RELU6) { | ||||
| error_code = Relu6(input_addr + stride * task_id, count, output_addr + stride * task_id); | |||||
| error_code = Fp32Relu6(input_addr + stride * task_id, count, output_addr + stride * task_id); | |||||
| } else if (type_ == schema::ActivationType_LEAKY_RELU) { | } else if (type_ == schema::ActivationType_LEAKY_RELU) { | ||||
| error_code = LRelu(input_addr + stride * task_id, count, output_addr + stride * task_id, alpha_); | error_code = LRelu(input_addr + stride * task_id, count, output_addr + stride * task_id, alpha_); | ||||
| } else if (type_ == schema::ActivationType_SIGMOID) { | } else if (type_ == schema::ActivationType_SIGMOID) { | ||||
| @@ -13,33 +13,17 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_GRAD_ACTIVATION_GRAD_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_GRAD_ACTIVATION_GRAD_H_ | |||||
| #include "nnacl/activation_grad.h" | |||||
| #include <math.h> | |||||
| #include "nnacl/op_base.h" | |||||
| #include "nnacl/fp32/arithmetic.h" | |||||
| #include "nnacl/errorcode.h" | |||||
| typedef struct ActivationGradParameter { | |||||
| OpParameter op_parameter{}; | |||||
| int type_; | |||||
| float alpha_{0.01}; | |||||
| } ActivationGradParameter; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| inline int ReluGrad(float *src0, float *src1, int length, float *dst) { | |||||
| int ReluGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| dst[i] = src1[i] > 0 ? 1.0f : 0.0f; | dst[i] = src1[i] > 0 ? 1.0f : 0.0f; | ||||
| } | } | ||||
| ElementMul(src0, dst, dst, length); | ElementMul(src0, dst, dst, length); | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int Relu6Grad(float *src0, float *src1, int length, float *dst) { | |||||
| int Relu6Grad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| if (src1[i] < 0) { | if (src1[i] < 0) { | ||||
| dst[i] = 0; | dst[i] = 0; | ||||
| @@ -48,49 +32,43 @@ inline int Relu6Grad(float *src0, float *src1, int length, float *dst) { | |||||
| } | } | ||||
| } | } | ||||
| ElementMul(src0, dst, dst, length); | ElementMul(src0, dst, dst, length); | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int LReluGrad(float *src0, float *src1, int length, float *dst, float alpha) { | |||||
| int LReluGrad(float *src0, float *src1, int length, float *dst, float alpha) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| dst[i] = src1[i] > 0.0f ? 1.0f : alpha; | dst[i] = src1[i] > 0.0f ? 1.0f : alpha; | ||||
| } | } | ||||
| ElementMul(src0, dst, dst, length); | ElementMul(src0, dst, dst, length); | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int SigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| int SigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| dst[i] = src0[i] * (src1[i] * (1.0f - src1[i])); | dst[i] = src0[i] * (src1[i] * (1.0f - src1[i])); | ||||
| } | } | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int TanhGrad(float *src0, float *src1, int length, float *dst) { | |||||
| int TanhGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i]; | dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i]; | ||||
| } | } | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int HSwishGrad(float *src0, float *src1, int length, float *dst) { | |||||
| int HSwishGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f)); | float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f)); | ||||
| dst[i] = tmp * src0[i]; | dst[i] = tmp * src0[i]; | ||||
| } | } | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| inline int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | for (int i = 0; i < length; ++i) { | ||||
| float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); | float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); | ||||
| dst[i] = tmp * src0[i]; | dst[i] = tmp * src0[i]; | ||||
| } | } | ||||
| return OPCLIB_OK; | |||||
| return NNACL_OK; | |||||
| } | } | ||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_GRAD_ACTIVATION_GRAD_H_ | |||||
| @@ -22,7 +22,7 @@ | |||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| typedef struct ActivationGradParameter { | typedef struct ActivationGradParameter { | ||||
| OpParameter op_parameter{}; | |||||
| OpParameter op_parameter; | |||||
| int type_; | int type_; | ||||
| float alpha_; | float alpha_; | ||||
| } ActivationGradParameter; | } ActivationGradParameter; | ||||
| @@ -30,63 +30,14 @@ typedef struct ActivationGradParameter { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| inline int ReluGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src1[i] > 0 ? 1.0f : 0.0f; | |||||
| } | |||||
| ElementMul(src0, dst, dst, length); | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int Relu6Grad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| if (src1[i] < 0) { | |||||
| dst[i] = 0; | |||||
| } else { | |||||
| dst[i] = src1[i] > 6.0f ? 0.0f : 1.0f; | |||||
| } | |||||
| } | |||||
| ElementMul(src0, dst, dst, length); | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int LReluGrad(float *src0, float *src1, int length, float *dst, float alpha) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src1[i] > 0.0f ? 1.0f : alpha; | |||||
| } | |||||
| ElementMul(src0, dst, dst, length); | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int SigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src0[i] * (src1[i] * (1.0f - src1[i])); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int TanhGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i]; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int ReluGrad(float *src0, float *src1, int length, float *dst); | |||||
| int Relu6Grad(float *src0, float *src1, int length, float *dst); | |||||
| int LReluGrad(float *src0, float *src1, int length, float *dst, float alpha); | |||||
| int SigmoidGrad(float *src0, float *src1, int length, float *dst); | |||||
| int TanhGrad(float *src0, float *src1, int length, float *dst); | |||||
| int HSwishGrad(float *src0, float *src1, int length, float *dst); | |||||
| int HSigmoidGrad(float *src0, float *src1, int length, float *dst); | |||||
| inline int HSwishGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f)); | |||||
| dst[i] = tmp * src0[i]; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); | |||||
| dst[i] = tmp * src0[i]; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -17,6 +17,16 @@ | |||||
| #include "nnacl/common_func.h" | #include "nnacl/common_func.h" | ||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
// Flattens a 4-D index (dim0, dim1, dim2, dim3) into a linear offset using the extents
// shape[1], shape[2] and shape[3]; shape[0] is not read.
int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
  int flat = dim0;
  flat = flat * shape[1] + dim1;
  flat = flat * shape[2] + dim2;
  flat = flat * shape[3] + dim3;
  return flat;
}
// Linear offset of the first element addressed by (dim0, dim1, dim2), i.e. the same
// flattening as a 4-D index whose last coordinate is 0. shape[0] is not read.
int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
  int flat = dim0;
  flat = flat * shape[1] + dim1;
  flat = flat * shape[2] + dim2;
  return flat * shape[3];
}
// Array-argument form of the 4-D flattening: dims[0..3] are the four coordinates and
// shape[1..3] the extents used for striding (shape[0] is not read).
int offset4d(const int *shape, const int *dims) {
  int flat = dims[0];
  flat = flat * shape[1] + dims[1];
  flat = flat * shape[2] + dims[2];
  flat = flat * shape[3] + dims[3];
  return flat;
}
| #ifndef ENABLE_ARM64 | #ifndef ENABLE_ARM64 | ||||
| void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4, | void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4, | ||||
| int output_channel, size_t offset, size_t relu, size_t relu6) { | int output_channel, size_t offset, size_t relu, size_t relu6) { | ||||
| @@ -45,16 +45,9 @@ void IndirectGemmFp32_Comm(float *output, const float *input, const float *weigh | |||||
| size_t offset); | size_t offset); | ||||
| void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4, | void IndirectGemmFp32(float *output, const float *input, const float *weight, const float *bias, size_t step, int ic4, | ||||
| int output_channel, size_t offset, size_t relu, size_t relu6); | int output_channel, size_t offset, size_t relu, size_t relu6); | ||||
| inline int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) { | |||||
| return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3; | |||||
| } | |||||
| inline int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) { | |||||
| return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3]; | |||||
| } | |||||
| inline int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); } | |||||
| int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); | |||||
| int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2); | |||||
| int offset4d(const int *shape, const int *dims); | |||||
| #ifdef ENABLE_ARM64 | #ifdef ENABLE_ARM64 | ||||
| void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size); | void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size); | ||||
| @@ -0,0 +1,66 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/fp32/activation.h" | |||||
| #include "nnacl/errorcode.h" | |||||
| int Fp32Relu(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src[i] > 0 ? src[i] : 0; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int Fp32Relu6(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| if (src[i] < 0) { | |||||
| dst[i] = 0; | |||||
| } else { | |||||
| dst[i] = src[i] > 6.0f ? 6.0f : src[i]; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int LRelu(const float *src, int length, float *dst, float alpha) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src[i] > 0 ? src[i] : (src[i] * alpha); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int Sigmoid(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = 1.0f / (1.0f + exp(-src[i])); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int Tanh(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = 1.0f - 2.0f / (exp(2 * src[i]) + 1); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| int HSwish(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| float in = src[i]; | |||||
| float relu6 = MSMIN(MSMAX(in + 3, 0), 6); | |||||
| dst[i] = in * relu6 / 6; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| @@ -18,7 +18,6 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
| typedef struct ActivationParameter { | typedef struct ActivationParameter { | ||||
| @@ -27,52 +26,16 @@ typedef struct ActivationParameter { | |||||
| float alpha_; | float alpha_; | ||||
| } ActivationParameter; | } ActivationParameter; | ||||
| inline int Relu(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src[i] > 0 ? src[i] : 0; | |||||
| } | |||||
| return NNACL_OK; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| int Fp32Relu(const float *src, int length, float *dst); | |||||
| int Fp32Relu6(const float *src, int length, float *dst); | |||||
| int LRelu(const float *src, int length, float *dst, float alpha); | |||||
| int Sigmoid(const float *src, int length, float *dst); | |||||
| int Tanh(const float *src, int length, float *dst); | |||||
| int HSwish(const float *src, int length, float *dst); | |||||
| #ifdef __cplusplus | |||||
| } | } | ||||
| inline int Relu6(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| if (src[i] < 0) { | |||||
| dst[i] = 0; | |||||
| } else { | |||||
| dst[i] = src[i] > 6.0f ? 6.0f : src[i]; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int LRelu(const float *src, int length, float *dst, float alpha) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = src[i] > 0 ? src[i] : (src[i] * alpha); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int Sigmoid(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = 1.0f / (1.0f + exp(-src[i])); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int Tanh(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| dst[i] = 1.0f - 2.0f / (exp(2 * src[i]) + 1); | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| inline int HSwish(const float *src, int length, float *dst) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| float in = src[i]; | |||||
| float relu6 = MSMIN(MSMAX(in + 3, 0), 6); | |||||
| dst[i] = in * relu6 / 6; | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ACTIVATION_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ACTIVATION_H_ | ||||
| @@ -119,7 +119,7 @@ void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, size_t row, size_t col) | |||||
| return; | return; | ||||
| } | } | ||||
| inline void MatrixUnPackUnit(const void *src, void *dst, size_t row, size_t col, size_t src_stride, size_t dst_stride, | |||||
| void MatrixUnPackUnit(const void *src, void *dst, size_t row, size_t col, size_t src_stride, size_t dst_stride, | |||||
| size_t data_lenth) { | size_t data_lenth) { | ||||
| size_t copy_size = col * data_lenth; | size_t copy_size = col * data_lenth; | ||||
| size_t src_size = src_stride * data_lenth; | size_t src_size = src_stride * data_lenth; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "nnacl/int8/div_int8.h" | #include "nnacl/int8/div_int8.h" | ||||
| #include "nnacl/quantization/fixed_point.h" | #include "nnacl/quantization/fixed_point.h" | ||||
| #include "nnacl/errorcode.h" | #include "nnacl/errorcode.h" | ||||
| #include "nnacl/quantization/quantize.h" | |||||
| int DivInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, DivQuantArg *para) { | int DivInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, DivQuantArg *para) { | ||||
| int index = 0; | int index = 0; | ||||
| @@ -0,0 +1,30 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/int8/relux_int8.h" | |||||
| void ReluXInt8(const int8_t *src, int length, int8_t *dst, ReluXQuantArg *arg) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| if (src[i] <= arg->input_arg.zp_) { | |||||
| dst[i] = arg->output_arg.zp_; | |||||
| continue; | |||||
| } | |||||
| const int32_t input_val = src[i] - arg->input_arg.zp_; | |||||
| const int32_t scaled_input = SaturatingRoundingDoublingHighMul(input_val, arg->input_multiplier_); | |||||
| const int32_t shifted_input = RoundingDivideByPOT(scaled_input * (1 << arg->left_shift_), -arg->right_shift_); | |||||
| const int32_t output = shifted_input + arg->output_arg.zp_; | |||||
| dst[i] = (int8_t)MSMIN(output, arg->quantized_output_max); | |||||
| } | |||||
| } | |||||
| @@ -35,19 +35,7 @@ typedef struct ReluXQuantArg { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| inline void ReluXInt8(const int8_t *src, int length, int8_t *dst, ReluXQuantArg *arg) { | |||||
| for (int i = 0; i < length; ++i) { | |||||
| if (src[i] <= arg->input_arg.zp_) { | |||||
| dst[i] = arg->output_arg.zp_; | |||||
| continue; | |||||
| } | |||||
| const int32_t input_val = src[i] - arg->input_arg.zp_; | |||||
| const int32_t scaled_input = SaturatingRoundingDoublingHighMul(input_val, arg->input_multiplier_); | |||||
| const int32_t shifted_input = RoundingDivideByPOT(scaled_input * (1 << arg->left_shift_), -arg->right_shift_); | |||||
| const int32_t output = shifted_input + arg->output_arg.zp_; | |||||
| dst[i] = (int8_t)MSMIN(output, arg->quantized_output_max); | |||||
| } | |||||
| } | |||||
| void ReluXInt8(const int8_t *src, int length, int8_t *dst, ReluXQuantArg *arg); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,507 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/matrix_table.h" | |||||
// Writes the 8 coefficients of the "G 4x2" table into matrix_data[0..7].
// The table is listed as 4 rows of 2, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG4x2(float *matrix_data) {
  static const float kG4x2[8] = {
    1.0f, 0.0f,   //
    1.0f, 0.5f,   //
    1.0f, -0.5f,  //
    0.0f, 1.0f,
  };
  for (int i = 0; i < 8; ++i) {
    matrix_data[i] = kG4x2[i];
  }
}
// Writes the 8 coefficients of the "GT 2x4" table into matrix_data[0..7].
// The table is listed as 2 rows of 4, in flat index order; it is the transpose of the
// table written by MatrixG4x2.
void MatrixGT2x4(float *matrix_data) {
  static const float kGT2x4[8] = {
    1.0f, 1.0f, 1.0f, 0.0f,   //
    0.0f, 0.5f, -0.5f, 1.0f,
  };
  for (int i = 0; i < 8; ++i) {
    matrix_data[i] = kGT2x4[i];
  }
}
// Writes the 16 coefficients of the "G 8x2" table into matrix_data[0..15].
// The table is listed as 8 rows of 2, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG8x2(float *matrix_data) {
  static const float kG8x2[16] = {
    1.0f, 0.0f,   //
    1.0f, 0.5f,   //
    1.0f, -0.5f,  //
    1.0f, 1.0f,   //
    1.0f, -1.0f,  //
    1.0f, 1.5f,   //
    1.0f, -1.5f,  //
    0.0f, 1.0f,
  };
  for (int i = 0; i < 16; ++i) {
    matrix_data[i] = kG8x2[i];
  }
}
// Writes the 16 coefficients of the "GT 2x8" table into matrix_data[0..15].
// The table is listed as 2 rows of 8, in flat index order, and is the transpose of the table
// written by MatrixG8x2: row 0 holds that table's first column, row 1 its second column.
// Element [3] is 1.0f (it used to be 1.5f): it mirrors MatrixG8x2's element [6], which is 1.0f,
// so the old value broke the transpose relation that every other G/GT pair satisfies.
void MatrixGT2x8(float *matrix_data) {
  static const float kGT2x8[16] = {
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,     //
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 1.0f,
  };
  for (int i = 0; i < 16; ++i) {
    matrix_data[i] = kGT2x8[i];
  }
}
// Writes the 24 coefficients of the "G 8x3" table into matrix_data[0..23].
// The table is listed as 8 rows of 3, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG8x3(float *matrix_data) {
  static const float kG8x3[24] = {
    1.0f, 0.0f, 0.0f,    //
    1.0f, 0.5f, 0.25f,   //
    1.0f, -0.5f, 0.25f,  //
    1.0f, 1.0f, 1.0f,    //
    1.0f, -1.0f, 1.0f,   //
    1.0f, 1.5f, 2.25f,   //
    1.0f, -1.5f, 2.25f,  //
    0.0f, 0.0f, 1.0f,
  };
  for (int i = 0; i < 24; ++i) {
    matrix_data[i] = kG8x3[i];
  }
}
// Writes the 24 coefficients of the "GT 3x8" table into matrix_data[0..23].
// The table is listed as 3 rows of 8, in flat index order; it is the transpose of the
// table written by MatrixG8x3.
void MatrixGT3x8(float *matrix_data) {
  static const float kGT3x8[24] = {
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,     //
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f,  //
    0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 1.0f,
  };
  for (int i = 0; i < 24; ++i) {
    matrix_data[i] = kGT3x8[i];
  }
}
// Writes the 32 coefficients of the "G 8x4" table into matrix_data[0..31].
// The table is listed as 8 rows of 4, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG8x4(float *matrix_data) {
  static const float kG8x4[32] = {
    1.0f, 0.0f, 0.0f, 0.0f,       //
    1.0f, 0.5f, 0.25f, 0.125f,    //
    1.0f, -0.5f, 0.25f, -0.125f,  //
    1.0f, 1.0f, 1.0f, 1.0f,       //
    1.0f, -1.0f, 1.0f, -1.0f,     //
    1.0f, 1.5f, 2.25f, 3.375f,    //
    1.0f, -1.5f, 2.25f, -3.375f,  //
    0.0f, 0.0f, 0.0f, 1.0f,
  };
  for (int i = 0; i < 32; ++i) {
    matrix_data[i] = kG8x4[i];
  }
}
// Writes the 32 coefficients of the "GT 4x8" table into matrix_data[0..31].
// The table is listed as 4 rows of 8, in flat index order; it is the transpose of the
// table written by MatrixG8x4.
void MatrixGT4x8(float *matrix_data) {
  static const float kGT4x8[32] = {
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,          //
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f,       //
    0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f,      //
    0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 1.0f,
  };
  for (int i = 0; i < 32; ++i) {
    matrix_data[i] = kGT4x8[i];
  }
}
// Writes the 40 coefficients of the "G 8x5" table into matrix_data[0..39].
// The table is listed as 8 rows of 5, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG8x5(float *matrix_data) {
  static const float kG8x5[40] = {
    1.0f, 0.0f, 0.0f, 0.0f, 0.0f,          //
    1.0f, 0.5f, 0.25f, 0.125f, 0.0625f,    //
    1.0f, -0.5f, 0.25f, -0.125f, 0.0625f,  //
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f,          //
    1.0f, -1.0f, 1.0f, -1.0f, 1.0f,        //
    1.0f, 1.5f, 2.25f, 3.375f, 5.0625f,    //
    1.0f, -1.5f, 2.25f, -3.375f, 5.0625f,  //
    0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
  };
  for (int i = 0; i < 40; ++i) {
    matrix_data[i] = kG8x5[i];
  }
}
// Writes the 40 coefficients of the "GT 5x8" table into matrix_data[0..39].
// The table is listed as 5 rows of 8, in flat index order; it is the transpose of the
// table written by MatrixG8x5.
void MatrixGT5x8(float *matrix_data) {
  static const float kGT5x8[40] = {
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,             //
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f,          //
    0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f,         //
    0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 0.0f,  //
    0.0f, 0.0625f, 0.0625f, 1.0f, 1.0f, 5.0625f, 5.0625f, 1.0f,
  };
  for (int i = 0; i < 40; ++i) {
    matrix_data[i] = kGT5x8[i];
  }
}
// Writes the 48 coefficients of the "G 8x6" table into matrix_data[0..47].
// The table is listed as 8 rows of 6, in flat index order.
// NOTE(review): presumably a row-major Winograd weight-transform matrix — confirm with the caller.
void MatrixG8x6(float *matrix_data) {
  static const float kG8x6[48] = {
    1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,                //
    1.0f, 0.5f, 0.25f, 0.125f, 0.0625f, 0.03125f,      //
    1.0f, -0.5f, 0.25f, -0.125f, 0.0625f, -0.03125f,   //
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,                //
    1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f,             //
    1.0f, 1.5f, 2.25f, 3.375f, 5.0625f, 7.59375f,      //
    1.0f, -1.5f, 2.25f, -3.375f, 5.0625f, -7.59375f,   //
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f,
  };
  for (int i = 0; i < 48; ++i) {
    matrix_data[i] = kG8x6[i];
  }
}
// Writes the 48 coefficients of the "GT 6x8" table into matrix_data[0..47].
// The table is listed as 6 rows of 8, in flat index order, and is the transpose of the table
// written by MatrixG8x6.
// Only indices 0..47 are written. The earlier version stored 0.0f at [47] and 1.0f at [48],
// which wrote one element past a 6x8 buffer and left the last coefficient wrong: [47] mirrors
// MatrixG8x6's element [47], which is 1.0f.
void MatrixGT6x8(float *matrix_data) {
  static const float kGT6x8[48] = {
    1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,                 //
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f,              //
    0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f,             //
    0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 0.0f,      //
    0.0f, 0.0625f, 0.0625f, 1.0f, 1.0f, 5.0625f, 5.0625f, 0.0f,     //
    0.0f, 0.03125f, -0.03125f, 1.0f, -1.0f, 7.59375f, -7.59375f, 1.0f,
  };
  for (int i = 0; i < 48; ++i) {
    matrix_data[i] = kGT6x8[i];
  }
}
// Fills matrix_data[0..55] with the G8x7 coefficient table, stored as 8
// consecutive groups of 7 values.
void MatrixG8x7(float *matrix_data) {
  static const float kRows[8][7] = {
    {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
    {1.0f, 0.5f, 0.25f, 0.125f, 0.0625f, 0.03125f, 0.015625f},
    {1.0f, -0.5f, 0.25f, -0.125f, 0.0625f, -0.03125f, 0.015625f},
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
    {1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f},
    {1.0f, 1.5f, 2.25f, 3.375f, 5.0625f, 7.59375f, 11.390625f},
    {1.0f, -1.5f, 2.25f, -3.375f, 5.0625f, -7.59375f, 11.390625f},
    {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f}};
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 7; ++col) {
      matrix_data[row * 7 + col] = kRows[row][col];
    }
  }
}
// Fills matrix_data[0..55] with the GT7x8 coefficient table, stored as 7
// consecutive groups of 8 values.
void MatrixGT7x8(float *matrix_data) {
  static const float kRows[7][8] = {
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f},
    {0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f},
    {0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f},
    {0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 0.0f},
    {0.0f, 0.0625f, 0.0625f, 1.0f, 1.0f, 5.0625f, 5.0625f, 0.0f},
    {0.0f, 0.03125f, -0.03125f, 1.0f, -1.0f, 7.59375f, -7.59375f, 0.0f},
    {0.0f, 0.015625f, 0.015625f, 1.0f, 1.0f, 11.390625f, 11.390625f, 1.0f}};
  for (int row = 0; row < 7; ++row) {
    for (int col = 0; col < 8; ++col) {
      matrix_data[row * 8 + col] = kRows[row][col];
    }
  }
}
| @@ -20,496 +20,33 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
// Fills matrix_data[0..7] with the G4x2 coefficient table (4 pairs).
inline void MatrixG4x2(float *matrix_data) {
  static const float kG4x2[4 * 2] = {
    1.0f, 0.0f,
    1.0f, 0.5f,
    1.0f, -0.5f,
    0.0f, 1.0f};
  for (int i = 0; i < 4 * 2; ++i) {
    matrix_data[i] = kG4x2[i];
  }
}
| void MatrixG4x2(float *matrix_data); | |||||
// Fills matrix_data[0..7] with the GT2x4 coefficient table (2 groups of 4).
inline void MatrixGT2x4(float *matrix_data) {
  static const float kGT2x4[2 * 4] = {
    1.0f, 1.0f, 1.0f,  0.0f,
    0.0f, 0.5f, -0.5f, 1.0f};
  float *dst = matrix_data;
  const float *src = kGT2x4;
  const float *const src_end = kGT2x4 + 2 * 4;
  while (src != src_end) {
    *dst++ = *src++;
  }
}
| void MatrixGT2x4(float *matrix_data); | |||||
// Fills matrix_data[0..15] with the G8x2 coefficient table (8 pairs).
inline void MatrixG8x2(float *matrix_data) {
  static const float kRows[8][2] = {
    {1.0f, 0.0f}, {1.0f, 0.5f}, {1.0f, -0.5f}, {1.0f, 1.0f},
    {1.0f, -1.0f}, {1.0f, 1.5f}, {1.0f, -1.5f}, {0.0f, 1.0f}};
  for (int row = 0; row < 8; ++row) {
    matrix_data[2 * row] = kRows[row][0];
    matrix_data[2 * row + 1] = kRows[row][1];
  }
}
| void MatrixG8x2(float *matrix_data); | |||||
// Fills matrix_data[0..15] with the GT2x8 coefficient table: 2 rows of 8
// values, row-major, i.e. the transpose of the table written by MatrixG8x2.
//
// Index 3 is 1.0f: the first column of the MatrixG8x2 table is 1 for its
// first seven rows, so the first row here must be seven ones followed by 0.
// The previous 1.5f at that position did not match the transpose.
inline void MatrixGT2x8(float *matrix_data) {
  static const float kGT2x8[2 * 8] = {
    1.0f, 1.0f, 1.0f,  1.0f, 1.0f,  1.0f, 1.0f,  0.0f,
    0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 1.0f};
  for (int i = 0; i < 2 * 8; ++i) {
    matrix_data[i] = kGT2x8[i];
  }
}
| void MatrixGT2x8(float *matrix_data); | |||||
// Fills matrix_data[0..23] with the G8x3 coefficient table (8 groups of 3).
inline void MatrixG8x3(float *matrix_data) {
  static const float kRows[8][3] = {
    {1.0f, 0.0f, 0.0f},
    {1.0f, 0.5f, 0.25f},
    {1.0f, -0.5f, 0.25f},
    {1.0f, 1.0f, 1.0f},
    {1.0f, -1.0f, 1.0f},
    {1.0f, 1.5f, 2.25f},
    {1.0f, -1.5f, 2.25f},
    {0.0f, 0.0f, 1.0f}};
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 3; ++col) {
      matrix_data[row * 3 + col] = kRows[row][col];
    }
  }
}
| void MatrixG8x3(float *matrix_data); | |||||
// Fills matrix_data[0..23] with the GT3x8 coefficient table (3 groups of 8).
inline void MatrixGT3x8(float *matrix_data) {
  static const float kGT3x8[3 * 8] = {
    1.0f, 1.0f,  1.0f,  1.0f, 1.0f,  1.0f,  1.0f,  0.0f,
    0.0f, 0.5f,  -0.5f, 1.0f, -1.0f, 1.5f,  -1.5f, 0.0f,
    0.0f, 0.25f, 0.25f, 1.0f, 1.0f,  2.25f, 2.25f, 1.0f};
  for (int i = 0; i < 3 * 8; ++i) {
    matrix_data[i] = kGT3x8[i];
  }
}
| void MatrixGT3x8(float *matrix_data); | |||||
// Fills matrix_data[0..31] with the G8x4 coefficient table (8 groups of 4).
inline void MatrixG8x4(float *matrix_data) {
  static const float kG8x4[8 * 4] = {
    1.0f, 0.0f,  0.0f,  0.0f,
    1.0f, 0.5f,  0.25f, 0.125f,
    1.0f, -0.5f, 0.25f, -0.125f,
    1.0f, 1.0f,  1.0f,  1.0f,
    1.0f, -1.0f, 1.0f,  -1.0f,
    1.0f, 1.5f,  2.25f, 3.375f,
    1.0f, -1.5f, 2.25f, -3.375f,
    0.0f, 0.0f,  0.0f,  1.0f};
  float *dst = matrix_data;
  for (const float *src = kG8x4; src != kG8x4 + 8 * 4; ++src) {
    *dst++ = *src;
  }
}
| void MatrixG8x4(float *matrix_data); | |||||
// Fills matrix_data[0..31] with the GT4x8 coefficient table (4 groups of 8).
inline void MatrixGT4x8(float *matrix_data) {
  static const float kRows[4][8] = {
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f},
    {0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f},
    {0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f},
    {0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 1.0f}};
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 8; ++col) {
      matrix_data[row * 8 + col] = kRows[row][col];
    }
  }
}
| void MatrixGT4x8(float *matrix_data); | |||||
// Fills matrix_data[0..39] with the G8x5 coefficient table (8 groups of 5).
inline void MatrixG8x5(float *matrix_data) {
  static const float kG8x5[8 * 5] = {
    1.0f, 0.0f,  0.0f,  0.0f,    0.0f,
    1.0f, 0.5f,  0.25f, 0.125f,  0.0625f,
    1.0f, -0.5f, 0.25f, -0.125f, 0.0625f,
    1.0f, 1.0f,  1.0f,  1.0f,    1.0f,
    1.0f, -1.0f, 1.0f,  -1.0f,   1.0f,
    1.0f, 1.5f,  2.25f, 3.375f,  5.0625f,
    1.0f, -1.5f, 2.25f, -3.375f, 5.0625f,
    0.0f, 0.0f,  0.0f,  0.0f,    1.0f};
  for (int i = 0; i < 8 * 5; ++i) {
    matrix_data[i] = kG8x5[i];
  }
}
| void MatrixG8x5(float *matrix_data); | |||||
// Fills matrix_data[0..39] with the GT5x8 coefficient table (5 groups of 8).
inline void MatrixGT5x8(float *matrix_data) {
  static const float kRows[5][8] = {
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f},
    {0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f},
    {0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f},
    {0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 0.0f},
    {0.0f, 0.0625f, 0.0625f, 1.0f, 1.0f, 5.0625f, 5.0625f, 1.0f}};
  for (int row = 0; row < 5; ++row) {
    for (int col = 0; col < 8; ++col) {
      matrix_data[row * 8 + col] = kRows[row][col];
    }
  }
}
| void MatrixGT5x8(float *matrix_data); | |||||
// Fills matrix_data[0..47] with the G8x6 coefficient table (8 groups of 6).
inline void MatrixG8x6(float *matrix_data) {
  static const float kRows[8][6] = {
    {1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
    {1.0f, 0.5f, 0.25f, 0.125f, 0.0625f, 0.03125f},
    {1.0f, -0.5f, 0.25f, -0.125f, 0.0625f, -0.03125f},
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
    {1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f},
    {1.0f, 1.5f, 2.25f, 3.375f, 5.0625f, 7.59375f},
    {1.0f, -1.5f, 2.25f, -3.375f, 5.0625f, -7.59375f},
    {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f}};
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 6; ++col) {
      matrix_data[row * 6 + col] = kRows[row][col];
    }
  }
}
| void MatrixG8x6(float *matrix_data); | |||||
// Fills matrix_data[0..47] with the GT6x8 coefficient table: 6 rows of 8
// values, row-major, i.e. the transpose of the table written by MatrixG8x6.
//
// The closing 1.0f is stored at index 47 (row 5, column 7), consistent with
// MatrixGT5x8 and MatrixGT7x8. Index 48 is not written because it lies past
// the end of a 6x8 table.
inline void MatrixGT6x8(float *matrix_data) {
  static const float kRows[6][8] = {
    {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f},
    {0.0f, 0.5f, -0.5f, 1.0f, -1.0f, 1.5f, -1.5f, 0.0f},
    {0.0f, 0.25f, 0.25f, 1.0f, 1.0f, 2.25f, 2.25f, 0.0f},
    {0.0f, 0.125f, -0.125f, 1.0f, -1.0f, 3.375f, -3.375f, 0.0f},
    {0.0f, 0.0625f, 0.0625f, 1.0f, 1.0f, 5.0625f, 5.0625f, 0.0f},
    {0.0f, 0.03125f, -0.03125f, 1.0f, -1.0f, 7.59375f, -7.59375f, 1.0f}};
  for (int row = 0; row < 6; ++row) {
    for (int col = 0; col < 8; ++col) {
      matrix_data[row * 8 + col] = kRows[row][col];
    }
  }
}
| void MatrixGT6x8(float *matrix_data); | |||||
// Fills matrix_data[0..55] with the G8x7 coefficient table (8 groups of 7).
inline void MatrixG8x7(float *matrix_data) {
  static const float kG8x7[8 * 7] = {
    1.0f, 0.0f,  0.0f,  0.0f,    0.0f,    0.0f,      0.0f,
    1.0f, 0.5f,  0.25f, 0.125f,  0.0625f, 0.03125f,  0.015625f,
    1.0f, -0.5f, 0.25f, -0.125f, 0.0625f, -0.03125f, 0.015625f,
    1.0f, 1.0f,  1.0f,  1.0f,    1.0f,    1.0f,      1.0f,
    1.0f, -1.0f, 1.0f,  -1.0f,   1.0f,    -1.0f,     1.0f,
    1.0f, 1.5f,  2.25f, 3.375f,  5.0625f, 7.59375f,  11.390625f,
    1.0f, -1.5f, 2.25f, -3.375f, 5.0625f, -7.59375f, 11.390625f,
    0.0f, 0.0f,  0.0f,  0.0f,    0.0f,    0.0f,      1.0f};
  for (int i = 0; i < 8 * 7; ++i) {
    matrix_data[i] = kG8x7[i];
  }
}
| void MatrixG8x7(float *matrix_data); | |||||
// Fills matrix_data[0..55] with the GT7x8 coefficient table (7 groups of 8).
inline void MatrixGT7x8(float *matrix_data) {
  static const float kGT7x8[7 * 8] = {
    1.0f, 1.0f,      1.0f,      1.0f, 1.0f,  1.0f,       1.0f,       0.0f,
    0.0f, 0.5f,      -0.5f,     1.0f, -1.0f, 1.5f,       -1.5f,      0.0f,
    0.0f, 0.25f,     0.25f,     1.0f, 1.0f,  2.25f,      2.25f,      0.0f,
    0.0f, 0.125f,    -0.125f,   1.0f, -1.0f, 3.375f,     -3.375f,    0.0f,
    0.0f, 0.0625f,   0.0625f,   1.0f, 1.0f,  5.0625f,    5.0625f,    0.0f,
    0.0f, 0.03125f,  -0.03125f, 1.0f, -1.0f, 7.59375f,   -7.59375f,  0.0f,
    0.0f, 0.015625f, 0.015625f, 1.0f, 1.0f,  11.390625f, 11.390625f, 1.0f};
  for (int i = 0; i < 7 * 8; ++i) {
    matrix_data[i] = kGT7x8[i];
  }
}
| void MatrixGT7x8(float *matrix_data); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -0,0 +1,171 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/quantization/fixed_point.h" | |||||
// Returns the high 32 bits of 2 * a * b, rounded to nearest.
// Treat a and b as fixed-point values scaled by 2^31, so both lie in [-1, 1].
// The mantissa of the product is then (a / 2^31) * (b / 2^31) * 2^31 = (a * b) / 2^31,
// which is computed here as 2 * a * b / 2^32 with half a unit added before truncating.
// INT_MIN * INT_MIN is the only overflowing case and saturates to INT_MAX.
int SaturatingRoundingDoublingHighMul(int a, int b) {
  const int64_t kHalfUnit = 1ll << 30;
  const int64_t kUnit = 1ll << 31;
  if (a == INT_MIN && b == INT_MIN) {
    return INT_MAX;
  }
  const int64_t product = ((int64_t)a) * ((int64_t)b);
  int64_t nudge = kHalfUnit;
  if (product < 0) {
    nudge = 1ll - kHalfUnit;
  }
  // Divide instead of shifting right: the sum may be negative.
  return (int)((product + nudge) / kUnit);
}
// 16-bit counterpart of SaturatingRoundingDoublingHighMul: returns
// (a * b) / 2^15 rounded to nearest, saturating SHRT_MIN * SHRT_MIN to SHRT_MAX.
int16_t SaturatingRoundingDoublingHighMulInt16(int16_t a, int16_t b) {
  if (a == SHRT_MIN && b == SHRT_MIN) {
    return SHRT_MAX;
  }
  const int32_t product = ((int32_t)a) * ((int32_t)b);
  int16_t nudge;
  if (product >= 0) {
    nudge = (1ll << 14);
  } else {
    nudge = (1ll - (1ll << 14));
  }
  // Division (not a right shift) so negative sums truncate toward zero.
  return (int16_t)((product + nudge) / (1ll << 15));
}
// Divides x by 2^exponent with rounding, i.e. an arithmetic right shift with
// rounding. The discarded low bits are compared against half of the divisor;
// for negative x the threshold is one higher.
int RoundingDivideByPOT(int x, int exponent) {
  const int low_bits_mask = (1ll << exponent) - 1;
  const int discarded = x & low_bits_mask;
  int half = low_bits_mask >> 1;
  if (x < 0) {
    half += 1;
  }
  int quotient = x >> exponent;
  if (discarded > half) {
    quotient += 1;
  }
  return quotient;
}
| int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t left_shift, int32_t right_shift) { | |||||
| return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(value * (1 << left_shift), multiplier), -right_shift); | |||||
| } | |||||
// Returns how many of the 31 non-sign bits of an int32_t remain once
// kIntegerBits of them are set aside.
int FractionsBits(int kIntegerBits) {
  const int non_sign_bits = 8 * sizeof(int32_t) - 1;
  const int fraction_bits = non_sign_bits - kIntegerBits;
  return fraction_bits;
}
// Returns INT32_MAX when kIntegerBits is 0, otherwise 1 << kFractionsBits.
int FixedPoint_One(int kIntegerBits, int kFractionsBits) {
  if (kIntegerBits == 0) {
    return INT32_MAX;
  }
  return (1) << (uint32_t)kFractionsBits;
}
// Returns (a + b) / 2 computed in 64 bits so the sum cannot overflow. One unit
// is added to a positive sum and subtracted otherwise before the truncating
// division.
int RoundingHalfSum(int a, int b) {
  const int64_t total = (int64_t)a + (int64_t)b;
  int64_t nudge = -1;
  if (total > 0) {
    nudge = 1;
  }
  return (int32_t)((total + nudge) / 2);
}
// Bitwise helpers. Each one performs the operation on the unsigned bit
// patterns of its arguments and returns the result as int32_t.
int32_t BitAnd(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs & rhs;
}
int32_t BitOr(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs | rhs;
}
int32_t BitXor(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs ^ rhs;
}
int32_t BitNot(int32_t a) {
  const uint32_t bits = (uint32_t)a;
  return ~bits;
}
| int SelectUsingMask(int mask, int bound, int val) { return BitXor(BitAnd(mask, bound), BitAnd(BitNot(mask), val)); } | |||||
| int32_t MaskNonZero(int32_t a) { | |||||
| int32_t zreo = 0; | |||||
| return a ? BitNot(zreo) : zreo; | |||||
| } | |||||
| int SaturatingRoundingMultiplyByPOT(int32_t x, int Exponent) { | |||||
| int ExponentSign = (Exponent > 0 ? 1 : Exponent < 0 ? -1 : 0); | |||||
| if (ExponentSign == 0) { | |||||
| return x; | |||||
| } else if (ExponentSign == 1) { | |||||
| const int min = INT32_MIN; | |||||
| const int max = INT32_MAX; | |||||
| const int thresold = ((1 << (uint32_t)(31 - Exponent)) - 1); | |||||
| const int postive_mask = MaskNonZero(x > thresold); | |||||
| const int negative_mask = MaskNonZero(x < -thresold); | |||||
| int result = x << Exponent; | |||||
| result = SelectUsingMask(postive_mask, max, result); | |||||
| result = SelectUsingMask(negative_mask, min, result); | |||||
| return result; | |||||
| } else if (ExponentSign == -1) { | |||||
| return RoundingDivideByPOT(x, -Exponent); | |||||
| } else { | |||||
| return 0; | |||||
| } | |||||
| } | |||||
| int32_t Rescale(int x, int kIntegerBitsSrc, int kIntegerBitsDst) { | |||||
| int kExponent = kIntegerBitsSrc - kIntegerBitsDst; | |||||
| int result = SaturatingRoundingMultiplyByPOT(x, kExponent); | |||||
| return result; | |||||
| } | |||||
| static int32_t one_over_one_plus_x_for_x_in_0_1(int32_t a) { | |||||
| int one = FixedPoint_One(0, FractionsBits(0)); | |||||
| int half_denominator = RoundingHalfSum(a, one); | |||||
| const int constant_48_over_17 = 1515870810; | |||||
| const int constant_neg_32_over_17 = -1010580540; | |||||
| int x = constant_48_over_17 + SaturatingRoundingDoublingHighMul(half_denominator, constant_neg_32_over_17); | |||||
| for (int i = 0; i < 3; i++) { | |||||
| int half_denominator_times_x = SaturatingRoundingDoublingHighMul(half_denominator, x); | |||||
| int one_minus_half_denominator_times_x = FixedPoint_One(2, FractionsBits(2)) - half_denominator_times_x; | |||||
| x = x + Rescale(SaturatingRoundingDoublingHighMul(x, one_minus_half_denominator_times_x), 2 + 2, 2); | |||||
| } | |||||
| return Rescale(x, 2 - 1, 0); | |||||
| } | |||||
// Returns the number of zero bits above the highest set bit of x, or 32 when
// x is 0.
//
// The compiler builtin is used when __GNUC__ is defined; __builtin_clz is
// undefined for 0, hence the explicit guard. The portable fallback shifts x
// left until its top bit is set, using an unsigned top-bit constant so that
// no signed overflow is involved.
int CountLeadingZeroBits(uint32_t x) {
#if defined(__GNUC__)
  return x ? __builtin_clz(x) : (int)(8 * sizeof(uint32_t));
#else
  if (x == 0) {
    return 8 * sizeof(uint32_t);
  }
  const uint32_t top_bit = (uint32_t)(1) << (8 * sizeof(uint32_t) - 1);
  int leading_zeros = 0;
  while (x < top_bit) {
    x <<= 1;
    leading_zeros++;
  }
  return leading_zeros;
#endif
}
// Returns the number of redundant sign bits of x: how many bits directly
// below the sign bit are equal to it. Yields 31 for 0 and -1, and 0 for
// INT32_MIN and INT32_MAX.
//
// GCC provides this as __builtin_clrsb, which is well defined for 0. The
// portable fallback maps a negative x to its bitwise complement, so only the
// zero bits above the highest set bit have to be counted, and negation of
// INT32_MIN is never needed.
int CountLeadingSignBits(int32_t x) {
#if defined(__GNUC__) && !defined(__clang__)
  return __builtin_clrsb(x);
#else
  uint32_t folded = x < 0 ? ~(uint32_t)x : (uint32_t)x;
  int sign_bits = (int)(8 * sizeof(int32_t)) - 1;
  // Every significant bit of the folded value is one fewer redundant sign bit.
  while (folded != 0) {
    folded >>= 1;
    sign_bits--;
  }
  return sign_bits;
#endif
}
| int32_t ComputerReciproal(int32_t x, int x_digits, int *recip_shift) { | |||||
| int leading_zreos_plus_one = CountLeadingZeroBits((uint32_t)x); | |||||
| *recip_shift = x_digits - leading_zreos_plus_one; | |||||
| const int32_t shifted_minus_one = (int32_t)(((uint32_t)x << leading_zreos_plus_one) - ((uint32_t)(1) << 31)); | |||||
| const int32_t shifted_scaled = one_over_one_plus_x_for_x_in_0_1(shifted_minus_one); | |||||
| return shifted_scaled; | |||||
| } | |||||
| #ifdef ENABLE_NEON | |||||
| int32x4_t RoundingDivideByPOTInt32x4(int32x4_t x, int exponent) { | |||||
| const int32x4_t shift_vec = vdupq_n_s32(-exponent); | |||||
| const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); | |||||
| const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); | |||||
| return vrshlq_s32(fixed_up_x, shift_vec); | |||||
| } | |||||
| int32x4_t SaturatingRoundingDoublingHighMulInt32x4(int32x4_t a, int32x4_t b) { return vqrdmulhq_s32(a, b); } | |||||
| #endif | |||||
| @@ -17,6 +17,7 @@ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | ||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | ||||
| #include <stdint.h> | |||||
| #include <limits.h> | #include <limits.h> | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| @@ -31,160 +32,54 @@ extern "C" { | |||||
| // so the mantissa of a * b is (a / 2^31) * (b / 2^31) * 2^31= (a * b) / 2^31 | // so the mantissa of a * b is (a / 2^31) * (b / 2^31) * 2^31= (a * b) / 2^31 | ||||
| // actually we compute 2 * a * b / 2^32 | // actually we compute 2 * a * b / 2^32 | ||||
| // and take 32 bits of mantissa for rounding | // and take 32 bits of mantissa for rounding | ||||
inline int SaturatingRoundingDoublingHighMul(int a, int b) {
  // INT_MIN * INT_MIN is the single overflowing input pair; saturate it.
  const int both_min = (a == INT_MIN) && (b == INT_MIN);
  if (both_min) {
    return INT_MAX;
  }
  const int64_t wide_a = (int64_t)a;
  const int64_t wide_b = (int64_t)b;
  const int64_t product = wide_a * wide_b;
  const int64_t nudge = (product < 0) ? (1ll - (1ll << 30)) : (1ll << 30);
  // Use division rather than a right shift because the sum can be negative.
  const int64_t rounded = (product + nudge) / (1ll << 31);
  return (int)rounded;
}
| int SaturatingRoundingDoublingHighMul(int a, int b); | |||||
// 16-bit variant: returns round(2 * a * b / 2^16).
// SHRT_MIN * SHRT_MIN is the only overflowing case and saturates to SHRT_MAX.
inline int16_t SaturatingRoundingDoublingHighMulInt16(int16_t a, int16_t b) {
  if ((a == SHRT_MIN) && (b == SHRT_MIN)) {
    return SHRT_MAX;
  }
  const int32_t product = (int32_t)a * (int32_t)b;
  // Half of the divisor, signed like the product, so the truncating division rounds to nearest.
  int16_t nudge = (1ll << 14);
  if (product < 0) {
    nudge = (1ll - (1ll << 14));
  }
  return (int16_t)((product + nudge) / (1ll << 15));
}
| int16_t SaturatingRoundingDoublingHighMulInt16(int16_t a, int16_t b); | |||||
| // division by a 2^exponent with rounding | // division by a 2^exponent with rounding | ||||
| // or arithmetic right shift with rounding | // or arithmetic right shift with rounding | ||||
// Divides x by 2^exponent, rounding to nearest with ties away from zero.
// `exponent` is used directly as a shift count.
inline int RoundingDivideByPOT(int x, int exponent) {
  const int low_bits_mask = (1ll << exponent) - 1;
  const int dropped_bits = x & low_bits_mask;
  // Negative inputs need strictly more than half before the quotient is bumped.
  int half = low_bits_mask >> 1;
  if (x < 0) {
    half += 1;
  }
  int quotient = x >> exponent;
  if (dropped_bits > half) {
    quotient += 1;
  }
  return quotient;
}
| int RoundingDivideByPOT(int x, int exponent); | |||||
| inline int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t left_shift, int32_t right_shift) { | |||||
| return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(value * (1 << left_shift), multiplier), -right_shift); | |||||
| } | |||||
| int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t left_shift, int32_t right_shift); | |||||
// Number of fractional bits left in a 32-bit fixed-point value once the sign bit and
// `kIntegerBits` integer bits are taken away.
inline int FractionsBits(int kIntegerBits) {
  const int non_sign_bits = (int)(8 * sizeof(int32_t)) - 1;
  return non_sign_bits - kIntegerBits;
}
| int FractionsBits(int kIntegerBits); | |||||
// Raw representation of 1.0 for a fixed-point format with the given bit split.
// With no integer bits 1.0 is not representable, so INT32_MAX is returned instead;
// otherwise the result is 1 << kFractionsBits.
inline int FixedPoint_One(int kIntegerBits, int kFractionsBits) {
  if (kIntegerBits == 0) {
    return INT32_MAX;
  }
  return (1) << (uint32_t)kFractionsBits;
}
| int FixedPoint_One(int kIntegerBits, int kFractionsBits); | |||||
// Returns (a + b) / 2 rounded to nearest, computed in 64 bits so the sum cannot overflow.
inline int RoundingHalfSum(int a, int b) {
  const int64_t total = (int64_t)a + (int64_t)b;
  // Nudge by one in the direction of the sum so the truncating division rounds halves outward.
  const int64_t nudge = (total > 0) ? 1 : -1;
  return (int32_t)((total + nudge) / 2);
}
| int RoundingHalfSum(int a, int b); | |||||
// Bitwise AND of two int32 values, carried out on their unsigned bit patterns.
inline int32_t BitAnd(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs & rhs;
}
| int32_t BitAnd(int32_t a, int32_t b); | |||||
// Bitwise OR of two int32 values, carried out on their unsigned bit patterns.
inline int32_t BitOr(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs | rhs;
}
| int32_t BitOr(int32_t a, int32_t b); | |||||
// Bitwise XOR of two int32 values, carried out on their unsigned bit patterns.
inline int32_t BitXor(int32_t a, int32_t b) {
  const uint32_t lhs = (uint32_t)a;
  const uint32_t rhs = (uint32_t)b;
  return lhs ^ rhs;
}
| int32_t BitXor(int32_t a, int32_t b); | |||||
// Bitwise complement of an int32 value, carried out on its unsigned bit pattern.
inline int32_t BitNot(int32_t a) {
  const uint32_t bits = (uint32_t)a;
  return ~bits;
}
| int32_t BitNot(int32_t a); | |||||
// Bitwise select: every bit set in `mask` takes the matching bit of `bound`, every clear
// bit takes the matching bit of `val`. With an all-ones mask the result is `bound`, with
// an all-zeros mask it is `val`.
inline int SelectUsingMask(int mask, int bound, int val) {
  const uint32_t mask_bits = (uint32_t)mask;
  const uint32_t from_bound = mask_bits & (uint32_t)bound;
  const uint32_t from_val = ~mask_bits & (uint32_t)val;
  return (int32_t)(from_bound ^ from_val);
}
| int SelectUsingMask(int mask, int bound, int val); | |||||
// Turns a truth value into a bit mask: all ones (-1) when `a` is non-zero, 0 otherwise.
inline int32_t MaskNonZero(int32_t a) {
  if (a != 0) {
    return (int32_t)(~(uint32_t)0);
  }
  return 0;
}
| int32_t MaskNonZero(int32_t a); | |||||
| inline int SaturatingRoundingMultiplyByPOT(int32_t x, int Exponent) { | |||||
| int ExponentSign = (Exponent > 0 ? 1 : Exponent < 0 ? -1 : 0); | |||||
| if (ExponentSign == 0) { | |||||
| return x; | |||||
| } else if (ExponentSign == 1) { | |||||
| const int min = INT32_MIN; | |||||
| const int max = INT32_MAX; | |||||
| const int thresold = ((1 << (uint32_t)(31 - Exponent)) - 1); | |||||
| const int postive_mask = MaskNonZero(x > thresold); | |||||
| const int negative_mask = MaskNonZero(x < -thresold); | |||||
| int result = x << Exponent; | |||||
| result = SelectUsingMask(postive_mask, max, result); | |||||
| result = SelectUsingMask(negative_mask, min, result); | |||||
| return result; | |||||
| } else if (ExponentSign == -1) { | |||||
| return RoundingDivideByPOT(x, -Exponent); | |||||
| } else { | |||||
| return 0; | |||||
| } | |||||
| } | |||||
| int SaturatingRoundingMultiplyByPOT(int32_t x, int Exponent); | |||||
| inline int32_t Rescale(int x, int kIntegerBitsSrc, int kIntegerBitsDst) { | |||||
| int kExponent = kIntegerBitsSrc - kIntegerBitsDst; | |||||
| int result = SaturatingRoundingMultiplyByPOT(x, kExponent); | |||||
| return result; | |||||
| } | |||||
| int32_t Rescale(int x, int kIntegerBitsSrc, int kIntegerBitsDst); | |||||
| static inline int32_t one_over_one_plus_x_for_x_in_0_1(int32_t a) { | |||||
| int one = FixedPoint_One(0, FractionsBits(0)); | |||||
| int half_denominator = RoundingHalfSum(a, one); | |||||
| const int constant_48_over_17 = 1515870810; | |||||
| const int constant_neg_32_over_17 = -1010580540; | |||||
| int x = constant_48_over_17 + SaturatingRoundingDoublingHighMul(half_denominator, constant_neg_32_over_17); | |||||
| for (int i = 0; i < 3; i++) { | |||||
| int half_denominator_times_x = SaturatingRoundingDoublingHighMul(half_denominator, x); | |||||
| int one_minus_half_denominator_times_x = FixedPoint_One(2, FractionsBits(2)) - half_denominator_times_x; | |||||
| x = x + Rescale(SaturatingRoundingDoublingHighMul(x, one_minus_half_denominator_times_x), 2 + 2, 2); | |||||
| } | |||||
| return Rescale(x, 2 - 1, 0); | |||||
| } | |||||
| static int32_t one_over_one_plus_x_for_x_in_0_1(int32_t a); | |||||
// Counts the zero bits above the most significant set bit of x; returns 32 for x == 0.
inline int CountLeadingZeroBits(uint32_t x) {
#if defined(__GNUC__)
  // __builtin_clz is undefined for 0, so that case is handled explicitly.
  return x ? __builtin_clz(x) : (int)(8 * sizeof(uint32_t));
#else
  if (x == 0) {
    return (int)(8 * sizeof(uint32_t));
  }
  // Unsigned top-bit constant: building it as a signed value would overflow int32_t.
  const uint32_t top_bit = (uint32_t)(1) << (8 * sizeof(uint32_t) - 1);
  int leading_zeros = 0;
  while (x < top_bit) {
    x <<= 1;
    leading_zeros++;
  }
  return leading_zeros;
#endif
}
| int CountLeadingZeroBits(uint32_t x); | |||||
// Counts the redundant sign bits of x: how many bits directly below the sign bit are
// equal to it. Returns 31 for 0 and -1, and 0 for INT32_MIN and INT32_MAX.
inline int CountLeadingSignBits(int32_t x) {
  // Complementing a negative value turns its leading ones into leading zeros, so both
  // signs reduce to "leading zeros minus the sign bit itself".
  const uint32_t folded = (uint32_t)(x < 0 ? ~x : x);
#if defined(__GNUC__)
  // __builtin_clz is undefined for 0 (x == 0 or x == -1), so that case is handled explicitly.
  return folded ? __builtin_clz(folded) - 1 : (int)(8 * sizeof(int32_t)) - 1;
#else
  return CountLeadingZeroBits(folded) - 1;
#endif
}
| int CountLeadingSignBits(int32_t x); | |||||
| static inline int32_t ComputerReciproal(int32_t x, int x_digits, int *recip_shift) { | |||||
| int leading_zreos_plus_one = CountLeadingZeroBits((uint32_t)x); | |||||
| *recip_shift = x_digits - leading_zreos_plus_one; | |||||
| const int32_t shifted_minus_one = (int32_t)(((uint32_t)x << leading_zreos_plus_one) - ((uint32_t)(1) << 31)); | |||||
| const int32_t shifted_scaled = one_over_one_plus_x_for_x_in_0_1(shifted_minus_one); | |||||
| return shifted_scaled; | |||||
| } | |||||
| int32_t ComputerReciproal(int32_t x, int x_digits, int *recip_shift); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| inline int32x4_t RoundingDivideByPOTInt32x4(int32x4_t x, int exponent) { | |||||
| const int32x4_t shift_vec = vdupq_n_s32(-exponent); | |||||
| const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); | |||||
| const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); | |||||
| return vrshlq_s32(fixed_up_x, shift_vec); | |||||
| } | |||||
| int32x4_t RoundingDivideByPOTInt32x4(int32x4_t x, int exponent); | |||||
| inline int32x4_t SaturatingRoundingDoublingHighMulInt32x4(int32x4_t a, int32x4_t b) { return vqrdmulhq_s32(a, b); } | |||||
| int32x4_t SaturatingRoundingDoublingHighMulInt32x4(int32x4_t a, int32x4_t b); | |||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_QUANTIZATION_FIXED_POINT_H_ | ||||
| @@ -26,6 +26,70 @@ const double dNormalizer = 0x1p54; | |||||
| const int dNormalizerBias = 54; | const int dNormalizerBias = 54; | ||||
| const int iMantissaBits = 31; | const int iMantissaBits = 31; | ||||
| void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, | |||||
| int *right_shift) { | |||||
| if (quantized_multiplier == NULL || right_shift == NULL) { | |||||
| return; | |||||
| } | |||||
| int shift; | |||||
| QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); | |||||
| *right_shift = -shift; | |||||
| } | |||||
| void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift, | |||||
| int *right_shift) { | |||||
| int shift; | |||||
| QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift); | |||||
| shift = -shift; | |||||
| if (shift < 0) { | |||||
| *left_shift = 0; | |||||
| *right_shift = shift; | |||||
| } else { | |||||
| *left_shift = shift; | |||||
| *right_shift = 0; | |||||
| } | |||||
| } | |||||
// Quantizes a real value to uint8: round(real_value / scale + zp), saturated to
// [0, UINT8_MAX]. Saturation matters because converting an out-of-range floating-point
// value to uint8_t is undefined behaviour. NaN maps to 0.
uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) {
  const double rounded = round(real_value / scale + zp);
  if (!(rounded > 0)) {  // also true for NaN
    return 0;
  }
  if (rounded > UINT8_MAX) {
    return UINT8_MAX;
  }
  return (uint8_t)rounded;
}
// Returns round(real_value / scale + zp) as a 32-bit integer; no clamping is applied here.
int32_t QuantizeToInt8(float real_value, float scale, int32_t zp) {
  const float scaled = real_value / scale;
  return round(scaled + zp);
}
| void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, | |||||
| int *maxi) { | |||||
| int32_t min = CHAR_MIN; | |||||
| int32_t max = CHAR_MAX; | |||||
| int32_t quantized_zero = QuantizeToInt8(0, scale, zp); | |||||
| int32_t quantized_six = QuantizeToInt8(6, scale, zp); | |||||
| if (is_relu) { | |||||
| min = min > quantized_zero ? min : quantized_zero; | |||||
| } else if (is_relu6) { | |||||
| min = min > quantized_zero ? min : quantized_zero; | |||||
| max = max < quantized_six ? max : quantized_six; | |||||
| } else { | |||||
| // do nothing | |||||
| } | |||||
| *mini = min; | |||||
| *maxi = max; | |||||
| } | |||||
// Quantizes `length` floats to int8: q = round(input / scale + zero_point), clamped to
// [INT8_MIN, INT8_MAX]. The int8 limits are used instead of CHAR_MIN / CHAR_MAX because
// plain char is unsigned on some targets (ARM among them), where the old bounds were 0..255.
void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
  for (int i = 0; i < length; ++i) {
    int q = (int)round(input_data[i] / scale + zero_point);
    q = q > INT8_MAX ? INT8_MAX : q;
    q = q < INT8_MIN ? INT8_MIN : q;
    output_data[i] = (int8_t)q;
  }
}
// Converts `length` int8 values back to float: output = scale * (input - zero_point).
void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
  int idx = 0;
  while (idx < length) {
    const int centered = input_data[idx] - zero_point;
    output_data[idx] = scale * centered;
    ++idx;
  }
}
| void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) { | void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) { | ||||
| if (quantized_multiplier == NULL || shift == NULL) { | if (quantized_multiplier == NULL || shift == NULL) { | ||||
| return; | return; | ||||
| @@ -213,68 +213,20 @@ extern "C" { | |||||
| void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); | void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); | ||||
| inline void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, | |||||
| int *right_shift) { | |||||
| if (quantized_multiplier == NULL || right_shift == NULL) { | |||||
| return; | |||||
| } | |||||
| int shift; | |||||
| QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); | |||||
| *right_shift = -shift; | |||||
| } | |||||
| inline void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift, | |||||
| int *right_shift) { | |||||
| int shift; | |||||
| QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift); | |||||
| shift = -shift; | |||||
| if (shift < 0) { | |||||
| *left_shift = 0; | |||||
| *right_shift = shift; | |||||
| } else { | |||||
| *left_shift = shift; | |||||
| *right_shift = 0; | |||||
| } | |||||
| } | |||||
| void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int *right_shift); | |||||
// Quantizes a real value to uint8: round(real_value / scale + zp), saturated to
// [0, UINT8_MAX]. Saturation matters because converting an out-of-range floating-point
// value to uint8_t is undefined behaviour. NaN maps to 0.
inline uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp) {
  const double rounded = round(real_value / scale + zp);
  if (!(rounded > 0)) {  // also true for NaN
    return 0;
  }
  if (rounded > UINT8_MAX) {
    return UINT8_MAX;
  }
  return (uint8_t)rounded;
}
| void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift, int *right_shift); | |||||
// Returns round(real_value / scale + zp) as a 32-bit integer; no clamping is applied here.
inline int32_t QuantizeToInt8(float real_value, float scale, int32_t zp) {
  const float scaled = real_value / scale;
  return round(scaled + zp);
}
| uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp); | |||||
| inline void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, | |||||
| int *maxi) { | |||||
| int32_t min = CHAR_MIN; | |||||
| int32_t max = CHAR_MAX; | |||||
| int32_t quantized_zero = QuantizeToInt8(0, scale, zp); | |||||
| int32_t quantized_six = QuantizeToInt8(6, scale, zp); | |||||
| if (is_relu) { | |||||
| min = min > quantized_zero ? min : quantized_zero; | |||||
| } else if (is_relu6) { | |||||
| min = min > quantized_zero ? min : quantized_zero; | |||||
| max = max < quantized_six ? max : quantized_six; | |||||
| } else { | |||||
| // do nothing | |||||
| } | |||||
| *mini = min; | |||||
| *maxi = max; | |||||
| } | |||||
| int32_t QuantizeToInt8(float real_value, float scale, int32_t zp); | |||||
| void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi); | |||||
| // quantize from float to int8 | // quantize from float to int8 | ||||
// Quantizes `length` floats to int8: q = round(input / scale + zero_point), clamped to
// [INT8_MIN, INT8_MAX]. The int8 limits are used instead of CHAR_MIN / CHAR_MAX because
// plain char is unsigned on some targets (ARM among them), where the old bounds were 0..255.
inline void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data) {
  for (int i = 0; i < length; ++i) {
    int q = (int)round(input_data[i] / scale + zero_point);
    q = q > INT8_MAX ? INT8_MAX : q;
    q = q < INT8_MIN ? INT8_MIN : q;
    output_data[i] = (int8_t)q;
  }
}
| void Quantize(float *input_data, int length, float scale, int zero_point, int8_t *output_data); | |||||
| // dequantize from int8 to float | // dequantize from int8 to float | ||||
// Converts `length` int8 values back to float: output = scale * (input - zero_point).
inline void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data) {
  int idx = 0;
  while (idx < length) {
    const int centered = input_data[idx] - zero_point;
    output_data[idx] = scale * centered;
    ++idx;
  }
}
| void Dequantize(int8_t *input_data, int length, float scale, int zero_point, float *output_data); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -30,7 +30,7 @@ class TestActivationFp32 : public mindspore::CommonTest { | |||||
| TEST_F(TestActivationFp32, ReluFp32) { | TEST_F(TestActivationFp32, ReluFp32) { | ||||
| float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | ||||
| float output[8] = {0}; | float output[8] = {0}; | ||||
| Relu(input, 8, output); | |||||
| Fp32Relu(input, 8, output); | |||||
| float expect[8] = {0, 0, 0, 0, 1, 5, 6, 7}; | float expect[8] = {0, 0, 0, 0, 1, 5, 6, 7}; | ||||
| for (int i = 0; i < 8; ++i) { | for (int i = 0; i < 8; ++i) { | ||||
| ASSERT_EQ(output[i], expect[i]); | ASSERT_EQ(output[i], expect[i]); | ||||
| @@ -40,7 +40,7 @@ TEST_F(TestActivationFp32, ReluFp32) { | |||||
| TEST_F(TestActivationFp32, Relu6Fp32) { | TEST_F(TestActivationFp32, Relu6Fp32) { | ||||
| float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | float input[8] = {-3, -2, -1, 0, 1, 5, 6, 7}; | ||||
| float output[8] = {0}; | float output[8] = {0}; | ||||
| Relu6(input, 8, output); | |||||
| Fp32Relu6(input, 8, output); | |||||
| float expect[8] = {0, 0, 0, 0, 1, 5, 6, 6}; | float expect[8] = {0, 0, 0, 0, 1, 5, 6, 6}; | ||||
| for (int i = 0; i < 8; ++i) { | for (int i = 0; i < 8; ++i) { | ||||
| ASSERT_EQ(output[i], expect[i]); | ASSERT_EQ(output[i], expect[i]); | ||||