|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- /**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- #include "nnacl/common_func.h"
- #include "nnacl/quantization/fixed_point.h"
-
/* Flattens four indices into a single linear offset.
 * shape[1], shape[2] and shape[3] are used as the extents of the second,
 * third and fourth index respectively; shape[0] is never read. */
int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
  int linear = dim0;
  linear = linear * shape[1] + dim1;
  linear = linear * shape[2] + dim2;
  linear = linear * shape[3] + dim3;
  return linear;
}
-
/* Linear offset of the first element addressed by (dim0, dim1, dim2), i.e.
 * the same arithmetic as a four-index offset with the last index fixed at 0.
 * Reads shape[1], shape[2] and shape[3]; shape[0] is never read. */
int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
  int linear = dim0;
  linear = linear * shape[1] + dim1;
  linear = linear * shape[2] + dim2;
  return linear * shape[3];
}
-
/* Array-argument convenience wrapper around offset(): reads four indices
 * from dims[0..3] and forwards them together with shape. */
int offset4d(const int *shape, const int *dims) {
  const int d0 = dims[0];
  const int d1 = dims[1];
  const int d2 = dims[2];
  const int d3 = dims[3];
  return offset(shape, d0, d1, d2, d3);
}
-
/* Returns the smaller of two signed 8-bit values. */
int8_t MinInt8(int8_t a, int8_t b) {
  if (a < b) {
    return a;
  }
  return b;
}
-
/* Returns the larger of two signed 8-bit values. */
int8_t MaxInt8(int8_t a, int8_t b) {
  if (a < b) {
    return b;
  }
  return a;
}
-
/**
 * Applies ReLU element-wise: dst[i] = data[i] < 0 ? 0 : data[i].
 *
 * @param data    input buffer holding at least ele_num floats
 * @param dst     output buffer holding at least ele_num floats; may be the
 *                same pointer as data for in-place operation
 * @param ele_num number of elements to process; values <= 0 are a no-op
 *
 * Results are always written to dst, both in the vectorised part and in the
 * scalar remainder, so NEON and non-NEON builds produce output in the same
 * buffer. No memory outside [0, ele_num) is touched.
 */
void ReluFp32(float *data, float *dst, int ele_num) {
  int index = 0;
#ifdef ENABLE_NEON
  /* Process full groups of C4NUM floats with one 4-lane vector max each. */
  const float32x4_t zero_data = vdupq_n_f32(0);
  for (; ele_num - index >= C4NUM; index += C4NUM) {
    float32x4_t relu_data = vld1q_f32(data + index);
    relu_data = vmaxq_f32(relu_data, zero_data);
    vst1q_f32(dst + index, relu_data);
  }
#endif
  /* Scalar path: the whole range without NEON, or the leftover elements. */
  for (; index < ele_num; ++index) {
    const float value = data[index];
    dst[index] = value < 0 ? 0 : value;
  }
}
-
/**
 * Applies ReLU6 element-wise: each value is clamped to the range [0, 6] and
 * stored in dst.
 *
 * @param data    input buffer holding at least ele_num floats
 * @param dst     output buffer holding at least ele_num floats; may be the
 *                same pointer as data for in-place operation
 * @param ele_num number of elements to process; values <= 0 are a no-op
 *
 * Results are always written to dst, both in the vectorised part and in the
 * scalar remainder, so NEON and non-NEON builds produce output in the same
 * buffer. No memory outside [0, ele_num) is touched.
 */
void Relu6Fp32(float *data, float *dst, int ele_num) {
  int index = 0;
#ifdef ENABLE_NEON
  /* Process full groups of C4NUM floats: vector max with 0, then min with 6. */
  const float32x4_t zero_data = vdupq_n_f32(0);
  const float32x4_t six_data = vdupq_n_f32(6);
  for (; ele_num - index >= C4NUM; index += C4NUM) {
    float32x4_t relu6_data = vld1q_f32(data + index);
    relu6_data = vmaxq_f32(relu6_data, zero_data);
    relu6_data = vminq_f32(relu6_data, six_data);
    vst1q_f32(dst + index, relu6_data);
  }
#endif
  /* Scalar path: the whole range without NEON, or the leftover elements. */
  for (; index < ele_num; ++index) {
    float value = data[index];
    value = value < 0 ? 0 : value;
    value = value > 6 ? 6 : value;
    dst[index] = value;
  }
}
|