From: @ling_qiao_min Reviewed-by: @zhang_xue_tong,@hangangqiang Signed-off-by: @zhang_xue_tongtags/v1.2.0-rc1
| @@ -13,66 +13,66 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_CAST_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_CAST_BASE_H_ | |||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #include "nnacl/fp32/common_func_fp32.h" | |||||
| #include "nnacl/op_base.h" | |||||
| #include "nnacl/nnacl_common.h" | |||||
| void BoolToFloat32(const bool *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float)input[i]; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| void Uint8ToFloat32(const uint8_t *input, float *output, int number) { | |||||
| inline void BoolToFloat32(const bool *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (float)input[i]; | output[i] = (float)input[i]; | ||||
| } | } | ||||
| } | } | ||||
| void Uint8ToInt8(const uint8_t *input, int8_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (int8_t)(input[i] - 128); | |||||
| } | |||||
| } | |||||
| void Int8ToUint8(const int8_t *input, uint8_t *output, int number) { | |||||
| inline void Uint8ToFloat32(const uint8_t *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (uint8_t)(input[i] + 128); | |||||
| output[i] = (float)input[i]; | |||||
| } | } | ||||
| } | } | ||||
| void Int32ToFloat32(const int32_t *input, float *output, int number) { | |||||
| inline void Int32ToFloat32(const int32_t *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (float)input[i]; | output[i] = (float)input[i]; | ||||
| } | } | ||||
| } | } | ||||
| void Fp16ToFloat32(const uint16_t *input, float *output, int number) { | |||||
| inline void Fp16ToFloat32(const uint16_t *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = ShortToFloat32(input[i]); | output[i] = ShortToFloat32(input[i]); | ||||
| } | } | ||||
| } | } | ||||
| void Float32ToFp16(const float *input, uint16_t *output, int number) { | |||||
| inline void Float32ToFp16(const float *input, uint16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = Float32ToShort(input[i]); | output[i] = Float32ToShort(input[i]); | ||||
| } | } | ||||
| } | } | ||||
| void Float32ToInt32(const float *input, int32_t *output, int number) { | |||||
| inline void Float32ToInt32(const float *input, int32_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (int32_t)input[i]; | output[i] = (int32_t)input[i]; | ||||
| } | } | ||||
| } | } | ||||
| void Float32ToInt64(const float *input, int64_t *output, int number) { | |||||
| inline void Float32ToInt64(const float *input, int64_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (int64_t)input[i]; | output[i] = (int64_t)input[i]; | ||||
| } | } | ||||
| } | } | ||||
| void Int32ToInt64(const int32_t *input, int64_t *output, int number) { | |||||
| inline void Int32ToInt64(const int32_t *input, int64_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | for (int i = 0; i < number; ++i) { | ||||
| output[i] = (int64_t)input[i]; | output[i] = (int64_t)input[i]; | ||||
| } | } | ||||
| } | } | ||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_NNACL_CAST_BASE_H_ | |||||
| @@ -14,17 +14,16 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "nnacl/fp32/concat_fp32.h" | |||||
| #include <string.h> | |||||
| #include "nnacl/base/concat_base.h" | |||||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||||
| void *output, int task_id, int thread_num) { | |||||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int task_id, int thread_num, int data_size) { | |||||
| int before_axis_size = 1; | int before_axis_size = 1; | ||||
| for (int i = 0; i < axis; ++i) { | for (int i = 0; i < axis; ++i) { | ||||
| before_axis_size *= inputs_output_shape[0][i]; | before_axis_size *= inputs_output_shape[0][i]; | ||||
| } | } | ||||
| // sizeof float/int32 | |||||
| int after_axis_size = 4; | |||||
| int after_axis_size = data_size; | |||||
| for (size_t i = axis + 1; i < shape_size; ++i) { | for (size_t i = axis + 1; i < shape_size; ++i) { | ||||
| after_axis_size *= inputs_output_shape[0][i]; | after_axis_size *= inputs_output_shape[0][i]; | ||||
| } | } | ||||
| @@ -14,23 +14,19 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ | |||||
| #define MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ | |||||
| #include <string.h> | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| typedef struct ExpandDimsParameter { | |||||
| // Primitive parameter | |||||
| OpParameter op_parameter_; | |||||
| int dim_; | |||||
| } ExpandDimsParameter; | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size); | |||||
| void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int task_id, int thread_num, int data_size); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_EXPANDDIMS_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_ | |||||
| @@ -14,18 +14,23 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size, | |||||
| void *output, int task_id, int thread_num); | |||||
| inline int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| memcpy(output_ptr, input_ptr, data_size); | |||||
| return NNACL_OK; | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_ | |||||
| @@ -14,9 +14,9 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "nnacl/fp32/fill_fp32.h" | |||||
| #include "nnacl/base/fill_base.h" | |||||
| int Fill(float *output, int size, float data) { | |||||
| int FillFp32(float *output, int size, float data) { | |||||
| for (int i = 0; i < size; ++i) { | for (int i = 0; i < size; ++i) { | ||||
| output[i] = data; | output[i] = data; | ||||
| } | } | ||||
| @@ -13,17 +13,20 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_FILL_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FILL_BASE_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_RESHAHPE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_RESHAHPE_H_ | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| #include "nnacl/fill_parameter.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size); | |||||
| int FillFp32(float *output, int size, float data); | |||||
| int FillInt32(int *output, int size, int data); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_RESHAHPE_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_FILL_BASE_H_ | |||||
| @@ -14,20 +14,9 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "nnacl/fp32/gather_fp32.h" | |||||
| #include <string.h> | |||||
| #include "nnacl/errorcode.h" | |||||
| inline int Stride(const int *shape, int rank, int index) { | |||||
| int i, stride = 1; | |||||
| for (i = index + 1; i < rank; ++i) { | |||||
| stride *= shape[i]; | |||||
| } | |||||
| return stride; | |||||
| } | |||||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output) { | |||||
| #include "nnacl/base/gather_base.h" | |||||
| int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, float *output) { | |||||
| for (int m = 0; m < outer_size; ++m) { | for (int m = 0; m < outer_size; ++m) { | ||||
| const float *inputm = input + inner_size * m * limit; | const float *inputm = input + inner_size * m * limit; | ||||
| float *outputm = output + inner_size * m * indices_element_size; | float *outputm = output + inner_size * m * indices_element_size; | ||||
| @@ -14,20 +14,22 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_GATHER_H_ | |||||
| #define MINDSPORE_LITE_NNACL_GATHER_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_GATHER_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_GATHER_BASE_H_ | |||||
| #include <string.h> | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size, | |||||
| float *output); | |||||
| int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices, | |||||
| int indices_element_size, float *output); | |||||
| int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices, | ||||
| int indices_element_size, int32_t *output); | int indices_element_size, int32_t *output); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_GATHER_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_GATHER_BASE_H_ | |||||
| @@ -14,18 +14,22 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ | |||||
| #define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ | |||||
| #include <string.h> | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int dtype_len); | |||||
| inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| memcpy(output_ptr, input_ptr, data_size); | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_ | |||||
| @@ -23,7 +23,7 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) { | |||||
| if (input_ptr == NULL || output_ptr == NULL) { | if (input_ptr == NULL || output_ptr == NULL) { | ||||
| return NNACL_ERR; | return NNACL_ERR; | ||||
| } | } | ||||
| @@ -0,0 +1,36 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ | |||||
| #include "nnacl/op_base.h" | |||||
| #define BROADCAST_TO_SHAPE_MAX_SIZE 4 | |||||
| typedef struct BroadcastToParameter { | |||||
| OpParameter op_parameter_; | |||||
| int shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| size_t shape_size_; | |||||
| } BroadcastToParameter; | |||||
| typedef struct BroadcastShapeInfo { | |||||
| int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| int input_shape_size_; | |||||
| int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| int output_shape_size_; | |||||
| } BroadcastShapeInfo; | |||||
| #endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_ | |||||
| @@ -13,8 +13,15 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ | |||||
| #define MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ | |||||
| #include "nnacl/reshape.h" | |||||
| #include <string.h> | |||||
| #include "nnacl/op_base.h" | |||||
| void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); } | |||||
| typedef struct CastParameter { | |||||
| OpParameter op_parameter_; | |||||
| int dst_type_; | |||||
| int src_type_; | |||||
| } CastParameter; | |||||
| #endif // MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_ | |||||
| @@ -13,14 +13,10 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_FILL_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FILL_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ | |||||
| #ifdef ENABLE_NEON | |||||
| #include <arm_neon.h> | |||||
| #endif | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/errorcode.h" | |||||
| #define FILL_DIMS_MAX_SIZE 4 | #define FILL_DIMS_MAX_SIZE 4 | ||||
| @@ -31,14 +27,4 @@ typedef struct FillParameter { | |||||
| int num_dims_; | int num_dims_; | ||||
| } FillParameter; | } FillParameter; | ||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| int Fill(float *output, int size, float data); | |||||
| int FillInt32(int *output, int size, int data); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_NNACL_FILL_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_ | |||||
| @@ -13,7 +13,6 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include <assert.h> | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "nnacl/fp16/arithmetic_self_fp16.h" | #include "nnacl/fp16/arithmetic_self_fp16.h" | ||||
| @@ -1,54 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/fp16/cast_fp16.h" | |||||
| void BoolToFloat16(const bool *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| void Float16ToInt32(const float16_t *input, int32_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (int32_t)input[i]; | |||||
| } | |||||
| } | |||||
| void Float16ToInt64(const float16_t *input, int64_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (int64_t)input[i]; | |||||
| } | |||||
| } | |||||
| #ifndef ENABLE_ARM64 | |||||
| void Float32ToFloat16(const float *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| void Float16ToFloat32(const float16_t *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float)input[i]; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| @@ -18,16 +18,47 @@ | |||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| void BoolToFloat16(const bool *input, float16_t *output, int number); | |||||
| void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number); | |||||
| void Float16ToInt32(const float16_t *input, int32_t *output, int number); | |||||
| void Float16ToInt64(const float16_t *input, int64_t *output, int number); | |||||
| void Float32ToFloat16(const float *input, float16_t *output, int number); | |||||
| void Float16ToFloat32(const float16_t *input, float *output, int number); | |||||
| inline void BoolToFloat16(const bool *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| inline void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| inline void Float16ToInt32(const float16_t *input, int32_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (int32_t)input[i]; | |||||
| } | |||||
| } | |||||
| inline void Float16ToInt64(const float16_t *input, int64_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (int64_t)input[i]; | |||||
| } | |||||
| } | |||||
| inline void Float32ToFloat16(const float *input, float16_t *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float16_t)input[i]; | |||||
| } | |||||
| } | |||||
| inline void Float16ToFloat32(const float16_t *input, float *output, int number) { | |||||
| for (int i = 0; i < number; ++i) { | |||||
| output[i] = (float)input[i]; | |||||
| } | |||||
| } | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -1,44 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/fp16/concat_fp16.h" | |||||
| #include <string.h> | |||||
| void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output, | |||||
| int dtype_len) { | |||||
| int before_axis_size = 1; | |||||
| for (int i = 0; i < axis; ++i) { | |||||
| before_axis_size *= inputs_output_shape[0][i]; | |||||
| } | |||||
| // sizeof float16,int32 | |||||
| int after_axis_size = dtype_len; | |||||
| for (size_t i = axis + 1; i < shape_size; ++i) { | |||||
| after_axis_size *= inputs_output_shape[0][i]; | |||||
| } | |||||
| int axis_offset = 0; | |||||
| uint8_t *dst_base = (output); | |||||
| size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; | |||||
| for (int i = 0; i < input_num; ++i) { | |||||
| uint8_t *src_base = (input[i]); | |||||
| size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; | |||||
| for (int j = 0; j < before_axis_size; ++j) { | |||||
| uint8_t *src = src_base + j * input_stride; | |||||
| uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size; | |||||
| memcpy(dst, src, input_stride); | |||||
| } | |||||
| axis_offset += inputs_output_shape[i][axis]; | |||||
| } | |||||
| } | |||||
| @@ -1003,12 +1003,6 @@ int ElementMinimumInt(const int *input0, const int *input1, int *output, const i | |||||
| return NNACL_OK; | return NNACL_OK; | ||||
| } | } | ||||
| int BroadcastMaximum(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, | |||||
| ArithmeticParameter *param) { | |||||
| TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); | |||||
| return ElementMaximum(tile_in0, tile_in1, out, size); | |||||
| } | |||||
| int ElementMinimum(const float *in0, const float *in1, float *out, int size) { | int ElementMinimum(const float *in0, const float *in1, float *out, int size) { | ||||
| int index = 0; | int index = 0; | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| @@ -1027,65 +1021,6 @@ int ElementMinimum(const float *in0, const float *in1, float *out, int size) { | |||||
| #undef ACCURACY_DATA | #undef ACCURACY_DATA | ||||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||||
| int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | |||||
| int *in_datatype, int *out_datatype, OpParameter *param) { | |||||
| *out_format = in_format[0]; | |||||
| *out_datatype = in_datatype[0]; | |||||
| const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param; | |||||
| int ndim0 = dim_size[0]; | |||||
| int ndim1 = dim_size[1]; | |||||
| int *in_shape0 = in_shape[0]; | |||||
| int *in_shape1 = in_shape[1]; | |||||
| if (ndim0 < ndim1) { | |||||
| arithmetic_parameter->ndim_ = ndim1; | |||||
| int fill_dim_num = ndim1 - ndim0; | |||||
| int j = 0; | |||||
| for (int i = 0; i < ndim1; ++i) { | |||||
| if (i < fill_dim_num) { | |||||
| arithmetic_parameter->in_shape0_[i] = 1; | |||||
| } else { | |||||
| arithmetic_parameter->in_shape0_[i] = in_shape0[j++]; | |||||
| } | |||||
| arithmetic_parameter->in_shape1_[i] = in_shape1[i]; | |||||
| } | |||||
| } else if (ndim0 > ndim1) { | |||||
| arithmetic_parameter->ndim_ = ndim0; | |||||
| int fill_dim_num = ndim0 - ndim1; | |||||
| int j = 0; | |||||
| for (int i = 0; i < ndim0; ++i) { | |||||
| if (i < fill_dim_num) { | |||||
| arithmetic_parameter->in_shape1_[i] = 1; | |||||
| } else { | |||||
| arithmetic_parameter->in_shape1_[i] = in_shape1[j++]; | |||||
| } | |||||
| arithmetic_parameter->in_shape0_[i] = in_shape0[i]; | |||||
| } | |||||
| } else { | |||||
| arithmetic_parameter->ndim_ = ndim0; | |||||
| for (int i = 0; i < ndim0; ++i) { | |||||
| arithmetic_parameter->in_shape0_[i] = in_shape0[i]; | |||||
| arithmetic_parameter->in_shape1_[i] = in_shape1[i]; | |||||
| } | |||||
| } | |||||
| int j = 0; | |||||
| for (size_t i = 0; i < arithmetic_parameter->ndim_; ++i) { | |||||
| if (arithmetic_parameter->in_shape0_[i] != arithmetic_parameter->in_shape1_[i]) { | |||||
| if (arithmetic_parameter->in_shape0_[i] == 1) { | |||||
| out_shape[j++] = arithmetic_parameter->in_shape1_[i]; | |||||
| } else if (arithmetic_parameter->in_shape1_[i] == 1) { | |||||
| out_shape[j++] = arithmetic_parameter->in_shape0_[i]; | |||||
| } else { | |||||
| return NNACL_PARAM_INVALID; | |||||
| } | |||||
| } else { | |||||
| out_shape[j++] = arithmetic_parameter->in_shape0_[i]; | |||||
| } | |||||
| } | |||||
| return NNACL_OK; | |||||
| } | |||||
| #endif | |||||
| void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, | ||||
| const int *inStrides, const int *outStrides, const int *multiple) { | const int *inStrides, const int *outStrides, const int *multiple) { | ||||
| int srcDimSize = inShape[dim]; | int srcDimSize = inShape[dim]; | ||||
| @@ -96,8 +96,6 @@ int ElementMaximum(const float *in0, const float *in1, float *out, int size); | |||||
| int ElementMinimum(const float *in0, const float *in1, float *out, int size); | int ElementMinimum(const float *in0, const float *in1, float *out, int size); | ||||
| int ElementMaximumInt(const int *in0, const int *in1, int *out, int size); | int ElementMaximumInt(const int *in0, const int *in1, int *out, int size); | ||||
| int ElementMinimumInt(const int *input0, const int *input1, int *output, const int element_size); | int ElementMinimumInt(const int *input0, const int *input1, int *output, const int element_size); | ||||
| int BroadcastMaximum(const float *in0, const float *in1, float *tile_input0, float *tile_input1, float *out, int size, | |||||
| ArithmeticParameter *param); | |||||
| /* floor div */ | /* floor div */ | ||||
| int ElementFloorDiv(const float *in0, const float *in1, float *out, int size); | int ElementFloorDiv(const float *in0, const float *in1, float *out, int size); | ||||
| @@ -113,10 +111,6 @@ int ElementModInt(const int *in0, const int *in1, int *out, int size); | |||||
| int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); | ||||
| int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); | ||||
| #ifdef ENABLE_NNACL_INFER_SHAPE | |||||
| int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, | |||||
| int *in_datatype, int *out_datatype, OpParameter *param); | |||||
| #endif | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -16,7 +16,6 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include <math.h> | #include <math.h> | ||||
| #include <assert.h> | |||||
| #include "nnacl/fp32/arithmetic_self_fp32.h" | #include "nnacl/fp32/arithmetic_self_fp32.h" | ||||
| // abs: | // abs: | ||||
| @@ -13,28 +13,11 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ | |||||
| #ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ | |||||
| #define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ | |||||
| #ifdef ENABLE_NEON | |||||
| #include <arm_neon.h> | |||||
| #endif | |||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| #define BROADCAST_TO_SHAPE_MAX_SIZE 4 | |||||
| typedef struct BroadcastToParameter { | |||||
| OpParameter op_parameter_; | |||||
| int shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| size_t shape_size_; | |||||
| } BroadcastToParameter; | |||||
| typedef struct BroadcastShapeInfo { | |||||
| int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| int input_shape_size_; | |||||
| int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE]; | |||||
| int output_shape_size_; | |||||
| } BroadcastShapeInfo; | |||||
| #include "nnacl/broadcast_to_parameter.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| @@ -44,4 +27,4 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_ | |||||
| #endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_ | |||||
| @@ -1,48 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_LITE_NNACL_CAST_H_ | |||||
| #define MINDSPORE_LITE_NNACL_CAST_H_ | |||||
| #ifdef ENABLE_NEON | |||||
| #include <arm_neon.h> | |||||
| #endif | |||||
| #include "nnacl/op_base.h" | |||||
| // For cast. | |||||
| /* Parameter block for Cast: the common OpParameter header plus integer source/destination type codes. */ typedef struct CastParameter { | |||||
| OpParameter op_parameter_; | |||||
| int src_type_; /* type code of the data being converted from */ | |||||
| int dst_type_; /* type code of the data being converted to */ | |||||
| } CastParameter; | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| /* Conversion routines: each takes a read-only input buffer, an output buffer of the target element type, and an int `number` (presumably the element count to convert -- confirm against the definitions). */ void BoolToFloat32(const bool *input, float *output, int number); | |||||
| void Uint8ToFloat32(const uint8_t *input, float *output, int number); | |||||
| void Uint8ToInt8(const uint8_t *input, int8_t *output, int number); | |||||
| void Int8ToUint8(const int8_t *input, uint8_t *output, int number); | |||||
| void Int32ToFloat32(const int32_t *input, float *output, int number); | |||||
| /* half-precision values are carried as raw uint16_t in the next two prototypes */ void Fp16ToFloat32(const uint16_t *input, float *output, int number); | |||||
| void Float32ToFp16(const float *input, uint16_t *output, int number); | |||||
| void Float32ToInt32(const float *input, int32_t *output, int number); | |||||
| void Float32ToInt64(const float *input, int64_t *output, int number); | |||||
| void Int32ToInt64(const int32_t *input, int64_t *output, int number); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif // MINDSPORE_LITE_NNACL_CAST_H_ | |||||
| @@ -115,101 +115,3 @@ void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size | |||||
| } | } | ||||
| } | } | ||||
| #endif | #endif | ||||
// Gives access to the raw bit pattern of a 32-bit float.
typedef union float32_bits {
  unsigned int u;
  float f;
} float32_bits;

// Expands an IEEE-754 binary16 value (passed as its raw 16-bit pattern) to a 32-bit float.
// Zeros, subnormals, normals, infinities and NaNs are all handled; the sign is carried over unchanged.
// All bit arithmetic is done in unsigned int: shifting the promoted (signed) uint16_t sign bit left by 16
// would overflow int, which is undefined behaviour in C.
float ShortToFloat32(uint16_t src_value) {
  const float32_bits magic = {113u << 23};         // 2^-14, the smallest normal half, as a float
  const unsigned int shifted_exp = 0x7c00u << 13;  // half exponent mask moved to its float position
  const unsigned int half_bits = src_value;
  float32_bits o;
  o.u = (half_bits & 0x7fffu) << 13;  // exponent and mantissa bits, sign stripped
  const unsigned int exp = shifted_exp & o.u;
  o.u += (127u - 15u) << 23;  // re-bias the exponent from half to single precision
  if (exp == shifted_exp) {
    // Inf/NaN: push the exponent up to the float all-ones pattern
    o.u += (128u - 16u) << 23;
  } else if (exp == 0) {
    // zero/subnormal: build 2^-14 * (1 + m) and subtract 2^-14 to renormalize
    o.u += 1u << 23;
    o.f -= magic.f;
  }
  o.u |= (half_bits & 0x8000u) << 16;  // sign bit
  return o.f;
}
static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;
static const unsigned int FP32_EXPONENT_MAX = 255;
static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;
static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;

// Narrows a 32-bit float to IEEE-754 binary16 and returns the raw 16-bit pattern.
// Rounding is to nearest, with exact ties rounded up in magnitude (not ties-to-even).
// Magnitudes too large for half precision become infinity, magnitudes too small become zero; the sign is
// always preserved. NaN inputs stay NaN.
uint16_t Float32ToShort(float src_value) {
  // Read the stored bit pattern. A numeric cast such as (unsigned int)src_value would convert the *value*
  // (1.0f -> 1) and is undefined for negative inputs.
  union {
    float f;
    unsigned int u;
  } src_bits;
  src_bits.f = src_value;
  const unsigned int srcValueBit = src_bits.u;

  // sign, already moved to the top bit of the half result
  const unsigned int sign = (srcValueBit >> (FP32_BIT_SIZE - 1)) << (FP16_BIT_SIZE - 1);
  const unsigned int raw_exp = (srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND;
  unsigned int mantissa = srcValueBit & 0x007FFFFF;
  // exponent re-biased from single to half precision; may be negative
  const int exp = (int)raw_exp + (int)FP16_EXPONENT_BIAS - (int)FP32_EXPONENT_BIAS;
  const unsigned int mantissa_shift = FP32_SIGNIFICAND - FP16_SIGNIFICAND;
  const unsigned int round_half = 1u << (mantissa_shift - 1);  // half of the last kept mantissa bit

  if (raw_exp == FP32_EXPONENT_MAX) {
    if (mantissa == 0) {
      // input float is infinity, return infinity half
      return (uint16_t)(sign | 0x7C00);
    }
    // input float is NaN; force a mantissa bit so a payload that lives only in the dropped low bits
    // does not turn into infinity
    return (uint16_t)(sign | 0x7C00 | 0x0200 | (mantissa >> mantissa_shift));
  }
  if (exp > FP16_EXPONENT_MAX) {
    // exponent overflow - return infinity half
    return (uint16_t)(sign | 0x7C00);
  }
  if (exp > 0) {
    // Normal half. The rounded mantissa is ADDED to the exponent field so that a rounding carry bumps the
    // exponent; with exp == FP16_EXPONENT_MAX the carry yields 0x7C00, which is exactly the infinity encoding.
    const unsigned int magnitude = ((unsigned int)exp << FP16_SIGNIFICAND) + ((mantissa + round_half) >> mantissa_shift);
    return (uint16_t)(sign | magnitude);
  }
  if (exp >= FP16_EXPONENT_MIN) {
    // Normalized single whose magnitude is below the smallest normal half: restore the implicit leading one,
    // shift into subnormal position and round. A carry naturally produces the smallest normal half.
    mantissa = (mantissa | 0x00800000) >> (1 - exp);
    return (uint16_t)(sign | ((mantissa + round_half) >> mantissa_shift));
  }
  // zero, or magnitude below the smallest half subnormal: signed zero
  return (uint16_t)sign;
}
| @@ -33,10 +33,6 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi | |||||
| void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); | void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); | ||||
| void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); | void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length); | ||||
| float ShortToFloat32(uint16_t src_value); | |||||
| uint16_t Float32ToShort(float src_value); | |||||
| #if defined(ENABLE_ARM) || defined(ENABLE_SSE) | #if defined(ENABLE_ARM) || defined(ENABLE_SSE) | ||||
| void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, | void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, | ||||
| size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, | size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, | ||||
| @@ -13,12 +13,3 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "nnacl/fp32/expandDims_fp32.h" | |||||
| #include <string.h> | |||||
| #include "nnacl/errorcode.h" | |||||
// Copies data_size bytes from input_ptr to output_ptr without modification and reports NNACL_OK.
int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) {
  (void)memcpy(output_ptr, input_ptr, data_size);
  return NNACL_OK;
}
| @@ -1,29 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/fp32/range_fp32.h" | |||||
// Writes nums values to output_ptr: start, then each following value obtained by adding delta to the
// previous one (repeated float addition, not start + i * delta).
void Range(float *output_ptr, float start, float delta, int nums) {
  float value = start;
  int idx = 0;
  while (idx < nums) {
    output_ptr[idx] = value;
    value += delta;
    ++idx;
  }
}
// Integer counterpart of Range: writes nums values to output_ptr, beginning at start and stepping by delta.
void RangeInt(int *output_ptr, int start, int delta, int nums) {
  int value = start;
  int idx = 0;
  while (idx < nums) {
    output_ptr[idx] = value;
    value += delta;
    ++idx;
  }
}
| @@ -31,8 +31,18 @@ typedef struct RangeParameter { | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
// Writes nums values to output_ptr: start, then each following value obtained by adding delta to the
// previous one (repeated float addition).
// Declared `static inline`: under C99 rules a plain `inline` definition in a header emits no external
// symbol, so any call the compiler decides not to inline would be an undefined reference at link time.
static inline void Range(float *output_ptr, float start, float delta, int nums) {
  for (int i = 0; i < nums; ++i, start += delta) {
    output_ptr[i] = start;
  }
}

// Integer counterpart of Range; `static inline` for the same linkage reason.
static inline void RangeInt(int *output_ptr, int start, int delta, int nums) {
  for (int i = 0; i < nums; ++i, start += delta) {
    output_ptr[i] = start;
  }
}
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -1,19 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "nnacl/fp32/rank_fp32.h" | |||||
// Stores rank, converted to float, in the first element of output.
void Rank(float *output, int rank) {
  *output = (float)rank;
}
| @@ -21,7 +21,10 @@ | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
// Stores rank, converted to float, in the first element of output.
// Declared `static inline`: under C99 rules a plain `inline` definition in a header emits no external
// symbol, so any call the compiler decides not to inline would be an undefined reference at link time.
static inline void Rank(float *output, int rank) {
  output[0] = (float)(rank);
}
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -15,7 +15,6 @@ | |||||
| */ | */ | ||||
| #include <math.h> | #include <math.h> | ||||
| #include <assert.h> | |||||
| #include "nnacl/int8/arithmetic_self_int8.h" | #include "nnacl/int8/arithmetic_self_int8.h" | ||||
| #ifdef ENABLE_NEON | #ifdef ENABLE_NEON | ||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| @@ -15,3 +15,88 @@ | |||||
| */ | */ | ||||
| #include "nnacl/nnacl_common.h" | #include "nnacl/nnacl_common.h" | ||||
// Gives access to the raw bit pattern of a 32-bit float.
typedef union float32_bits {
  unsigned int u;
  float f;
} float32_bits;

// Expands an IEEE-754 binary16 value (passed as its raw 16-bit pattern) to a 32-bit float.
// Zeros, subnormals, normals, infinities and NaNs are all handled; the sign is carried over unchanged.
// All bit arithmetic is done in unsigned int: shifting the promoted (signed) uint16_t sign bit left by 16
// would overflow int, which is undefined behaviour in C.
float ShortToFloat32(uint16_t src_value) {
  const float32_bits magic = {113u << 23};         // 2^-14, the smallest normal half, as a float
  const unsigned int shifted_exp = 0x7c00u << 13;  // half exponent mask moved to its float position
  const unsigned int half_bits = src_value;
  float32_bits o;
  o.u = (half_bits & 0x7fffu) << 13;  // exponent and mantissa bits, sign stripped
  const unsigned int exp = shifted_exp & o.u;
  o.u += (127u - 15u) << 23;  // re-bias the exponent from half to single precision
  if (exp == shifted_exp) {
    // Inf/NaN: push the exponent up to the float all-ones pattern
    o.u += (128u - 16u) << 23;
  } else if (exp == 0) {
    // zero/subnormal: build 2^-14 * (1 + m) and subtract 2^-14 to renormalize
    o.u += 1u << 23;
    o.f -= magic.f;
  }
  o.u |= (half_bits & 0x8000u) << 16;  // sign bit
  return o.f;
}
// Narrows a 32-bit float to IEEE-754 binary16 and returns the raw 16-bit pattern.
// Rounding is to nearest, with exact ties rounded up in magnitude (not ties-to-even).
// Magnitudes too large for half precision become infinity, magnitudes too small become zero; the sign is
// always preserved. NaN inputs stay NaN.
uint16_t Float32ToShort(float src_value) {
  // Read the stored bit pattern. A numeric cast such as (unsigned int)src_value would convert the *value*
  // (1.0f -> 1) and is undefined for negative inputs.
  union {
    float f;
    unsigned int u;
  } src_bits;
  src_bits.f = src_value;
  const unsigned int srcValueBit = src_bits.u;

  // sign, already moved to the top bit of the half result
  const unsigned int sign = (srcValueBit >> (FP32_BIT_SIZE - 1)) << (FP16_BIT_SIZE - 1);
  const unsigned int raw_exp = (srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND;
  unsigned int mantissa = srcValueBit & 0x007FFFFF;
  // exponent re-biased from single to half precision; may be negative
  const int exp = (int)raw_exp + (int)FP16_EXPONENT_BIAS - (int)FP32_EXPONENT_BIAS;
  const unsigned int mantissa_shift = FP32_SIGNIFICAND - FP16_SIGNIFICAND;
  const unsigned int round_half = 1u << (mantissa_shift - 1);  // half of the last kept mantissa bit

  if (raw_exp == FP32_EXPONENT_MAX) {
    if (mantissa == 0) {
      // input float is infinity, return infinity half
      return (uint16_t)(sign | 0x7C00);
    }
    // input float is NaN; force a mantissa bit so a payload that lives only in the dropped low bits
    // does not turn into infinity
    return (uint16_t)(sign | 0x7C00 | 0x0200 | (mantissa >> mantissa_shift));
  }
  if (exp > FP16_EXPONENT_MAX) {
    // exponent overflow - return infinity half
    return (uint16_t)(sign | 0x7C00);
  }
  if (exp > 0) {
    // Normal half. The rounded mantissa is ADDED to the exponent field so that a rounding carry bumps the
    // exponent; with exp == FP16_EXPONENT_MAX the carry yields 0x7C00, which is exactly the infinity encoding.
    const unsigned int magnitude = ((unsigned int)exp << FP16_SIGNIFICAND) + ((mantissa + round_half) >> mantissa_shift);
    return (uint16_t)(sign | magnitude);
  }
  if (exp >= FP16_EXPONENT_MIN) {
    // Normalized single whose magnitude is below the smallest normal half: restore the implicit leading one,
    // shift into subnormal position and round. A carry naturally produces the smallest normal half.
    mantissa = (mantissa | 0x00800000) >> (1 - exp);
    return (uint16_t)(sign | ((mantissa + round_half) >> mantissa_shift));
  }
  // zero, or magnitude below the smallest half subnormal: signed zero
  return (uint16_t)sign;
}
| @@ -17,6 +17,8 @@ | |||||
| #ifndef MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ | #ifndef MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ | ||||
| #define MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ | #define MINDSPORE_LITE_NNACL_NNACL_COMMON_H_ | ||||
| #include "nnacl/op_base.h" | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| @@ -29,6 +31,18 @@ inline void ComputeStrides(const int *shape, int *strides, const int ndim) { | |||||
| } | } | ||||
| } | } | ||||
| /* Bit-layout constants used by Float32ToShort. */ static const unsigned int FP32_BIT_SIZE = 32; | |||||
| static const unsigned int FP32_EXPONENT_BIAS = 127; | |||||
| static const unsigned int FP32_SIGNIFICAND = 23; /* number of stored mantissa bits in a float */ | |||||
| static const unsigned int FP32_EXPONENT_MAX = 255; /* all-ones float exponent field (infinity / NaN) */ | |||||
| static const unsigned int FP16_BIT_SIZE = 16; | |||||
| static const unsigned int FP16_EXPONENT_BIAS = 15; | |||||
| static const unsigned int FP16_SIGNIFICAND = 10; /* number of stored mantissa bits in a half */ | |||||
| static const int FP16_EXPONENT_MAX = 30; /* Float32ToShort maps re-biased exponents above this to infinity */ | |||||
| static const int FP16_EXPONENT_MIN = -10; /* Float32ToShort flushes re-biased exponents below this to zero */ | |||||
| float ShortToFloat32(uint16_t src_value); | |||||
| uint16_t Float32ToShort(float src_value); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -22,7 +22,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| #ifdef PRIMITIVE_WRITEABLE | #ifdef PRIMITIVE_WRITEABLE | ||||
| int AssertOP::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | int AssertOP::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | ||||
| if (this->primitive_ == nullptr) { | if (this->primitive_ == nullptr) { | ||||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | this->primitive_ = new (std::nothrow) schema::PrimitiveT; | ||||
| @@ -67,6 +66,5 @@ Registry AssertRegistry(schema::PrimitiveType_Assert, AssertCreator); | |||||
| #endif | #endif | ||||
| /* Performs no shape inference: ignores both tensor lists and always returns RET_OK. */ int AssertOP::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { return RET_OK; } | int AssertOP::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { return RET_OK; } | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -23,7 +23,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| #ifdef PRIMITIVE_WRITEABLE | #ifdef PRIMITIVE_WRITEABLE | ||||
| int Merge::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | int Merge::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | ||||
| if (this->primitive_ == nullptr) { | if (this->primitive_ == nullptr) { | ||||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | this->primitive_ = new (std::nothrow) schema::PrimitiveT; | ||||
| @@ -99,6 +98,5 @@ int Merge::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outpu | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -23,7 +23,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| #ifdef PRIMITIVE_WRITEABLE | #ifdef PRIMITIVE_WRITEABLE | ||||
| int Mod::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | int Mod::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | ||||
| if (this->primitive_ == nullptr) { | if (this->primitive_ == nullptr) { | ||||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | this->primitive_ = new (std::nothrow) schema::PrimitiveT; | ||||
| @@ -20,7 +20,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| OpParameter *PopulateAssertParameter(const mindspore::lite::PrimitiveC *primitive) { | OpParameter *PopulateAssertParameter(const mindspore::lite::PrimitiveC *primitive) { | ||||
| OpParameter *assert_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | OpParameter *assert_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | ||||
| if (assert_parameter == nullptr) { | if (assert_parameter == nullptr) { | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "src/ops/cast.h" | #include "src/ops/cast.h" | ||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "src/ops/populate/populate_register.h" | #include "src/ops/populate/populate_register.h" | ||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #include "nnacl/cast_parameter.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| @@ -29,9 +29,11 @@ OpParameter *PopulateCastParameter(const mindspore::lite::PrimitiveC *primitive) | |||||
| } | } | ||||
| memset(cast_param, 0, sizeof(CastParameter)); | memset(cast_param, 0, sizeof(CastParameter)); | ||||
| cast_param->op_parameter_.type_ = primitive->Type(); | cast_param->op_parameter_.type_ = primitive->Type(); | ||||
| auto param = reinterpret_cast<mindspore::lite::Cast *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | auto param = reinterpret_cast<mindspore::lite::Cast *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | ||||
| cast_param->src_type_ = param->GetSrcT(); | cast_param->src_type_ = param->GetSrcT(); | ||||
| cast_param->dst_type_ = param->GetDstT(); | cast_param->dst_type_ = param->GetDstT(); | ||||
| return reinterpret_cast<OpParameter *>(cast_param); | return reinterpret_cast<OpParameter *>(cast_param); | ||||
| } | } | ||||
| @@ -14,24 +14,19 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/ops/expand_dims.h" | |||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "src/ops/populate/populate_register.h" | #include "src/ops/populate/populate_register.h" | ||||
| #include "nnacl/fp32/expandDims_fp32.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *primitive) { | OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *primitive) { | ||||
| auto param = reinterpret_cast<mindspore::lite::ExpandDims *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||||
| ExpandDimsParameter *expand_dims_param = reinterpret_cast<ExpandDimsParameter *>(malloc(sizeof(ExpandDimsParameter))); | |||||
| OpParameter *expand_dims_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | |||||
| if (expand_dims_param == nullptr) { | if (expand_dims_param == nullptr) { | ||||
| MS_LOG(ERROR) << "malloc ExpandDimsParameter failed."; | MS_LOG(ERROR) << "malloc ExpandDimsParameter failed."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| memset(expand_dims_param, 0, sizeof(ExpandDimsParameter)); | |||||
| expand_dims_param->op_parameter_.type_ = primitive->Type(); | |||||
| expand_dims_param->dim_ = param->GetDim(); | |||||
| memset(expand_dims_param, 0, sizeof(OpParameter)); | |||||
| return reinterpret_cast<OpParameter *>(expand_dims_param); | return reinterpret_cast<OpParameter *>(expand_dims_param); | ||||
| } | } | ||||
| @@ -17,7 +17,7 @@ | |||||
| #include "src/ops/fill.h" | #include "src/ops/fill.h" | ||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "src/ops/populate/populate_register.h" | #include "src/ops/populate/populate_register.h" | ||||
| #include "nnacl/fp32/fill_fp32.h" | |||||
| #include "nnacl/fill_parameter.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| @@ -20,9 +20,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive); | OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive); | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | ||||
| @@ -19,7 +19,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| OpParameter *PopulateMergeParameter(const mindspore::lite::PrimitiveC *primitive) { | OpParameter *PopulateMergeParameter(const mindspore::lite::PrimitiveC *primitive) { | ||||
| OpParameter *merge_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | OpParameter *merge_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); | ||||
| if (merge_parameter == nullptr) { | if (merge_parameter == nullptr) { | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/ops/populate/strided_slice_populate.h" | #include "src/ops/populate/strided_slice_populate.h" | ||||
| #include <limits> | |||||
| #include "src/ops/strided_slice.h" | #include "src/ops/strided_slice.h" | ||||
| #include "src/ops/primitive_c.h" | #include "src/ops/primitive_c.h" | ||||
| #include "src/ops/populate/populate_register.h" | #include "src/ops/populate/populate_register.h" | ||||
| @@ -36,6 +36,5 @@ OpParameter *PopulateTensorListSetItemParameter(const mindspore::lite::Primitive | |||||
| } | } | ||||
| Registry TensorListSetItemParameterRegistry(schema::PrimitiveType_TensorListSetItem, | Registry TensorListSetItemParameterRegistry(schema::PrimitiveType_TensorListSetItem, | ||||
| PopulateTensorListSetItemParameter); | PopulateTensorListSetItemParameter); | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -28,6 +28,5 @@ PrimitiveC *ReciprocalCreator(const schema::Primitive *primitive) { | |||||
| } | } | ||||
| Registry ReciprocalRegistry(schema::PrimitiveType_Reciprocal, ReciprocalCreator); | Registry ReciprocalRegistry(schema::PrimitiveType_Reciprocal, ReciprocalCreator); | ||||
| #endif | #endif | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -39,7 +39,6 @@ class Reciprocal : public ArithmeticSelf { | |||||
| } | } | ||||
| #endif | #endif | ||||
| }; | }; | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/ops/space_to_batch_nd.h" | #include "src/ops/space_to_batch_nd.h" | ||||
| #include <limits> | |||||
| #include "src/common/common.h" | #include "src/common/common.h" | ||||
| #ifndef PRIMITIVE_WRITEABLE | #ifndef PRIMITIVE_WRITEABLE | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/ops/space_to_depth.h" | #include "src/ops/space_to_depth.h" | ||||
| #include <limits> | |||||
| #include "src/common/common.h" | #include "src/common/common.h" | ||||
| #ifndef PRIMITIVE_WRITEABLE | #ifndef PRIMITIVE_WRITEABLE | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/ops/tile.h" | #include "src/ops/tile.h" | ||||
| #include <limits> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #ifndef PRIMITIVE_WRITEABLE | #ifndef PRIMITIVE_WRITEABLE | ||||
| @@ -14,11 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp16/cast_fp16.h" | #include "src/runtime/kernel/arm/fp16/cast_fp16.h" | ||||
| #include <vector> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "nnacl/fp16/cast_fp16.h" | |||||
| #include "nnacl/op_base.h" | |||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| @@ -18,6 +18,9 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/op_base.h" | |||||
| #include "nnacl/fp16/cast_fp16.h" | |||||
| #include "nnacl/base/cast_base.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class CastFp16CPUKernel : public LiteKernel { | class CastFp16CPUKernel : public LiteKernel { | ||||
| @@ -113,8 +113,9 @@ int ConcatFp16CPUKernel::Run() { | |||||
| fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData()); | fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData()); | ||||
| } | } | ||||
| int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t); | int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t); | ||||
| ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_, | |||||
| inputs_output_shape.data(), output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len); | |||||
| Concat(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_, inputs_output_shape.data(), | |||||
| output_shape.size(), reinterpret_cast<void *>(fp16_output_), 0, 1, dtype_len); | |||||
| if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) { | if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) { | ||||
| Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum()); | Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum()); | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "nnacl/fp16/concat_fp16.h" | |||||
| #include "nnacl/base/concat_base.h" | |||||
| #include "nnacl/concat_parameter.h" | #include "nnacl/concat_parameter.h" | ||||
| #include "nnacl/fp16/cast_fp16.h" | #include "nnacl/fp16/cast_fp16.h" | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| @@ -15,9 +15,6 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp16/reshape_fp16.h" | #include "src/runtime/kernel/arm/fp16/reshape_fp16.h" | ||||
| #include <vector> | |||||
| #include "nnacl/fp16/cast_fp16.h" | |||||
| #include "nnacl/reshape.h" | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| @@ -18,8 +18,9 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/fp16/cast_fp16.h" | |||||
| #include "nnacl/base/reshape_base.h" | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "src/runtime/kernel/arm/fp32/reshape_fp32.h" | #include "src/runtime/kernel/arm/fp32/reshape_fp32.h" | ||||
| @@ -14,14 +14,9 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/cast_fp32.h" | #include "src/runtime/kernel/arm/fp32/cast_fp32.h" | ||||
| #include <vector> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/tensor.h" | |||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #include "nnacl/op_base.h" | |||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "include/errorcode.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| @@ -17,7 +17,11 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "include/errorcode.h" | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "src/tensor.h" | |||||
| #include "nnacl/op_base.h" | |||||
| #include "nnacl/base/cast_base.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class CastCPUKernel : public LiteKernel { | class CastCPUKernel : public LiteKernel { | ||||
| @@ -40,8 +40,8 @@ int ConcatCPUKernel::ReSize() { | |||||
| int ConcatCPUKernel::DoConcat(int task_id) { | int ConcatCPUKernel::DoConcat(int task_id) { | ||||
| auto input_num = in_tensors_.size(); | auto input_num = in_tensors_.size(); | ||||
| std::vector<const void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<const int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| std::vector<void *> inputs_addr(input_num, nullptr); | |||||
| std::vector<int *> inputs_output_shape(input_num + 1, nullptr); | |||||
| std::vector<std::vector<int>> shapes; | std::vector<std::vector<int>> shapes; | ||||
| for (size_t i = 0; i < input_num; ++i) { | for (size_t i = 0; i < input_num; ++i) { | ||||
| @@ -54,7 +54,7 @@ int ConcatCPUKernel::DoConcat(int task_id) { | |||||
| auto output_addr = out_tensors_.at(0)->MutableData(); | auto output_addr = out_tensors_.at(0)->MutableData(); | ||||
| Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(), | Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(), | ||||
| output_addr, task_id, op_parameter_->thread_num_); | |||||
| output_addr, task_id, op_parameter_->thread_num_, sizeof(float)); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -18,7 +18,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/fp32/concat_fp32.h" | |||||
| #include "nnacl/base/concat_base.h" | |||||
| #include "nnacl/concat_parameter.h" | #include "nnacl/concat_parameter.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| @@ -15,10 +15,8 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/expandDims_fp32.h" | #include "src/runtime/kernel/arm/fp32/expandDims_fp32.h" | ||||
| #include <vector> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "include/errorcode.h" | |||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| @@ -18,8 +18,9 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXPANDDIMS_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXPANDDIMS_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "include/errorcode.h" | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/fp32/expandDims_fp32.h" | |||||
| #include "nnacl/base/expand_dims_base.h" | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| @@ -51,7 +51,7 @@ int FillCPUKernel::DoFill(int task_id) { | |||||
| auto input_tensor = in_tensors_.at(0); | auto input_tensor = in_tensors_.at(0); | ||||
| int ret = RET_OK; | int ret = RET_OK; | ||||
| if (input_tensor->data_type() == kNumberTypeFloat32 || input_tensor->data_type() == kNumberTypeFloat) { | if (input_tensor->data_type() == kNumberTypeFloat32 || input_tensor->data_type() == kNumberTypeFloat) { | ||||
| ret = Fill(out_ptr_ + offset, size, src_data_); | |||||
| ret = FillFp32(out_ptr_ + offset, size, src_data_); | |||||
| } else if (input_tensor->data_type() == kNumberTypeInt32 || input_tensor->data_type() == kNumberTypeInt) { | } else if (input_tensor->data_type() == kNumberTypeInt32 || input_tensor->data_type() == kNumberTypeInt) { | ||||
| ret = FillInt32(int32_out_ptr_ + offset, size, int32_src_data_); | ret = FillInt32(int32_out_ptr_ + offset, size, int32_src_data_); | ||||
| } else { | } else { | ||||
| @@ -18,9 +18,8 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "nnacl/fp32/fill_fp32.h" | |||||
| #include "nnacl/base/fill_base.h" | |||||
| using mindspore::lite::InnerContext; | using mindspore::lite::InnerContext; | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "src/runtime/kernel/arm/fp32/gatherNd_fp32.h" | #include "src/runtime/kernel/arm/fp32/gatherNd_fp32.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| #include <vector> | |||||
| #include <limits> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| @@ -29,7 +29,6 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_GatherNd; | using mindspore::schema::PrimitiveType_GatherNd; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| GatherNdCPUKernel::~GatherNdCPUKernel() { | GatherNdCPUKernel::~GatherNdCPUKernel() { | ||||
| if (in_offset_ != nullptr) { | if (in_offset_ != nullptr) { | ||||
| free(in_offset_); | free(in_offset_); | ||||
| @@ -17,10 +17,10 @@ | |||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ | #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ | ||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_ | ||||
| #include <string.h> | |||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/fp32/gatherNd_fp32.h" | #include "nnacl/fp32/gatherNd_fp32.h" | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| @@ -13,14 +13,12 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/gather_fp32.h" | #include "src/runtime/kernel/arm/fp32/gather_fp32.h" | ||||
| #include <vector> | |||||
| #include "nnacl/gather_parameter.h" | |||||
| #include "nnacl/fp32/gather_fp32.h" | |||||
| #include <limits> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "include/errorcode.h" | |||||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | using mindspore::kernel::KERNEL_ARCH::kCPU; | ||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| @@ -29,7 +27,6 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Gather; | using mindspore::schema::PrimitiveType_Gather; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int GatherCPUKernel::Init() { | int GatherCPUKernel::Init() { | ||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -76,7 +73,7 @@ int GatherCPUKernel::DoGather(int task_id) { | |||||
| } else { | } else { | ||||
| input_ptr += thread_stride * limit; | input_ptr += thread_stride * limit; | ||||
| output_ptr += thread_stride * indices_element_size; | output_ptr += thread_stride * indices_element_size; | ||||
| error_code = Gather(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr); | |||||
| error_code = GatherFp32(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr); | |||||
| } | } | ||||
| return error_code; | return error_code; | ||||
| } | } | ||||
| @@ -18,8 +18,10 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHER_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHER_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/gather_parameter.h" | |||||
| #include "include/errorcode.h" | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/gather_parameter.h" | |||||
| #include "nnacl/base/gather_base.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class GatherCPUKernel : public LiteKernel { | class GatherCPUKernel : public LiteKernel { | ||||
| @@ -15,8 +15,6 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/reshape_fp32.h" | #include "src/runtime/kernel/arm/fp32/reshape_fp32.h" | ||||
| #include <vector> | |||||
| #include "nnacl/reshape.h" | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "include/context.h" | #include "include/context.h" | ||||
| #include "nnacl/base/reshape_base.h" | |||||
| using mindspore::lite::InnerContext; | using mindspore::lite::InnerContext; | ||||
| @@ -35,8 +36,6 @@ class ReshapeCPUKernel : public LiteKernel { | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| private: | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/arm/fp32/space_to_depth_fp32.h" | #include "src/runtime/kernel/arm/fp32/space_to_depth_fp32.h" | ||||
| #include <limits> | |||||
| #include <vector> | #include <vector> | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include "src/runtime/kernel/arm/int8/gatherNd_int8.h" | #include "src/runtime/kernel/arm/int8/gatherNd_int8.h" | ||||
| #include <string.h> | #include <string.h> | ||||
| #include <vector> | |||||
| #include <limits> | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| @@ -30,7 +30,6 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_GatherNd; | using mindspore::schema::PrimitiveType_GatherNd; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| GatherNdInt8CPUKernel::~GatherNdInt8CPUKernel() { | GatherNdInt8CPUKernel::~GatherNdInt8CPUKernel() { | ||||
| if (in_offset_ != nullptr) { | if (in_offset_ != nullptr) { | ||||
| free(in_offset_); | free(in_offset_); | ||||
| @@ -16,7 +16,6 @@ | |||||
| #include "src/runtime/kernel/arm/int8/pooling_int8.h" | #include "src/runtime/kernel/arm/int8/pooling_int8.h" | ||||
| #include "nnacl/int8/pooling_int8.h" | #include "nnacl/int8/pooling_int8.h" | ||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/runtime_api.h" | #include "src/runtime/runtime_api.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| @@ -35,7 +35,7 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| op_->set_input_x(*npu_inputs[0]); | op_->set_input_x(*npu_inputs[0]); | ||||
| op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_))); | |||||
| op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(outputs[0]->data_type()))); | |||||
| op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(inputs[0]->data_type()))); | op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(inputs[0]->data_type()))); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -19,16 +19,14 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/runtime/kernel/npu/npu_kernel.h" | #include "src/runtime/kernel/npu/npu_kernel.h" | ||||
| #include "include/graph/op/all_ops.h" | #include "include/graph/op/all_ops.h" | ||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class CastNPUKernel : public NPUKernel { | class CastNPUKernel : public NPUKernel { | ||||
| public: | public: | ||||
| CastNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | CastNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||||
| cast_parameter_ = reinterpret_cast<CastParameter *>(parameter); | |||||
| } | |||||
| : NPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| ~CastNPUKernel() override; | ~CastNPUKernel() override; | ||||
| int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | ||||
| @@ -39,7 +37,6 @@ class CastNPUKernel : public NPUKernel { | |||||
| private: | private: | ||||
| hiai::op::CastT *op_ = nullptr; | hiai::op::CastT *op_ = nullptr; | ||||
| CastParameter *cast_parameter_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_ | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/fp32/cast_fp32.h" | |||||
| #include "nnacl/cast_parameter.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -18,7 +18,7 @@ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FILL_H_ | #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FILL_H_ | ||||
| #include <vector> | #include <vector> | ||||
| #include "mindspore/lite/nnacl/fp32/fill_fp32.h" | |||||
| #include "mindspore/lite/nnacl/base/fill_base.h" | |||||
| #include "mindspore/lite/nnacl/shape.h" | #include "mindspore/lite/nnacl/shape.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| @@ -20,7 +20,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| /// \brief TrainModel Defines a class that allows to import and export a MindSpore trainable model | /// \brief TrainModel Defines a class that allows to import and export a MindSpore trainable model | ||||
| struct TrainModel : public lite::LiteModel { | struct TrainModel : public lite::LiteModel { | ||||
| /// \brief Static method to create a TrainModel object | /// \brief Static method to create a TrainModel object | ||||
| @@ -27,7 +27,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| STATUS SubgraphNodePass::GetSubgraphAllTensorIndices(const std::unique_ptr<SubGraphT> &subgraph, | STATUS SubgraphNodePass::GetSubgraphAllTensorIndices(const std::unique_ptr<SubGraphT> &subgraph, | ||||
| schema::MetaGraphT *graph, std::set<uint32_t> *tensors_indices) { | schema::MetaGraphT *graph, std::set<uint32_t> *tensors_indices) { | ||||
| for (auto &node_idx : subgraph->nodeIndices) { | for (auto &node_idx : subgraph->nodeIndices) { | ||||
| @@ -26,7 +26,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| bool SubgraphTensorPass::IsUsing(schema::MetaGraphT *graph, const uint32_t &tensor_idx) { | bool SubgraphTensorPass::IsUsing(schema::MetaGraphT *graph, const uint32_t &tensor_idx) { | ||||
| for (const auto &node : graph->nodes) { | for (const auto &node : graph->nodes) { | ||||
| if (IsContain<uint32_t>(node->inputIndex, tensor_idx)) { | if (IsContain<uint32_t>(node->inputIndex, tensor_idx)) { | ||||
| @@ -19,7 +19,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| STATUS CaffePoolingParser::ParsePads(const caffe::PoolingParameter &poolingParam, schema::PoolingT *attr) { | STATUS CaffePoolingParser::ParsePads(const caffe::PoolingParameter &poolingParam, schema::PoolingT *attr) { | ||||
| if (poolingParam.has_pad_h() && poolingParam.has_pad_w()) { | if (poolingParam.has_pad_h() && poolingParam.has_pad_w()) { | ||||
| if (poolingParam.has_pad()) { | if (poolingParam.has_pad()) { | ||||
| @@ -109,7 +109,6 @@ int CropperFlags::Init(int argc, const char **argv) { | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace cropper | } // namespace cropper | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -24,7 +24,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| namespace cropper { | namespace cropper { | ||||
| class CropperFlags : public virtual mindspore::lite::FlagParser { | class CropperFlags : public virtual mindspore::lite::FlagParser { | ||||
| public: | public: | ||||
| CropperFlags(); | CropperFlags(); | ||||
| @@ -47,7 +47,6 @@ int ValidFile(std::ifstream &in_file, const char *file_path) { | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace cropper | } // namespace cropper | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||