From: @yangruoqi713 (tag v1.2.0-rc1)
| @@ -0,0 +1,84 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp32/instance_norm_fp32.h" | |||
| #include <math.h> | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data, | |||
| const InstanceNormParameter *param, size_t task_id) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| int channel_step = UP_DIV(param->channel_, param->op_parameter_.thread_num_); | |||
| int channel_begin = task_id * channel_step; | |||
| int channel_end = MSMIN(channel_begin + channel_step, param->channel_); | |||
| for (int b = 0; b < param->batch_; b++) { | |||
| const float *src_b = src_data + b * param->channel_ * param->inner_size_; | |||
| float *dst_b = dst_data + b * param->channel_ * param->inner_size_; | |||
| for (int c = channel_begin; c < channel_end; c++) { | |||
| const float *src = src_b + c * param->inner_size_; | |||
| float *dst = dst_b + c * param->inner_size_; | |||
| float mean = 0.0f; | |||
| float square_mean = 0.0f; | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t sum = vdupq_n_f32(0); | |||
| float32x4_t square_sum = vdupq_n_f32(0); | |||
| for (; index < param->inner_size_ - C4NUM; index += C4NUM) { | |||
| float32x4_t srcv = vld1q_f32(src + index); | |||
| float32x4_t squarev = vmulq_f32(srcv, srcv); | |||
| sum = vaddq_f32(sum, srcv); | |||
| square_sum = vaddq_f32(square_sum, squarev); | |||
| } | |||
| mean = sum[0] + sum[1] + sum[2] + sum[3]; | |||
| square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3]; | |||
| #endif | |||
| for (; index < param->inner_size_; index++) { | |||
| mean += src[index]; | |||
| square_mean += src[index] * src[index]; | |||
| } | |||
| mean /= (float)param->inner_size_; | |||
| square_mean /= (float)param->inner_size_; | |||
| const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_); | |||
| index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t meanv = vdupq_n_f32(mean); | |||
| float32x4_t denov = vdupq_n_f32(deno); | |||
| for (; index < param->inner_size_ - C4NUM; index += C4NUM) { | |||
| float32x4_t srcv = vld1q_f32(src + index); | |||
| float32x4_t outv = vsubq_f32(srcv, meanv); | |||
| outv = vmulq_f32(outv, denov); | |||
| float32x4_t gammav = vdupq_n_f32(gamma_data[c]); | |||
| float32x4_t betav = vdupq_n_f32(beta_data[c]); | |||
| outv = vmulq_f32(outv, gammav); | |||
| outv = vaddq_f32(outv, betav); | |||
| vst1q_f32(dst + index, outv); | |||
| } | |||
| #endif | |||
| for (; index < param->inner_size_; index++) { | |||
| dst[index] = (src[index] - mean) * deno; | |||
| dst[index] = dst[index] * gamma_data[c] + beta_data[c]; | |||
| } | |||
| } | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
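For reference, a minimal sketch of how the new InstanceNorm entry point above might be driven. This is hypothetical, single-threaded usage and not code from the patch: it assumes NCHW layout (inner_size_ = H * W), one gamma/beta value per channel, and the InstanceNormParameter fields shown later in this diff; in the runtime the per-task calls are issued through ParallelLaunch (see instance_norm_fp32.cc further down).

#include "nnacl/fp32/instance_norm_fp32.h"

/* Hypothetical single-threaded driver (sketch only). */
static int run_instance_norm(const float *src, float *dst, const float *gamma,
                             const float *beta, int batch, int channel, int hw) {
  InstanceNormParameter param;
  param.op_parameter_.thread_num_ = 1; /* all channels handled by task 0 */
  param.epsilon_ = 1e-5f;              /* assumed default epsilon */
  param.batch_ = batch;
  param.channel_ = channel;
  param.inner_size_ = hw;              /* H * W for an NCHW input */
  return InstanceNorm(src, dst, gamma, beta, &param, 0); /* task_id = 0 */
}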
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,14 +13,20 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_ | |||
| #include "src/ops/arithmetic.h" | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/instance_norm_parameter.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive); | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data, const float *beta_data, | |||
| const InstanceNormParameter *param, size_t task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_ | |||
| @@ -18,100 +18,81 @@ | |||
| #include "nnacl/errorcode.h" | |||
| #include "nnacl/op_base.h" | |||
| int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data, | |||
| size_t task_id, size_t thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| void LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square_mean) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t sum = vdupq_n_f32(0); | |||
| float32x4_t square_sum = vdupq_n_f32(0); | |||
| for (; index < num - C4NUM; index += C4NUM) { | |||
| float32x4_t srcv = vld1q_f32(src + index); | |||
| float32x4_t squarev = vmulq_f32(srcv, srcv); | |||
| sum = vaddq_f32(sum, srcv); | |||
| square_sum = vaddq_f32(square_sum, squarev); | |||
| } | |||
| if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) { | |||
| return NNACL_NULL_PTR; | |||
| *mean = sum[0] + sum[1] + sum[2] + sum[3]; | |||
| *square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3]; | |||
| #endif | |||
| for (; index < num; index++) { | |||
| *mean += src[index]; | |||
| *square_mean += src[index] * src[index]; | |||
| } | |||
| for (size_t j = task_id; j < outer_size; j += thread_num) { | |||
| const float *src = src_data + j * inner_size; | |||
| float *dst = dst_data + j * inner_size; | |||
| float mean = 0.0f; | |||
| float square_mean = 0.0f; | |||
| *mean /= (float)num; | |||
| *square_mean /= (float)num; | |||
| } | |||
| int index = 0; | |||
| void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data, const float *beta_data, int num, | |||
| const float mean, const float deno) { | |||
| int index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t sum = vdupq_n_f32(0); | |||
| float32x4_t square_sum = vdupq_n_f32(0); | |||
| for (; index < inner_size - C8NUM; index += C8NUM) { | |||
| float32x4_t srcv1 = vld1q_f32(src + index); | |||
| float32x4_t srcv2 = vld1q_f32(src + index + 4); | |||
| float32x4_t squarev1 = vmulq_f32(srcv1, srcv1); | |||
| float32x4_t squarev2 = vmulq_f32(srcv2, srcv2); | |||
| sum = vaddq_f32(sum, srcv1); | |||
| sum = vaddq_f32(sum, srcv2); | |||
| square_sum = vaddq_f32(square_sum, squarev1); | |||
| square_sum = vaddq_f32(square_sum, squarev2); | |||
| } | |||
| mean = sum[0] + sum[1] + sum[2] + sum[3]; | |||
| square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3]; | |||
| float32x4_t meanv = vdupq_n_f32(mean); | |||
| float32x4_t denov = vdupq_n_f32(deno); | |||
| for (; index < num - C4NUM; index += C4NUM) { | |||
| float32x4_t srcv = vld1q_f32(src + index); | |||
| float32x4_t outv = vsubq_f32(srcv, meanv); | |||
| outv = vmulq_f32(outv, denov); | |||
| float32x4_t gammav = vld1q_f32(gamma_data + index); | |||
| float32x4_t betav = vld1q_f32(beta_data + index); | |||
| outv = vmulq_f32(outv, gammav); | |||
| outv = vaddq_f32(outv, betav); | |||
| vst1q_f32(dst + index, outv); | |||
| } | |||
| #endif | |||
| for (; index < inner_size; index++) { | |||
| mean += src[index]; | |||
| square_mean += src[index] * src[index]; | |||
| } | |||
| for (; index < num; index++) { | |||
| dst[index] = (src[index] - mean) * (deno); | |||
| dst[index] = dst[index] * gamma_data[index] + beta_data[index]; | |||
| } | |||
| } | |||
| mean /= (float)inner_size; | |||
| square_mean /= (float)inner_size; | |||
| const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon); | |||
| int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, | |||
| LayerNormParameter *param, size_t task_id) { | |||
| if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| index = 0; | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t meanv = vdupq_n_f32(mean); | |||
| float32x4_t denov = vdupq_n_f32(deno); | |||
| if (elementwise_mode != 0) { | |||
| for (; index < inner_size - C8NUM; index += C8NUM) { | |||
| float32x4_t srcv1 = vld1q_f32(src + index); | |||
| float32x4_t srcv2 = vld1q_f32(src + index + 4); | |||
| float32x4_t outv1 = vsubq_f32(srcv1, meanv); | |||
| float32x4_t outv2 = vsubq_f32(srcv2, meanv); | |||
| outv1 = vmulq_f32(outv1, denov); | |||
| outv2 = vmulq_f32(outv2, denov); | |||
| if (elementwise_mode == 1) { | |||
| float32x4_t gammav1 = vdupq_n_f32(gamma_data[j]); | |||
| float32x4_t betav1 = vdupq_n_f32(beta_data[j]); | |||
| outv1 = vmulq_f32(outv1, gammav1); | |||
| outv2 = vmulq_f32(outv2, gammav1); | |||
| outv1 = vaddq_f32(outv1, betav1); | |||
| outv2 = vaddq_f32(outv2, betav1); | |||
| } else { | |||
| float32x4_t gammav1 = vld1q_f32(gamma_data + index); | |||
| float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4); | |||
| float32x4_t betav1 = vld1q_f32(beta_data + index); | |||
| float32x4_t betav2 = vld1q_f32(beta_data + index + 4); | |||
| outv1 = vmulq_f32(outv1, gammav1); | |||
| outv2 = vmulq_f32(outv2, gammav2); | |||
| outv1 = vaddq_f32(outv1, betav1); | |||
| outv2 = vaddq_f32(outv2, betav2); | |||
| } | |||
| vst1q_f32(dst + index, outv1); | |||
| vst1q_f32(dst + index + 4, outv2); | |||
| int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_); | |||
| int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_); | |||
| for (int i = task_id * step; i < thread_end; i++) { | |||
| const float *src_norm = src_data + i * param->norm_inner_size_; | |||
| float *dst_norm = dst_data + i * param->norm_inner_size_; | |||
| float mean = 0.0f; | |||
| float square_mean = 0.0f; | |||
| LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &mean, &square_mean); | |||
| const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_); | |||
| if (param->norm_outer_size_ <= param->params_outer_size_) { | |||
| for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) { | |||
| const float *src_param = src_norm + x * param->params_inner_size_; | |||
| float *dst_param = dst_norm + x * param->params_inner_size_; | |||
| LayerNormGammaAndBeta(dst_param, src_param, gamma_data, beta_data, param->params_inner_size_, mean, deno); | |||
| } | |||
| } else { | |||
| for (; index < inner_size - C8NUM; index += C8NUM) { | |||
| float32x4_t srcv1 = vld1q_f32(src + index); | |||
| float32x4_t srcv2 = vld1q_f32(src + index + 4); | |||
| float32x4_t outv1 = vsubq_f32(srcv1, meanv); | |||
| float32x4_t outv2 = vsubq_f32(srcv2, meanv); | |||
| outv1 = vmulq_f32(outv1, denov); | |||
| outv2 = vmulq_f32(outv2, denov); | |||
| vst1q_f32(dst + index, outv1); | |||
| vst1q_f32(dst + index + 4, outv2); | |||
| } | |||
| } | |||
| #endif | |||
| for (; index < inner_size; index++) { | |||
| dst[index] = (src[index] - mean) * deno; | |||
| if (elementwise_mode == 1) { | |||
| dst[index] = dst[index] * gamma_data[j] + beta_data[j]; | |||
| } else if (elementwise_mode == 2) { | |||
| dst[index] = dst[index] * gamma_data[index] + beta_data[index]; | |||
| } | |||
| int x = i / param->norm_outer_size_; | |||
| const float *src_param = src_norm + x * param->params_inner_size_; | |||
| float *dst_param = dst_norm + x * param->params_inner_size_; | |||
| const float *gamma = gamma_data + x * param->params_inner_size_; | |||
| const float *beta = beta_data + x * param->params_inner_size_; | |||
| LayerNormGammaAndBeta(dst_param, src_param, gamma, beta, param->norm_inner_size_, mean, deno); | |||
| } | |||
| } | |||
| return NNACL_OK; | |||
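The refactor above factors the fp32 LayerNorm into LayerNormMeanAndSquare plus LayerNormGammaAndBeta. A scalar reference of what those two helpers compute for one normalized block, shown only as a sketch (the production code additionally vectorizes with NEON and splits norm_outer_size_ across threads):

#include <math.h>

/* Reference computation for one block of length num: mean/variance over the
 * block, then per-element (x - mean) / sqrt(var + epsilon) * gamma + beta. */
static void layer_norm_block_ref(const float *src, const float *gamma, const float *beta,
                                 float *dst, int num, float epsilon) {
  float mean = 0.0f, square_mean = 0.0f;
  for (int i = 0; i < num; ++i) {
    mean += src[i];
    square_mean += src[i] * src[i];
  }
  mean /= (float)num;
  square_mean /= (float)num;
  const float deno = 1.0f / sqrtf(square_mean - mean * mean + epsilon);
  for (int i = 0; i < num; ++i) {
    dst[i] = (src[i] - mean) * deno * gamma[i] + beta[i];
  }
}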
| @@ -23,9 +23,8 @@ | |||
| extern "C" { | |||
| #endif | |||
| int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data, | |||
| size_t task_id, size_t thread_num); | |||
| int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, | |||
| LayerNormParameter *param, size_t task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -23,9 +23,10 @@ typedef struct InstanceNormParameter { | |||
| // Primitive parameter | |||
| OpParameter op_parameter_; | |||
| float epsilon_; | |||
| float momentum_; | |||
| // shape correlative | |||
| int batch_; | |||
| int channel_; | |||
| int inner_size_; | |||
| } InstanceNormParameter; | |||
| #endif // MINDSPORE_LITE_NNACL_INSTANCE_NORM_PARAMETER_H_ | |||
| @@ -16,45 +16,58 @@ | |||
| #include "nnacl/int8/layer_norm_int8.h" | |||
| void LayerNormGammaAndBetaInt8(int8_t *dst, const int8_t *src, const float *gamma_data, const float *beta_data, | |||
| LayerNormQuantArg *quant, int num, const float mean, const float deno) { | |||
| for (int i = 0; i < num; i++) { | |||
| float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_; | |||
| float fp32_dst = (fp32_src - mean) * deno; | |||
| fp32_dst = fp32_dst * gamma_data[i] + beta_data[i]; | |||
| int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_); | |||
| dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128); | |||
| } | |||
| } | |||
| /* | |||
| * origin : (x-mean) / sqrt(variance + epsilon) * gamma + beta | |||
| * quant : (x - mean) / sqrt(mean(x * x) - mean * mean + epsilon) * gamma + beta | |||
| * | |||
| */ | |||
| int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data, | |||
| enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant, | |||
| float epsilon) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| LayerNormParameter *param, LayerNormQuantArg *quant, int task_id) { | |||
| if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| for (int out_index = 0; out_index < outer_size; out_index++) { | |||
| const int8_t *src = src_data + out_index * inner_size; | |||
| int8_t *dst = dst_data + out_index * inner_size; | |||
| int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_); | |||
| int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_); | |||
| for (int i = task_id * step; i < thread_end; i++) { | |||
| const int8_t *src_norm = src_data + i * param->norm_inner_size_; | |||
| int8_t *dst_norm = dst_data + i * param->norm_inner_size_; | |||
| float mean = 0.0f; | |||
| float square_mean = 0.0f; | |||
| for (int i = 0; i < inner_size; i++) { | |||
| float float_src = (src[i] - quant->in_zp_) * quant->in_scale_; | |||
| for (int j = 0; j < param->norm_inner_size_; j++) { | |||
| float float_src = (src_norm[j] - quant->in_zp_) * quant->in_scale_; | |||
| mean += float_src; | |||
| square_mean += float_src * float_src; | |||
| } | |||
| mean /= (float)inner_size; | |||
| square_mean /= (float)inner_size; | |||
| const float deno = 1 / sqrtf(square_mean - mean * mean + epsilon); | |||
| for (int i = 0; i < inner_size; i++) { | |||
| float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_; | |||
| float fp32_dst = (fp32_src - mean) * deno; | |||
| if (elementwise_mode == 1) { | |||
| fp32_dst = fp32_dst * gamma_data[out_index] + beta_data[out_index]; | |||
| } else if (elementwise_mode == 2) { | |||
| fp32_dst = fp32_dst * gamma_data[i] + beta_data[i]; | |||
| mean /= (float)param->norm_inner_size_; | |||
| square_mean /= (float)param->norm_inner_size_; | |||
| const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_); | |||
| if (param->norm_outer_size_ <= param->params_outer_size_) { | |||
| for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) { | |||
| const int8_t *src_param = src_norm + x * param->params_inner_size_; | |||
| int8_t *dst_param = dst_norm + x * param->params_inner_size_; | |||
| LayerNormGammaAndBetaInt8(dst_param, src_param, gamma_data, beta_data, quant, param->norm_inner_size_, mean, | |||
| deno); | |||
| } | |||
| int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_); | |||
| dst[i] = (int8_t)MSMAX(MSMIN(int32_dst, 127), -128); | |||
| } else { | |||
| int x = i / param->norm_outer_size_; | |||
| const int8_t *src_param = src_norm + x * param->params_inner_size_; | |||
| int8_t *dst_param = dst_norm + x * param->params_inner_size_; | |||
| const float *gamma = gamma_data + x * param->params_inner_size_; | |||
| const float *beta = beta_data + x * param->params_inner_size_; | |||
| LayerNormGammaAndBetaInt8(dst_param, src_param, gamma, beta, quant, param->norm_inner_size_, mean, deno); | |||
| } | |||
| } | |||
| return NNACL_OK; | |||
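The int8 kernel follows the same outer/inner split as the fp32 path; per element it dequantizes the input, normalizes it, applies the already-dequantized float gamma/beta, then requantizes and clamps. A stand-alone sketch of that per-element math (it mirrors LayerNormGammaAndBetaInt8 above; mean and deno are assumed to have been computed over the dequantized block, as in LayerNormInt8):

#include <math.h>
#include <stdint.h>

static int8_t layer_norm_int8_elem(int8_t src, float gamma, float beta,
                                   float mean, float deno,
                                   int in_zp, float in_scale,
                                   int out_zp, float out_scale) {
  float fp32_src = (src - in_zp) * in_scale;                  /* dequantize */
  float fp32_dst = (fp32_src - mean) * deno * gamma + beta;   /* normalize + affine */
  int32_t q = (int32_t)roundf(fp32_dst / out_scale + out_zp); /* requantize */
  if (q > 127) q = 127;                                       /* clamp to int8 range */
  if (q < -128) q = -128;
  return (int8_t)q;
}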
| @@ -25,8 +25,7 @@ extern "C" { | |||
| #endif | |||
| int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data, | |||
| enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant, | |||
| float epsilon); | |||
| LayerNormParameter *param, LayerNormQuantArg *quant, int task_id); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -24,10 +24,15 @@ typedef struct LayerNormParameter { | |||
| // Primitive parameter | |||
| OpParameter op_parameter_; | |||
| float epsilon_; | |||
| enum ElementwiseMode elementwise_mode_; | |||
| int begin_norm_axis_; | |||
| int begin_params_axis_; | |||
| // shape correlative | |||
| int normalized_shape_[8]; | |||
| int norm_inner_size_; | |||
| int norm_outer_size_; | |||
| int params_inner_size_; | |||
| int params_outer_size_; | |||
| int normalized_dims_; | |||
| int normalized_shape_[8]; | |||
| // other parameter | |||
| int thread_count_; | |||
| int thread_outsize_; | |||
| @@ -17,7 +17,7 @@ | |||
| namespace mindspore.schema; | |||
| enum ResizeMethod: byte { | |||
| UNKNOW = -1, | |||
| UNKNOWN = -1, | |||
| LINEAR = 0, | |||
| NEAREST = 1, | |||
| CUBIC = 2 | |||
| @@ -80,7 +80,7 @@ enum ActivationType : byte { | |||
| HARD_TANH = 16, | |||
| SIGN = 17, | |||
| SWISH = 18, | |||
| UNKNOW = 19 | |||
| UNKNOWN = 19 | |||
| } | |||
| enum ActivationGradType : byte { | |||
| NO_ACTIVATION = 0, | |||
| @@ -99,7 +99,7 @@ enum ActivationGradType : byte { | |||
| HSIGMOID = 13, | |||
| THRESHOLDRELU = 14, | |||
| LINEAR = 15, | |||
| UNKNOW = 16 | |||
| UNKNOWN = 16 | |||
| } | |||
| enum ReduceType : byte { | |||
| REDUCE_MAX = 0, | |||
| @@ -109,7 +109,7 @@ enum ReduceType : byte { | |||
| REDUCE_LOG_SUM_EXP = 4, | |||
| REDUCE_PROD = 5, | |||
| REDUCE_SUM = 6, | |||
| UNKNOW = 7 | |||
| UNKNOWN = 7 | |||
| } | |||
| enum PoolMode : byte { | |||
| @@ -121,7 +121,7 @@ enum EltwiseMode : byte { | |||
| PROD = 0, | |||
| SUM = 1, | |||
| MAXIMUM = 2, | |||
| UNKNOW = 3 | |||
| UNKNOWN = 3 | |||
| } | |||
| enum PadMode : byte { | |||
| @@ -1144,9 +1144,9 @@ table Identity { | |||
| } | |||
| table LayerNorm { | |||
| normalizedShape : [int]; | |||
| begin_norm_axis : int; | |||
| begin_params_axis : int; | |||
| epsilon : float = 0.00001; | |||
| elementwiseAffine : bool; | |||
| } | |||
| table While { | |||
| @@ -22,19 +22,14 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| std::vector<int> LayerNorm::GetNormalizedShape() const { | |||
| return this->primitive_->value.AsLayerNorm()->normalizedShape; | |||
| } | |||
| float LayerNorm::GetEpsilon() const { return this->primitive_->value.AsLayerNorm()->epsilon; } | |||
| bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value.AsLayerNorm()->elementwiseAffine; } | |||
| int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value.AsLayerNorm()->begin_norm_axis; } | |||
| int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value.AsLayerNorm()->begin_params_axis; } | |||
| void LayerNorm::SetNormalizedShape(const std::vector<int> &normalizedShape) { | |||
| this->primitive_->value.AsLayerNorm()->normalizedShape = normalizedShape; | |||
| } | |||
| void LayerNorm::SetEpsilon(float epsilon) { this->primitive_->value.AsLayerNorm()->epsilon = epsilon; } | |||
| void LayerNorm::SetElementwiseAffine(bool elementwiseAffine) { | |||
| this->primitive_->value.AsLayerNorm()->elementwiseAffine = elementwiseAffine; | |||
| } | |||
| void LayerNorm::SetBeginNormAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_norm_axis = axis; } | |||
| void LayerNorm::SetBeginParamsAxis(int axis) { this->primitive_->value.AsLayerNorm()->begin_params_axis = axis; } | |||
| int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| if (this->primitive_ == nullptr) { | |||
| this->primitive_ = new (std::nothrow) schema::PrimitiveT; | |||
| @@ -60,12 +55,17 @@ int LayerNorm::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> & | |||
| } else { | |||
| layer_norm_attr->epsilon = 1e-7; | |||
| } | |||
| value_attr = prim.GetAttr("normalized_shape"); | |||
| if (value_attr != nullptr) { | |||
| layer_norm_attr->normalizedShape = CastToInt(value_attr); | |||
| auto norm_axis_attr = prim.GetAttr("begin_norm_axis"); | |||
| if (norm_axis_attr != nullptr) { | |||
| layer_norm_attr->begin_norm_axis = GetValue<float>(norm_axis_attr); | |||
| } else { | |||
| layer_norm_attr->begin_norm_axis = -1; | |||
| } | |||
| if (inputs.size() == 3) { | |||
| layer_norm_attr->elementwiseAffine = true; | |||
| auto params_axis_attr = prim.GetAttr("begin_params_axis"); | |||
| if (params_axis_attr != nullptr) { | |||
| layer_norm_attr->begin_params_axis = GetValue<float>(params_axis_attr); | |||
| } else { | |||
| layer_norm_attr->begin_params_axis = -1; | |||
| } | |||
| this->primitive_->value.value = layer_norm_attr; | |||
| } | |||
| @@ -81,28 +81,20 @@ int LayerNorm::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffe | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<int32_t> normalizedShape; | |||
| if (attr->normalizedShape() != nullptr) { | |||
| for (int i = 0; i < static_cast<int>(attr->normalizedShape()->size()); i++) { | |||
| normalizedShape.push_back(attr->normalizedShape()->data()[i]); | |||
| } | |||
| } | |||
| auto val_offset = schema::CreateLayerNormDirect(*fbb, &normalizedShape, attr->epsilon(), attr->elementwiseAffine()); | |||
| auto val_offset = schema::CreateLayerNorm(*fbb, attr->epsilon(), attr->begin_norm_axis(), attr->begin_params_axis()); | |||
| auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_LayerNorm, val_offset.o); | |||
| fbb->Finish(prim_offset); | |||
| return RET_OK; | |||
| } | |||
| std::vector<int> LayerNorm::GetNormalizedShape() const { | |||
| auto fb_vector = this->primitive_->value_as_LayerNorm()->normalizedShape(); | |||
| return std::vector<int>(fb_vector->begin(), fb_vector->end()); | |||
| } | |||
| float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); } | |||
| bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); } | |||
| int LayerNorm::GetBeginNormAxis() const { return this->primitive_->value_as_LayerNorm()->begin_norm_axis(); } | |||
| int LayerNorm::GetBeginParamsAxis() const { return this->primitive_->value_as_LayerNorm()->begin_params_axis(); } | |||
| PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) { | |||
| return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive); | |||
| } | |||
| Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator); | |||
| #endif | |||
| int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) { | |||
| if (outputs_.size() != kSingleNum || (inputs_.size() != kSingleNum && inputs_.size() != kTripleNum)) { | |||
| @@ -116,41 +108,13 @@ int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite: | |||
| output->set_format(input->format()); | |||
| output->set_data_type(input->data_type()); | |||
| if (GetElementwiseAffine() && inputs_.size() != kTripleNum) { | |||
| MS_LOG(INFO) << "input tensor amount error"; | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (!GetElementwiseAffine() && inputs_.size() != kSingleNum) { | |||
| MS_LOG(INFO) << "input tensor amount error"; | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (!infer_flag()) { | |||
| return RET_INFER_INVALID; | |||
| } | |||
| auto input_shape = input->shape(); | |||
| normlized_shape_ = GetNormalizedShape(); | |||
| elementwise_mode_ = GetElementwiseAffine() ? 2 : 0; | |||
| if (normlized_shape_.size() > input_shape.size()) { | |||
| MS_LOG(INFO) << "normalized_shape attr invalid"; | |||
| return RET_PARAM_INVALID; | |||
| for (size_t i = GetBeginNormAxis(); i < input_shape.size(); i++) { | |||
| normlized_shape_.push_back(input_shape[i]); | |||
| } | |||
| if (normlized_shape_.empty()) { | |||
| // instance norm -> layernorm only for nchw | |||
| if (input->format() == schema::Format_NCHW) { | |||
| normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 2, input_shape.end()); | |||
| elementwise_mode_ = 1; | |||
| } else { | |||
| normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 1, input_shape.end()); | |||
| } | |||
| } | |||
| size_t first_index = input_shape.size() - normlized_shape_.size(); | |||
| for (size_t i = first_index; i < input_shape.size(); ++i) { | |||
| if (input_shape.at(i) != normlized_shape_.at(i - first_index)) { | |||
| MS_LOG(INFO) << "normalized_shape attr invalid"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| } | |||
| output->set_shape(input_shape); | |||
| return RET_OK; | |||
| } | |||
| @@ -32,23 +32,21 @@ class LayerNorm : public PrimitiveC { | |||
| #ifdef PRIMITIVE_WRITEABLE | |||
| MS_DECLARE_PARENT(LayerNorm, PrimitiveC); | |||
| explicit LayerNorm(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} | |||
| void SetNormalizedShape(const std::vector<int> &normalizedShape); | |||
| void SetEpsilon(float epsilon); | |||
| void SetElementwiseAffine(bool elementwiseAffine); | |||
| void SetBeginNormAxis(int axis); | |||
| void SetBeginParamsAxis(int axis); | |||
| int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override; | |||
| #else | |||
| int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; | |||
| #endif | |||
| int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override; | |||
| std::vector<int> GetNormalizedShape() const; | |||
| float GetEpsilon() const; | |||
| bool GetElementwiseAffine() const; | |||
| std::vector<int> normlized_shape() const { return normlized_shape_; } | |||
| int elementwise_mode() const { return elementwise_mode_; } | |||
| int GetBeginNormAxis() const; | |||
| int GetBeginParamsAxis() const; | |||
| std::vector<int> GetNormlizedShape() const { return normlized_shape_; } | |||
| protected: | |||
| std::vector<int> normlized_shape_; | |||
| int elementwise_mode_ = 0; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -14,9 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| #include "nnacl/layer_norm_parameter.h" | |||
| #include <cstdint> | |||
| #include "src/ops/layer_norm.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| @@ -32,20 +30,15 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi | |||
| memset(layer_norm_parameter, 0, sizeof(LayerNormParameter)); | |||
| layer_norm_parameter->op_parameter_.type_ = primitive->Type(); | |||
| auto param = reinterpret_cast<mindspore::lite::LayerNorm *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||
| auto normalized_shape = param->normlized_shape(); | |||
| auto normalized_shape = param->GetNormlizedShape(); | |||
| layer_norm_parameter->normalized_dims_ = normalized_shape.size(); | |||
| if (normalized_shape.size() > SIZE_MAX / sizeof(int)) { | |||
| MS_LOG(ERROR) << "normalized_shape size too big"; | |||
| free(layer_norm_parameter); | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(normalized_shape.size() < 8); | |||
| for (size_t i = 0; i < normalized_shape.size(); i++) { | |||
| layer_norm_parameter->normalized_shape_[i] = normalized_shape[i]; | |||
| } | |||
| layer_norm_parameter->epsilon_ = param->GetEpsilon(); | |||
| layer_norm_parameter->elementwise_mode_ = static_cast<ElementwiseMode>(param->elementwise_mode()); | |||
| layer_norm_parameter->begin_norm_axis_ = param->GetBeginNormAxis(); | |||
| layer_norm_parameter->begin_params_axis_ = param->GetBeginParamsAxis(); | |||
| return reinterpret_cast<OpParameter *>(layer_norm_parameter); | |||
| } | |||
| @@ -0,0 +1,78 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/instance_norm_fp32.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "nnacl/fp32/instance_norm_fp32.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_InstanceNorm; | |||
| namespace mindspore::kernel { | |||
| int InstanceNormCPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int InstanceNormCPUKernel::ReSize() { | |||
| param_->op_parameter_.thread_num_ = context_->thread_num_; | |||
| auto shape = in_tensors_.front()->shape(); | |||
| param_->batch_ = shape[0]; | |||
| param_->inner_size_ = shape[2] * shape[3]; | |||
| param_->channel_ = shape[1]; | |||
| return RET_OK; | |||
| } | |||
| int InstanceNormCPUKernel::DoInstanceNorm(int task_id) { | |||
| int ret = InstanceNorm(src_data_, dst_data_, gamma_data_, beta_data_, param_, task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int InstanceNormRun(void *cdata, int task_id) { | |||
| auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata); | |||
| auto ret = kernel->DoInstanceNorm(task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int InstanceNormCPUKernel::Run() { | |||
| src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | |||
| dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, LiteKernelCreator<InstanceNormCPUKernel>) | |||
| } // namespace mindspore::kernel | |||
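The kernel above launches one InstanceNormRun per thread, and the nnacl InstanceNorm at the top of this diff slices the channels per task with UP_DIV and MSMIN. A small stand-alone sketch of that partitioning, assuming UP_DIV(a, b) expands to (a + b - 1) / b and MSMIN to the smaller of its arguments, as in nnacl/op_base.h:

#include <stdio.h>

/* Each task t covers channels [t * step, MSMIN(t * step + step, channel)). */
static void print_channel_slices(int channel, int thread_num) {
  int step = (channel + thread_num - 1) / thread_num; /* UP_DIV(channel, thread_num) */
  for (int t = 0; t < thread_num; ++t) {
    int begin = t * step;
    int end = begin + step < channel ? begin + step : channel; /* MSMIN */
    if (begin >= channel) {
      printf("task %d: no channels\n", t);
      continue;
    }
    printf("task %d: channels [%d, %d)\n", t, begin, end);
  }
}

int main(void) {
  print_channel_slices(7, 3); /* 7 channels over 3 tasks -> [0,3) [3,6) [6,7) */
  return 0;
}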
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "include/context.h" | |||
| #include "nnacl/instance_norm_parameter.h" | |||
| using mindspore::lite::InnerContext; | |||
| namespace mindspore::kernel { | |||
| class InstanceNormCPUKernel : public LiteKernel { | |||
| public: | |||
| InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| param_ = reinterpret_cast<InstanceNormParameter *>(parameter); | |||
| } | |||
| ~InstanceNormCPUKernel() override{}; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int DoInstanceNorm(int task_id); | |||
| private: | |||
| InstanceNormParameter *param_ = nullptr; | |||
| float *src_data_ = nullptr; | |||
| float *dst_data_ = nullptr; | |||
| float *gamma_data_ = nullptr; | |||
| float *beta_data_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_ | |||
| @@ -18,7 +18,6 @@ | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| @@ -35,29 +34,37 @@ int LayerNormCPUKernel::Init() { | |||
| } | |||
| int LayerNormCPUKernel::ReSize() { | |||
| if (op_parameter_ != nullptr) { | |||
| free(op_parameter_); | |||
| op_parameter_ = nullptr; | |||
| } | |||
| op_parameter_ = PopulateLayerNormParameter(primitive_); | |||
| op_parameter_->thread_num_ = context_->thread_num_; | |||
| param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_); | |||
| param_->begin_norm_axis_ = -1; | |||
| param_->begin_params_axis_ = -1; | |||
| auto shape = in_tensors_.front()->shape(); | |||
| outer_size_ = 1; | |||
| inner_size_ = 1; | |||
| for (size_t i = 0; i < shape.size(); ++i) { | |||
| if (i + param_->normalized_dims_ < shape.size()) { | |||
| outer_size_ *= shape.at(i); | |||
| } else { | |||
| inner_size_ *= shape.at(i); | |||
| } | |||
| param_->begin_norm_axis_ = | |||
| param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size(); | |||
| param_->begin_params_axis_ = | |||
| param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size(); | |||
| param_->norm_outer_size_ = 1; | |||
| for (int i = 0; i < param_->begin_norm_axis_; ++i) { | |||
| param_->norm_outer_size_ *= shape.at(i); | |||
| } | |||
| param_->norm_inner_size_ = 1; | |||
| for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) { | |||
| param_->norm_inner_size_ *= shape.at(i); | |||
| } | |||
| param_->params_outer_size_ = 1; | |||
| for (int i = 0; i < param_->begin_params_axis_; ++i) { | |||
| param_->params_outer_size_ *= shape.at(i); | |||
| } | |||
| param_->params_inner_size_ = 1; | |||
| for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) { | |||
| param_->params_inner_size_ *= shape.at(i); | |||
| } | |||
| param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_); | |||
| return RET_OK; | |||
| } | |||
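A worked example of the size bookkeeping in the ReSize above, as a sketch only; it assumes the default begin_norm_axis_ = begin_params_axis_ = -1 set a few lines earlier, which the code wraps to shape.size() - 1:

#include <stdio.h>

int main(void) {
  int shape[] = {2, 4, 8};
  int dims = 3;
  int begin_norm_axis = -1 + dims;   /* -1 wraps to dims - 1 = 2 */
  int begin_params_axis = -1 + dims; /* likewise 2 */
  int norm_outer = 1, norm_inner = 1, params_outer = 1, params_inner = 1;
  for (int i = 0; i < begin_norm_axis; ++i) norm_outer *= shape[i];
  for (int i = begin_norm_axis; i < dims; ++i) norm_inner *= shape[i];
  for (int i = 0; i < begin_params_axis; ++i) params_outer *= shape[i];
  for (int i = begin_params_axis; i < dims; ++i) params_inner *= shape[i];
  /* prints: norm 8x8, params 8x8 -> eight rows of length 8 are normalized,
   * and gamma/beta of length 8 are applied to each row */
  printf("norm %dx%d, params %dx%d\n", norm_outer, norm_inner, params_outer, params_inner);
  return 0;
}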
| int LayerNormCPUKernel::DoLayerNorm(int thread_id) { | |||
| int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_mode_, | |||
| param_->epsilon_, dst_data_, thread_id, op_parameter_->thread_num_); | |||
| int ret = LayerNorm(src_data_, gamma_data_, beta_data_, dst_data_, param_, thread_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DoLayerNorm error error_code[" << ret << "]"; | |||
| return ret; | |||
| @@ -77,10 +84,8 @@ int LayerNormRun(void *cdata, int task_id) { | |||
| int LayerNormCPUKernel::Run() { | |||
| src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| if (param_->elementwise_mode_ != 0) { | |||
| gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | |||
| } | |||
| gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | |||
| dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); | |||
| auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| @@ -40,8 +40,6 @@ class LayerNormCPUKernel : public LiteKernel { | |||
| private: | |||
| LayerNormParameter *param_ = nullptr; | |||
| int outer_size_; | |||
| int inner_size_; | |||
| float *src_data_ = nullptr; | |||
| float *dst_data_ = nullptr; | |||
| float *gamma_data_ = nullptr; | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/layer_norm_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| @@ -25,15 +24,14 @@ using mindspore::schema::PrimitiveType_LayerNorm; | |||
| namespace mindspore::kernel { | |||
| LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() { | |||
| if (param_->elementwise_mode_ != 0 && gamma_ptr_ != nullptr) { | |||
| if (gamma_ptr_ != nullptr) { | |||
| free(gamma_ptr_); | |||
| gamma_ptr_ = nullptr; | |||
| } | |||
| if (param_->elementwise_mode_ != 0 && beta_ptr_ != nullptr) { | |||
| if (beta_ptr_ != nullptr) { | |||
| free(beta_ptr_); | |||
| beta_ptr_ = nullptr; | |||
| } | |||
| return; | |||
| } | |||
| int LayerNormInt8CPUKernel::SetQuantArgs() { | |||
| @@ -45,33 +43,31 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { | |||
| quant_param_.out_zp_ = output->quant_params().front().zeroPoint; | |||
| quant_param_.out_scale_ = output->quant_params().front().scale; | |||
| if (param_->elementwise_mode_ != 0) { | |||
| lite::Tensor *gamma_tensor = in_tensors_.at(1); | |||
| lite::Tensor *beta_tensor = in_tensors_.at(2); | |||
| double gamma_scale = gamma_tensor->quant_params().front().scale; | |||
| int gamma_zp = gamma_tensor->quant_params().front().zeroPoint; | |||
| gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float))); | |||
| if (gamma_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc gamma_ptr_ failed"; | |||
| return RET_ERROR; | |||
| } | |||
| int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c()); | |||
| for (int i = 0; i < gamma_tensor->ElementsNum(); i++) { | |||
| gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale; | |||
| } | |||
| beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float))); | |||
| if (beta_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc beta_ptr_ failed"; | |||
| free(gamma_ptr_); | |||
| gamma_ptr_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c()); | |||
| for (int i = 0; i < beta_tensor->ElementsNum(); i++) { | |||
| beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale; | |||
| } | |||
| lite::Tensor *gamma_tensor = in_tensors_.at(1); | |||
| lite::Tensor *beta_tensor = in_tensors_.at(2); | |||
| double gamma_scale = gamma_tensor->quant_params().front().scale; | |||
| int gamma_zp = gamma_tensor->quant_params().front().zeroPoint; | |||
| gamma_ptr_ = reinterpret_cast<float *>(malloc(gamma_tensor->ElementsNum() * sizeof(float))); | |||
| if (gamma_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc gamma_ptr_ failed"; | |||
| return RET_ERROR; | |||
| } | |||
| int8_t *src_gamma = reinterpret_cast<int8_t *>(gamma_tensor->data_c()); | |||
| for (int i = 0; i < gamma_tensor->ElementsNum(); i++) { | |||
| gamma_ptr_[i] = (src_gamma[i] - gamma_zp) * gamma_scale; | |||
| } | |||
| beta_ptr_ = reinterpret_cast<float *>(malloc(beta_tensor->ElementsNum() * sizeof(float))); | |||
| if (beta_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc beta_ptr_ failed"; | |||
| free(gamma_ptr_); | |||
| gamma_ptr_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| int32_t *src_beta = reinterpret_cast<int32_t *>(beta_tensor->data_c()); | |||
| for (int i = 0; i < beta_tensor->ElementsNum(); i++) { | |||
| beta_ptr_[i] = src_beta[i] * quant_param_.in_scale_ * gamma_scale; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -86,30 +82,37 @@ int LayerNormInt8CPUKernel::Init() { | |||
| } | |||
| int LayerNormInt8CPUKernel::ReSize() { | |||
| if (op_parameter_ != nullptr) { | |||
| free(op_parameter_); | |||
| op_parameter_ = nullptr; | |||
| param_->begin_norm_axis_ = -1; | |||
| param_->begin_params_axis_ = -1; | |||
| auto shape = in_tensors_.front()->shape(); | |||
| param_->begin_norm_axis_ = | |||
| param_->begin_norm_axis_ > 0 ? param_->begin_norm_axis_ : param_->begin_norm_axis_ + shape.size(); | |||
| param_->begin_params_axis_ = | |||
| param_->begin_params_axis_ > 0 ? param_->begin_params_axis_ : param_->begin_params_axis_ + shape.size(); | |||
| param_->norm_outer_size_ = 1; | |||
| for (int i = 0; i < param_->begin_norm_axis_; ++i) { | |||
| param_->norm_outer_size_ *= shape.at(i); | |||
| } | |||
| op_parameter_ = PopulateLayerNormParameter(primitive_); | |||
| if (op_parameter_ == nullptr) { | |||
| MS_LOG(ERROR) << "op_parameter_ is nullptr!"; | |||
| return RET_NULL_PTR; | |||
| param_->norm_inner_size_ = 1; | |||
| for (size_t i = param_->begin_norm_axis_; i < shape.size(); ++i) { | |||
| param_->norm_inner_size_ *= shape.at(i); | |||
| } | |||
| op_parameter_->thread_num_ = context_->thread_num_; | |||
| param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_); | |||
| auto shape = in_tensors_.front()->shape(); | |||
| outer_size_ = 1; | |||
| inner_size_ = 1; | |||
| for (size_t i = 0; i < shape.size(); ++i) { | |||
| if (i + param_->normalized_dims_ < shape.size()) { | |||
| outer_size_ *= shape.at(i); | |||
| } else { | |||
| inner_size_ *= shape.at(i); | |||
| } | |||
| param_->params_outer_size_ = 1; | |||
| for (int i = 0; i < param_->begin_params_axis_; ++i) { | |||
| param_->params_outer_size_ *= shape.at(i); | |||
| } | |||
| param_->params_inner_size_ = 1; | |||
| for (size_t i = param_->begin_params_axis_; i < shape.size(); ++i) { | |||
| param_->params_inner_size_ *= shape.at(i); | |||
| } | |||
| param_->op_parameter_.thread_num_ = MSMIN(param_->norm_outer_size_, context_->thread_num_); | |||
| return RET_OK; | |||
| } | |||
| param_->thread_count_ = MSMIN(outer_size_, op_parameter_->thread_num_); | |||
| param_->thread_outsize_ = UP_DIV(outer_size_, param_->thread_count_); | |||
| int LayerNormInt8CPUKernel::DoExecute(int task_id) { | |||
| LayerNormInt8(src_ptr_, gamma_ptr_, beta_ptr_, dst_ptr_, param_, &quant_param_, task_id); | |||
| return RET_OK; | |||
| } | |||
| @@ -119,21 +122,6 @@ int LayerNormInt8Run(void *cdata, int task_id) { | |||
| return RET_OK; | |||
| } | |||
| int LayerNormInt8CPUKernel::DoExecute(int task_id) { | |||
| int current_out_size = outer_size_ - task_id * param_->thread_outsize_; | |||
| current_out_size = MSMIN(current_out_size, param_->thread_outsize_); | |||
| if (current_out_size <= 0) { | |||
| return RET_OK; | |||
| } | |||
| const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_; | |||
| int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_; | |||
| LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_mode_, current_out_size, inner_size_, | |||
| &quant_param_, param_->epsilon_); | |||
| return RET_OK; | |||
| } | |||
| int LayerNormInt8CPUKernel::Run() { | |||
| src_ptr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->MutableData()); | |||
| dst_ptr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData()); | |||
| @@ -46,8 +46,6 @@ class LayerNormInt8CPUKernel : public LiteKernel { | |||
| private: | |||
| LayerNormParameter *param_ = nullptr; | |||
| LayerNormQuantArg quant_param_; | |||
| int outer_size_ = 0; | |||
| int inner_size_ = 0; | |||
| int8_t *src_ptr_ = nullptr; | |||
| int8_t *dst_ptr_ = nullptr; | |||
| float *gamma_ptr_ = nullptr; | |||
| @@ -349,7 +349,7 @@ int ResizeInt8CPUKernel::RunImpl(int task_id) { | |||
| } | |||
| break; | |||
| } | |||
| case schema::ResizeMethod_UNKNOW: | |||
| case schema::ResizeMethod_UNKNOWN: | |||
| default: { | |||
| MS_LOG(ERROR) << "Resize unknown method " << method_; | |||
| ret = RET_ERROR; | |||
| @@ -33,24 +33,6 @@ namespace mindspore::kernel { | |||
| int LayerNormOpenCLKernel::CheckSpecs() { | |||
| auto param = reinterpret_cast<LayerNormParameter *>(this->op_parameter_); | |||
| if (param->elementwise_mode_ == ELEMENTWISE_PER_NUM) { | |||
| if (in_tensors_.size() != 3) { | |||
| MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl; | |||
| return RET_ERROR; | |||
| } | |||
| if (param->normalized_dims_ > in_tensors_.at(0)->shape().size()) { | |||
| MS_LOG(ERROR) << " invalid normalized_shape_ size" << param->normalized_dims_ << std::endl; | |||
| return RET_ERROR; | |||
| } | |||
| } else if (param->elementwise_mode_ == ELEMENTWISE_NOT) { | |||
| if (in_tensors_.size() != 1) { | |||
| MS_LOG(ERROR) << " invalid in_tensors_ size" << in_tensors_.size() << std::endl; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported elementwise_mode_" << param->elementwise_mode_; | |||
| return RET_ERROR; | |||
| } | |||
| if (in_tensors_.at(0)->shape().size() != 4 || out_tensors_.size() != 1) { | |||
| MS_LOG(ERROR) << "UnSupported in_tensors_.shape.size: " << in_tensors_.at(0)->shape().size() | |||
| << " out_tensors_.size(): " << out_tensors_.size(); | |||
| @@ -24,7 +24,6 @@ namespace { | |||
| // PrimitiveType_Stack: src/ops/populate/stack_populate.cc | |||
| OpParameter *CreateParameter(float epsilon, int normalized_dims_, std::vector<int> normalizedShape) { | |||
| auto *param = test::CreateParameter<LayerNormParameter>(schema::PrimitiveType_LayerNorm); | |||
| param->elementwise_mode_ = ELEMENTWISE_PER_NUM; | |||
| param->epsilon_ = epsilon; | |||
| param->normalized_dims_ = normalized_dims_; | |||
| for (int i = 0; i < normalizedShape.size() && i < normalized_dims_; ++i) { | |||
| @@ -48,7 +48,6 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = { | |||
| schema::PrimitiveType_FusedBatchNorm, | |||
| schema::PrimitiveType_PReLU, | |||
| schema::PrimitiveType_BiasAdd, | |||
| schema::PrimitiveType_InstanceNorm, | |||
| schema::PrimitiveType_SpaceToDepth, | |||
| schema::PrimitiveType_DepthToSpace, | |||
| schema::PrimitiveType_TopK}; | |||
| @@ -22,7 +22,7 @@ namespace lite { | |||
| lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphProto &onnx_graph, | |||
| const onnx::NodeProto &onnx_node) { | |||
| MS_LOG(DEBUG) << "onnx InstanceNormParser"; | |||
| auto attr = std::make_unique<schema::LayerNormT>(); | |||
| auto attr = std::make_unique<schema::InstanceNormT>(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new op failed"; | |||
| return nullptr; | |||
| @@ -39,8 +39,7 @@ lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphPr | |||
| MS_LOG(ERROR) << "new primitive failed"; | |||
| return nullptr; | |||
| } | |||
| attr->elementwiseAffine = true; | |||
| primitive->value.type = schema::PrimitiveType_LayerNorm; | |||
| primitive->value.type = schema::PrimitiveType_InstanceNorm; | |||
| primitive->value.value = attr.release(); | |||
| return PrimitiveC::Create(primitive.release()); | |||
| } | |||
| @@ -57,7 +57,7 @@ STATUS TFResizeParser::Parse(const tensorflow::NodeDef &tf_op, | |||
| } else if (tf_op.op() == "ResizeNearestNeighbor") { | |||
| attr->method = schema::ResizeMethod_NEAREST; | |||
| } else { | |||
| attr->method = schema::ResizeMethod_UNKNOW; | |||
| attr->method = schema::ResizeMethod_UNKNOWN; | |||
| } | |||
| auto size_node = tf_node_map.at(tf_op.input(1)); | |||
| if (size_node == nullptr) { | |||
| @@ -114,9 +114,7 @@ CNodePtr LayerNormFusion::CreateLayerNormNode(const FuncGraphPtr &func_graph, co | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| auto layer_norm_primitive = std::make_unique<schema::PrimitiveT>(); | |||
| std::unique_ptr<schema::LayerNormT> attr = std::make_unique<schema::LayerNormT>(); | |||
| attr->normalizedShape = shape; | |||
| attr->epsilon = epsilon; | |||
| attr->elementwiseAffine = true; | |||
| layer_norm_primitive->value.type = schema::PrimitiveType_LayerNorm; | |||
| layer_norm_primitive->value.value = attr.release(); | |||
| auto layer_norm_cvalue = lite::PrimitiveC::Create(layer_norm_primitive.release()); | |||