Browse Source

!31732 [MSLITE][DEVELOP] fix bug of layer norm output 3

Merge pull request !31732 from yangruoqi713/master
r1.7
i-robot Gitee 4 years ago
parent
commit
ffecf20131
No known key found for this signature in database. GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 26 additions and 23 deletions
  1. +12
    -11
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp16/layer_norm_fp16.c
  2. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp16/layer_norm_fp16.h
  3. +12
    -10
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/layer_norm_fp32.c
  4. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/layer_norm_fp32.h

+ 12
- 11
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp16/layer_norm_fp16.c View File

@@ -18,17 +18,17 @@
#include "nnacl/errorcode.h"
#include "nnacl/intrinsics/ms_simd_instructions_fp16.h"

int LayerNormMeanAndSquareFp16(const float16_t *src, int num, float16_t *mean, float16_t *square_mean) {
int LayerNormMeanAndSquareFp16(const float16_t *src, int num, float16_t *mean, float16_t *variance) {
if (num <= 0) {
return NNACL_ERR;
}
int index = 0;
float sum = 0.0f;
float square_sum = 0.0f;
float square_mean = 0.0f;
for (; index <= num - C8NUM; index += C8NUM) {
float16x8_t srcv = vld1q_f16(src + index);
for (int i = 0; i < C8NUM; ++i) {
square_sum += srcv[i] * srcv[i];
square_mean += srcv[i] * srcv[i];
}
float16x4_t sum2 = vadd_f16(vget_low_f16(srcv), vget_high_f16(srcv));
float32x4_t sum_f32 = vcvt_f32_f16(sum2);
@@ -36,10 +36,11 @@ int LayerNormMeanAndSquareFp16(const float16_t *src, int num, float16_t *mean, f
}
for (; index < num; index++) {
sum += src[index];
square_sum += src[index] * src[index];
square_mean += src[index] * src[index];
}
*mean = (float16_t)(sum / num);
*square_mean = (float16_t)(square_sum / num);
square_mean = square_mean / num;
*variance = square_mean - (*mean) * (*mean);
return NNACL_OK;
}

@@ -65,7 +66,7 @@ void LayerNormGammaAndBetaFp16(float16_t *dst, const float16_t *src, const float
}

int LayerNormFp16(const float16_t *src_data, const float16_t *gamma_data, const float16_t *beta_data,
float16_t *dst_data, float16_t *out_mean, float16_t *out_deno, LayerNormParameter *param,
float16_t *dst_data, float16_t *out_mean, float16_t *out_variance, LayerNormParameter *param,
size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
@@ -79,18 +80,18 @@ int LayerNormFp16(const float16_t *src_data, const float16_t *gamma_data, const
const float16_t *src_norm = src_data + i * param->norm_inner_size_;
float16_t *dst_norm = dst_data + i * param->norm_inner_size_;
float16_t cur_mean = 0.0f;
float16_t cur_deno = 0.0f;
int ret = LayerNormMeanAndSquareFp16(src_norm, param->norm_inner_size_, &cur_mean, &cur_deno);
float16_t cur_variance = 0.0f;
int ret = LayerNormMeanAndSquareFp16(src_norm, param->norm_inner_size_, &cur_mean, &cur_variance);
if (ret != NNACL_OK) {
return NNACL_ERR;
}
if (out_mean != NULL) {
out_mean[i] = cur_mean;
}
if (out_deno != NULL) {
out_deno[i] = cur_deno;
if (out_variance != NULL) {
out_variance[i] = cur_variance;
}
const float16_t deno = 1 / sqrtf(cur_deno - cur_mean * cur_mean + param->epsilon_);
const float16_t deno = 1 / sqrtf(cur_variance + param->epsilon_);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const float16_t *src_param = src_norm + x * param->params_inner_size_;


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp16/layer_norm_fp16.h View File

@@ -24,7 +24,7 @@ extern "C" {
#endif

int LayerNormFp16(const float16_t *src_data, const float16_t *gamma_data, const float16_t *beta_data,
float16_t *dst_data, float16_t *out_mean, float16_t *out_deno, LayerNormParameter *param,
float16_t *dst_data, float16_t *out_mean, float16_t *out_variance, LayerNormParameter *param,
size_t task_id);
#ifdef __cplusplus
}


+ 12
- 10
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/layer_norm_fp32.c View File

@@ -18,11 +18,12 @@
#include "nnacl/errorcode.h"
#include "nnacl/op_base.h"

int LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square_mean) {
int LayerNormMeanAndSquare(const float *src, int num, float *mean, float *variance) {
if (num <= 0) {
return NNACL_ERR;
}
int index = 0;
float square_mean = 0.f;
#ifdef ENABLE_NEON
float32x4_t sum = vdupq_n_f32(0);
float32x4_t square_sum = vdupq_n_f32(0);
@@ -33,14 +34,15 @@ int LayerNormMeanAndSquare(const float *src, int num, float *mean, float *square
square_sum = vaddq_f32(square_sum, squarev);
}
*mean = sum[0] + sum[1] + sum[2] + sum[3];
*square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
#endif
for (; index < num; index++) {
*mean += src[index];
*square_mean += src[index] * src[index];
square_mean += src[index] * src[index];
}
*mean /= (float)num;
*square_mean /= (float)num;
square_mean /= (float)num;
*variance = square_mean - (*mean) * (*mean);
return NNACL_OK;
}

@@ -68,7 +70,7 @@ void LayerNormGammaAndBeta(float *dst, const float *src, const float *gamma_data
}

int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
float *out_deno, const LayerNormParameter *param, size_t task_id) {
float *out_variance, const LayerNormParameter *param, size_t task_id) {
if (src_data == NULL || dst_data == NULL || gamma_data == NULL || beta_data == NULL) {
return NNACL_NULL_PTR;
}
@@ -80,18 +82,18 @@ int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_
const float *src_norm = src_data + i * param->norm_inner_size_;
float *dst_norm = dst_data + i * param->norm_inner_size_;
float cur_mean = 0.0f;
float cur_deno = 0.0f;
int ret = LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &cur_mean, &cur_deno);
float cur_variance = 0.0f;
int ret = LayerNormMeanAndSquare(src_norm, param->norm_inner_size_, &cur_mean, &cur_variance);
if (ret != NNACL_OK) {
return NNACL_ERR;
}
if (out_mean != NULL) {
out_mean[i] = cur_mean;
}
if (out_deno != NULL) {
out_deno[i] = cur_deno;
if (out_variance != NULL) {
out_variance[i] = cur_variance;
}
const float deno = 1 / sqrtf(cur_deno - cur_mean * cur_mean + param->epsilon_);
const float deno = 1 / sqrtf(cur_variance + param->epsilon_);
if (param->norm_outer_size_ <= param->params_outer_size_) {
for (int x = 0; x < param->norm_inner_size_ / param->params_inner_size_; x++) {
const float *src_param = src_norm + x * param->params_inner_size_;


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/fp32/layer_norm_fp32.h View File

@@ -24,7 +24,7 @@ extern "C" {
#endif

int LayerNorm(const float *src_data, const float *gamma_data, const float *beta_data, float *dst_data, float *out_mean,
float *out_deno, const LayerNormParameter *param, size_t task_id);
float *out_variance, const LayerNormParameter *param, size_t task_id);
#ifdef __cplusplus
}
#endif


Loading…
Cancel
Save