From dacfccfa651a71ea59cc3e03cea74bdbc05a38fb Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 26 Aug 2020 20:10:34 +0800 Subject: [PATCH] instancenorm without affine --- src/layer/instancenorm.cpp | 25 +++++++++++--- src/layer/instancenorm.h | 1 + src/layer/vulkan/instancenorm_vulkan.cpp | 8 +++-- .../vulkan/shader/instancenorm_coeffs.comp | 29 ++++++++++++---- .../shader/instancenorm_coeffs_pack4.comp | 29 ++++++++++++---- .../shader/instancenorm_coeffs_pack8.comp | 33 ++++++++++++++----- tests/test_instancenorm.cpp | 11 +++++-- 7 files changed, 103 insertions(+), 33 deletions(-) diff --git a/src/layer/instancenorm.cpp b/src/layer/instancenorm.cpp index bed6e8583..97fb99df6 100644 --- a/src/layer/instancenorm.cpp +++ b/src/layer/instancenorm.cpp @@ -28,12 +28,16 @@ int InstanceNorm::load_param(const ParamDict& pd) { channels = pd.get(0, 0); eps = pd.get(1, 0.001f); + affine = pd.get(2, 1); return 0; } int InstanceNorm::load_model(const ModelBin& mb) { + if (affine == 0) + return 0; + gamma_data = mb.load(channels, 1); if (gamma_data.empty()) return -100; @@ -51,10 +55,11 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const int w = bottom_top_blob.w; int h = bottom_top_blob.h; + int c = bottom_top_blob.c; int size = w * h; #pragma omp parallel for num_threads(opt.num_threads) - for (int q = 0; q < channels; q++) + for (int q = 0; q < c; q++) { float* ptr = bottom_top_blob.channel(q); @@ -77,11 +82,21 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const // the var maybe minus due to accuracy //float var = sqsum / size - mean * mean; - float gamma = gamma_data[q]; - float beta = beta_data[q]; + float a; + float b; + if (affine) + { + float gamma = gamma_data[q]; + float beta = beta_data[q]; - float a = static_cast(gamma / (sqrt(var + eps))); - float b = -mean * a + beta; + a = static_cast(gamma / (sqrt(var + eps))); + b = -mean * a + beta; + } + else + { + a = static_cast(1.f / (sqrt(var + eps))); + b = -mean * a; + } for (int i = 0; i < size; i++) { diff --git a/src/layer/instancenorm.h b/src/layer/instancenorm.h index ce6268ac6..9a37932ac 100644 --- a/src/layer/instancenorm.h +++ b/src/layer/instancenorm.h @@ -34,6 +34,7 @@ public: // param int channels; float eps; + int affine; // model Mat gamma_data; diff --git a/src/layer/vulkan/instancenorm_vulkan.cpp b/src/layer/vulkan/instancenorm_vulkan.cpp index c22835370..fc980af63 100644 --- a/src/layer/vulkan/instancenorm_vulkan.cpp +++ b/src/layer/vulkan/instancenorm_vulkan.cpp @@ -229,9 +229,10 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt) } { - std::vector specializations(2); + std::vector specializations(3); specializations[0].f = eps; - specializations[1].i = channels / elempack; + specializations[1].i = affine; + specializations[2].i = channels / elempack; Mat local_size_xyz(std::min(64, channels / elempack), 1, 1, (void*)0); if (workspace_shape_packed.dims != 0) @@ -371,6 +372,9 @@ int InstanceNorm_vulkan::destroy_pipeline(const Option& /*opt*/) int InstanceNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) { + if (affine == 0) + return 0; + int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1; Mat gamma_data_packed; diff --git a/src/layer/vulkan/shader/instancenorm_coeffs.comp b/src/layer/vulkan/shader/instancenorm_coeffs.comp index f58ba9ffc..098013de4 100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs.comp @@ -22,7 +22,8 @@ #endif layout (constant_id = 0) const float eps = 0; -layout (constant_id = 1) const int w = 0; +layout (constant_id = 1) const int affine = 0; +layout (constant_id = 2) const int w = 0; #if NCNN_image_shader layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob; @@ -50,17 +51,31 @@ void main() #if NCNN_image_shader afp mean = image1d_ld1(mean_blob, gx); afp var = image1d_ld1(var_blob, gx); - afp gamma = image1d_ld1(gamma_blob, gx); - afp beta = image1d_ld1(beta_blob, gx); #else afp mean = buffer_ld1(mean_data, gx); afp var = buffer_ld1(var_data, gx); - afp gamma = buffer_ld1(gamma_data, gx); - afp beta = buffer_ld1(beta_data, gx); #endif - afp a = gamma / (sqrt(var + afp(eps))); - afp b = - mean * a + beta; + afp a; + afp b; + if (affine == 0) + { + a = afp(1.f) / (sqrt(var + afp(eps))); + b = - mean * a; + } + else + { +#if NCNN_image_shader + afp gamma = image1d_ld1(gamma_blob, gx); + afp beta = image1d_ld1(beta_blob, gx); +#else + afp gamma = buffer_ld1(gamma_data, gx); + afp beta = buffer_ld1(beta_data, gx); +#endif + + a = gamma / (sqrt(var + afp(eps))); + b = - mean * a + beta; + } #if NCNN_image_shader image1d_st1(coeffs_blob, gx*2, a); diff --git a/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp b/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp index d4647664b..40b2dd265 100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp @@ -22,7 +22,8 @@ #endif layout (constant_id = 0) const float eps = 0; -layout (constant_id = 1) const int w = 0; +layout (constant_id = 1) const int affine = 0; +layout (constant_id = 2) const int w = 0; #if NCNN_image_shader layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; @@ -50,17 +51,31 @@ void main() #if NCNN_image_shader afpvec4 mean = image1d_ld4(mean_blob, gx); afpvec4 var = image1d_ld4(var_blob, gx); - afpvec4 gamma = image1d_ld4(gamma_blob, gx); - afpvec4 beta = image1d_ld4(beta_blob, gx); #else afpvec4 mean = buffer_ld4(mean_data, gx); afpvec4 var = buffer_ld4(var_data, gx); - afpvec4 gamma = buffer_ld4(gamma_data, gx); - afpvec4 beta = buffer_ld4(beta_data, gx); #endif - afpvec4 a = gamma / (sqrt(var + afp(eps))); - afpvec4 b = - mean * a + beta; + afpvec4 a; + afpvec4 b; + if (affine == 0) + { + a = afp(1.f) / (sqrt(var + afp(eps))); + b = - mean * a; + } + else + { +#if NCNN_image_shader + afpvec4 gamma = image1d_ld4(gamma_blob, gx); + afpvec4 beta = image1d_ld4(beta_blob, gx); +#else + afpvec4 gamma = buffer_ld4(gamma_data, gx); + afpvec4 beta = buffer_ld4(beta_data, gx); +#endif + + a = gamma / (sqrt(var + afp(eps))); + b = - mean * a + beta; + } #if NCNN_image_shader image1d_st4(coeffs_blob, gx*2, a); diff --git a/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp b/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp index 9f2fdaf3e..fcd83af10 100644 --- a/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp +++ b/src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp @@ -23,7 +23,8 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; #endif layout (constant_id = 0) const float eps = 0; -layout (constant_id = 1) const int w = 0; +layout (constant_id = 1) const int affine = 0; +layout (constant_id = 2) const int w = 0; #if NCNN_image_shader layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; @@ -51,21 +52,35 @@ void main() #if NCNN_image_shader afpvec8 mean = image1d_ld8(mean_blob, gx); afpvec8 var = image1d_ld8(var_blob, gx); - afpvec8 gamma = image1d_ld8(gamma_blob, gx); - afpvec8 beta = image1d_ld8(beta_blob, gx); #else afpvec8 mean = buffer_ld8(mean_data, gx); afpvec8 var = buffer_ld8(var_data, gx); - afpvec8 gamma = buffer_ld8(gamma_data, gx); - afpvec8 beta = buffer_ld8(beta_data, gx); #endif afpvec8 a; afpvec8 b; - a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); - a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); - b[0] = - mean[0] * a[0] + beta[0]; - b[1] = - mean[1] * a[1] + beta[1]; + if (affine == 0) + { + a[0] = afp(1.f) / (sqrt(var[0] + afp(eps))); + a[1] = afp(1.f) / (sqrt(var[1] + afp(eps))); + b[0] = - mean[0] * a[0]; + b[1] = - mean[1] * a[1]; + } + else + { +#if NCNN_image_shader + afpvec8 gamma = image1d_ld8(gamma_blob, gx); + afpvec8 beta = image1d_ld8(beta_blob, gx); +#else + afpvec8 gamma = buffer_ld8(gamma_data, gx); + afpvec8 beta = buffer_ld8(beta_data, gx); +#endif + + a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); + a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); + b[0] = - mean[0] * a[0] + beta[0]; + b[1] = - mean[1] * a[1] + beta[1]; + } #if NCNN_image_shader image1d_st8(coeffs_blob, gx*2, a); diff --git a/tests/test_instancenorm.cpp b/tests/test_instancenorm.cpp index c46d53edc..c4c68df60 100644 --- a/tests/test_instancenorm.cpp +++ b/tests/test_instancenorm.cpp @@ -15,13 +15,14 @@ #include "layer/instancenorm.h" #include "testutil.h" -static int test_instancenorm(const ncnn::Mat& a, float eps) +static int test_instancenorm(const ncnn::Mat& a, float eps, int affine) { int channels = a.c; ncnn::ParamDict pd; pd.set(0, channels); pd.set(1, eps); + pd.set(2, affine); std::vector weights(2); weights[0] = RandomMat(channels); @@ -39,8 +40,12 @@ static int test_instancenorm(const ncnn::Mat& a, float eps) static int test_instancenorm_0() { return 0 - || test_instancenorm(RandomMat(6, 4, 2), 0.01f) - || test_instancenorm(RandomMat(3, 3, 8), 0.002f); + || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 0) + || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 0) + || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 0) + || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 1) + || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 1) + || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 1); } int main()