| @@ -28,12 +28,16 @@ int InstanceNorm::load_param(const ParamDict& pd) | |||
| { | |||
| channels = pd.get(0, 0); | |||
| eps = pd.get(1, 0.001f); | |||
| affine = pd.get(2, 1); | |||
| return 0; | |||
| } | |||
| int InstanceNorm::load_model(const ModelBin& mb) | |||
| { | |||
| if (affine == 0) | |||
| return 0; | |||
| gamma_data = mb.load(channels, 1); | |||
| if (gamma_data.empty()) | |||
| return -100; | |||
| @@ -51,10 +55,11 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| int w = bottom_top_blob.w; | |||
| int h = bottom_top_blob.h; | |||
| int c = bottom_top_blob.c; | |||
| int size = w * h; | |||
| #pragma omp parallel for num_threads(opt.num_threads) | |||
| for (int q = 0; q < channels; q++) | |||
| for (int q = 0; q < c; q++) | |||
| { | |||
| float* ptr = bottom_top_blob.channel(q); | |||
| @@ -77,11 +82,21 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| // the var maybe minus due to accuracy | |||
| //float var = sqsum / size - mean * mean; | |||
| float gamma = gamma_data[q]; | |||
| float beta = beta_data[q]; | |||
| float a; | |||
| float b; | |||
| if (affine) | |||
| { | |||
| float gamma = gamma_data[q]; | |||
| float beta = beta_data[q]; | |||
| float a = static_cast<float>(gamma / (sqrt(var + eps))); | |||
| float b = -mean * a + beta; | |||
| a = static_cast<float>(gamma / (sqrt(var + eps))); | |||
| b = -mean * a + beta; | |||
| } | |||
| else | |||
| { | |||
| a = static_cast<float>(1.f / (sqrt(var + eps))); | |||
| b = -mean * a; | |||
| } | |||
| for (int i = 0; i < size; i++) | |||
| { | |||
| @@ -34,6 +34,7 @@ public: | |||
| // param | |||
| int channels; | |||
| float eps; | |||
| int affine; | |||
| // model | |||
| Mat gamma_data; | |||
| @@ -229,9 +229,10 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt) | |||
| } | |||
| { | |||
| std::vector<vk_specialization_type> specializations(2); | |||
| std::vector<vk_specialization_type> specializations(3); | |||
| specializations[0].f = eps; | |||
| specializations[1].i = channels / elempack; | |||
| specializations[1].i = affine; | |||
| specializations[2].i = channels / elempack; | |||
| Mat local_size_xyz(std::min(64, channels / elempack), 1, 1, (void*)0); | |||
| if (workspace_shape_packed.dims != 0) | |||
| @@ -371,6 +372,9 @@ int InstanceNorm_vulkan::destroy_pipeline(const Option& /*opt*/) | |||
| int InstanceNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| { | |||
| if (affine == 0) | |||
| return 0; | |||
| int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1; | |||
| Mat gamma_data_packed; | |||
| @@ -22,7 +22,8 @@ | |||
| #endif | |||
| layout (constant_id = 0) const float eps = 0; | |||
| layout (constant_id = 1) const int w = 0; | |||
| layout (constant_id = 1) const int affine = 0; | |||
| layout (constant_id = 2) const int w = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob; | |||
| @@ -50,17 +51,31 @@ void main() | |||
| #if NCNN_image_shader | |||
| afp mean = image1d_ld1(mean_blob, gx); | |||
| afp var = image1d_ld1(var_blob, gx); | |||
| afp gamma = image1d_ld1(gamma_blob, gx); | |||
| afp beta = image1d_ld1(beta_blob, gx); | |||
| #else | |||
| afp mean = buffer_ld1(mean_data, gx); | |||
| afp var = buffer_ld1(var_data, gx); | |||
| afp gamma = buffer_ld1(gamma_data, gx); | |||
| afp beta = buffer_ld1(beta_data, gx); | |||
| #endif | |||
| afp a = gamma / (sqrt(var + afp(eps))); | |||
| afp b = - mean * a + beta; | |||
| afp a; | |||
| afp b; | |||
| if (affine == 0) | |||
| { | |||
| a = afp(1.f) / (sqrt(var + afp(eps))); | |||
| b = - mean * a; | |||
| } | |||
| else | |||
| { | |||
| #if NCNN_image_shader | |||
| afp gamma = image1d_ld1(gamma_blob, gx); | |||
| afp beta = image1d_ld1(beta_blob, gx); | |||
| #else | |||
| afp gamma = buffer_ld1(gamma_data, gx); | |||
| afp beta = buffer_ld1(beta_data, gx); | |||
| #endif | |||
| a = gamma / (sqrt(var + afp(eps))); | |||
| b = - mean * a + beta; | |||
| } | |||
| #if NCNN_image_shader | |||
| image1d_st1(coeffs_blob, gx*2, a); | |||
| @@ -22,7 +22,8 @@ | |||
| #endif | |||
| layout (constant_id = 0) const float eps = 0; | |||
| layout (constant_id = 1) const int w = 0; | |||
| layout (constant_id = 1) const int affine = 0; | |||
| layout (constant_id = 2) const int w = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | |||
| @@ -50,17 +51,31 @@ void main() | |||
| #if NCNN_image_shader | |||
| afpvec4 mean = image1d_ld4(mean_blob, gx); | |||
| afpvec4 var = image1d_ld4(var_blob, gx); | |||
| afpvec4 gamma = image1d_ld4(gamma_blob, gx); | |||
| afpvec4 beta = image1d_ld4(beta_blob, gx); | |||
| #else | |||
| afpvec4 mean = buffer_ld4(mean_data, gx); | |||
| afpvec4 var = buffer_ld4(var_data, gx); | |||
| afpvec4 gamma = buffer_ld4(gamma_data, gx); | |||
| afpvec4 beta = buffer_ld4(beta_data, gx); | |||
| #endif | |||
| afpvec4 a = gamma / (sqrt(var + afp(eps))); | |||
| afpvec4 b = - mean * a + beta; | |||
| afpvec4 a; | |||
| afpvec4 b; | |||
| if (affine == 0) | |||
| { | |||
| a = afp(1.f) / (sqrt(var + afp(eps))); | |||
| b = - mean * a; | |||
| } | |||
| else | |||
| { | |||
| #if NCNN_image_shader | |||
| afpvec4 gamma = image1d_ld4(gamma_blob, gx); | |||
| afpvec4 beta = image1d_ld4(beta_blob, gx); | |||
| #else | |||
| afpvec4 gamma = buffer_ld4(gamma_data, gx); | |||
| afpvec4 beta = buffer_ld4(beta_data, gx); | |||
| #endif | |||
| a = gamma / (sqrt(var + afp(eps))); | |||
| b = - mean * a + beta; | |||
| } | |||
| #if NCNN_image_shader | |||
| image1d_st4(coeffs_blob, gx*2, a); | |||
| @@ -23,7 +23,8 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||
| #endif | |||
| layout (constant_id = 0) const float eps = 0; | |||
| layout (constant_id = 1) const int w = 0; | |||
| layout (constant_id = 1) const int affine = 0; | |||
| layout (constant_id = 2) const int w = 0; | |||
| #if NCNN_image_shader | |||
| layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | |||
| @@ -51,21 +52,35 @@ void main() | |||
| #if NCNN_image_shader | |||
| afpvec8 mean = image1d_ld8(mean_blob, gx); | |||
| afpvec8 var = image1d_ld8(var_blob, gx); | |||
| afpvec8 gamma = image1d_ld8(gamma_blob, gx); | |||
| afpvec8 beta = image1d_ld8(beta_blob, gx); | |||
| #else | |||
| afpvec8 mean = buffer_ld8(mean_data, gx); | |||
| afpvec8 var = buffer_ld8(var_data, gx); | |||
| afpvec8 gamma = buffer_ld8(gamma_data, gx); | |||
| afpvec8 beta = buffer_ld8(beta_data, gx); | |||
| #endif | |||
| afpvec8 a; | |||
| afpvec8 b; | |||
| a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); | |||
| a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); | |||
| b[0] = - mean[0] * a[0] + beta[0]; | |||
| b[1] = - mean[1] * a[1] + beta[1]; | |||
| if (affine == 0) | |||
| { | |||
| a[0] = afp(1.f) / (sqrt(var[0] + afp(eps))); | |||
| a[1] = afp(1.f) / (sqrt(var[1] + afp(eps))); | |||
| b[0] = - mean[0] * a[0]; | |||
| b[1] = - mean[1] * a[1]; | |||
| } | |||
| else | |||
| { | |||
| #if NCNN_image_shader | |||
| afpvec8 gamma = image1d_ld8(gamma_blob, gx); | |||
| afpvec8 beta = image1d_ld8(beta_blob, gx); | |||
| #else | |||
| afpvec8 gamma = buffer_ld8(gamma_data, gx); | |||
| afpvec8 beta = buffer_ld8(beta_data, gx); | |||
| #endif | |||
| a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); | |||
| a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); | |||
| b[0] = - mean[0] * a[0] + beta[0]; | |||
| b[1] = - mean[1] * a[1] + beta[1]; | |||
| } | |||
| #if NCNN_image_shader | |||
| image1d_st8(coeffs_blob, gx*2, a); | |||
| @@ -15,13 +15,14 @@ | |||
| #include "layer/instancenorm.h" | |||
| #include "testutil.h" | |||
| static int test_instancenorm(const ncnn::Mat& a, float eps) | |||
| static int test_instancenorm(const ncnn::Mat& a, float eps, int affine) | |||
| { | |||
| int channels = a.c; | |||
| ncnn::ParamDict pd; | |||
| pd.set(0, channels); | |||
| pd.set(1, eps); | |||
| pd.set(2, affine); | |||
| std::vector<ncnn::Mat> weights(2); | |||
| weights[0] = RandomMat(channels); | |||
| @@ -39,8 +40,12 @@ static int test_instancenorm(const ncnn::Mat& a, float eps) | |||
| static int test_instancenorm_0() | |||
| { | |||
| return 0 | |||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f) | |||
| || test_instancenorm(RandomMat(3, 3, 8), 0.002f); | |||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 0) | |||
| || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 0) | |||
| || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 0) | |||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 1) | |||
| || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 1) | |||
| || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 1); | |||
| } | |||
| int main() | |||