| @@ -28,12 +28,16 @@ int InstanceNorm::load_param(const ParamDict& pd) | |||||
| { | { | ||||
| channels = pd.get(0, 0); | channels = pd.get(0, 0); | ||||
| eps = pd.get(1, 0.001f); | eps = pd.get(1, 0.001f); | ||||
| affine = pd.get(2, 1); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| int InstanceNorm::load_model(const ModelBin& mb) | int InstanceNorm::load_model(const ModelBin& mb) | ||||
| { | { | ||||
| if (affine == 0) | |||||
| return 0; | |||||
| gamma_data = mb.load(channels, 1); | gamma_data = mb.load(channels, 1); | ||||
| if (gamma_data.empty()) | if (gamma_data.empty()) | ||||
| return -100; | return -100; | ||||
| @@ -51,10 +55,11 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||||
| int w = bottom_top_blob.w; | int w = bottom_top_blob.w; | ||||
| int h = bottom_top_blob.h; | int h = bottom_top_blob.h; | ||||
| int c = bottom_top_blob.c; | |||||
| int size = w * h; | int size = w * h; | ||||
| #pragma omp parallel for num_threads(opt.num_threads) | #pragma omp parallel for num_threads(opt.num_threads) | ||||
| for (int q = 0; q < channels; q++) | |||||
| for (int q = 0; q < c; q++) | |||||
| { | { | ||||
| float* ptr = bottom_top_blob.channel(q); | float* ptr = bottom_top_blob.channel(q); | ||||
| @@ -77,11 +82,21 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||||
| // the var maybe minus due to accuracy | // the var maybe minus due to accuracy | ||||
| //float var = sqsum / size - mean * mean; | //float var = sqsum / size - mean * mean; | ||||
| float gamma = gamma_data[q]; | |||||
| float beta = beta_data[q]; | |||||
| float a; | |||||
| float b; | |||||
| if (affine) | |||||
| { | |||||
| float gamma = gamma_data[q]; | |||||
| float beta = beta_data[q]; | |||||
| float a = static_cast<float>(gamma / (sqrt(var + eps))); | |||||
| float b = -mean * a + beta; | |||||
| a = static_cast<float>(gamma / (sqrt(var + eps))); | |||||
| b = -mean * a + beta; | |||||
| } | |||||
| else | |||||
| { | |||||
| a = static_cast<float>(1.f / (sqrt(var + eps))); | |||||
| b = -mean * a; | |||||
| } | |||||
| for (int i = 0; i < size; i++) | for (int i = 0; i < size; i++) | ||||
| { | { | ||||
| @@ -34,6 +34,7 @@ public: | |||||
| // param | // param | ||||
| int channels; | int channels; | ||||
| float eps; | float eps; | ||||
| int affine; | |||||
| // model | // model | ||||
| Mat gamma_data; | Mat gamma_data; | ||||
| @@ -229,9 +229,10 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt) | |||||
| } | } | ||||
| { | { | ||||
| std::vector<vk_specialization_type> specializations(2); | |||||
| std::vector<vk_specialization_type> specializations(3); | |||||
| specializations[0].f = eps; | specializations[0].f = eps; | ||||
| specializations[1].i = channels / elempack; | |||||
| specializations[1].i = affine; | |||||
| specializations[2].i = channels / elempack; | |||||
| Mat local_size_xyz(std::min(64, channels / elempack), 1, 1, (void*)0); | Mat local_size_xyz(std::min(64, channels / elempack), 1, 1, (void*)0); | ||||
| if (workspace_shape_packed.dims != 0) | if (workspace_shape_packed.dims != 0) | ||||
| @@ -371,6 +372,9 @@ int InstanceNorm_vulkan::destroy_pipeline(const Option& /*opt*/) | |||||
| int InstanceNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | int InstanceNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | ||||
| { | { | ||||
| if (affine == 0) | |||||
| return 0; | |||||
| int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1; | int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1; | ||||
| Mat gamma_data_packed; | Mat gamma_data_packed; | ||||
| @@ -22,7 +22,8 @@ | |||||
| #endif | #endif | ||||
| layout (constant_id = 0) const float eps = 0; | layout (constant_id = 0) const float eps = 0; | ||||
| layout (constant_id = 1) const int w = 0; | |||||
| layout (constant_id = 1) const int affine = 0; | |||||
| layout (constant_id = 2) const int w = 0; | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob; | layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob; | ||||
| @@ -50,17 +51,31 @@ void main() | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| afp mean = image1d_ld1(mean_blob, gx); | afp mean = image1d_ld1(mean_blob, gx); | ||||
| afp var = image1d_ld1(var_blob, gx); | afp var = image1d_ld1(var_blob, gx); | ||||
| afp gamma = image1d_ld1(gamma_blob, gx); | |||||
| afp beta = image1d_ld1(beta_blob, gx); | |||||
| #else | #else | ||||
| afp mean = buffer_ld1(mean_data, gx); | afp mean = buffer_ld1(mean_data, gx); | ||||
| afp var = buffer_ld1(var_data, gx); | afp var = buffer_ld1(var_data, gx); | ||||
| afp gamma = buffer_ld1(gamma_data, gx); | |||||
| afp beta = buffer_ld1(beta_data, gx); | |||||
| #endif | #endif | ||||
| afp a = gamma / (sqrt(var + afp(eps))); | |||||
| afp b = - mean * a + beta; | |||||
| afp a; | |||||
| afp b; | |||||
| if (affine == 0) | |||||
| { | |||||
| a = afp(1.f) / (sqrt(var + afp(eps))); | |||||
| b = - mean * a; | |||||
| } | |||||
| else | |||||
| { | |||||
| #if NCNN_image_shader | |||||
| afp gamma = image1d_ld1(gamma_blob, gx); | |||||
| afp beta = image1d_ld1(beta_blob, gx); | |||||
| #else | |||||
| afp gamma = buffer_ld1(gamma_data, gx); | |||||
| afp beta = buffer_ld1(beta_data, gx); | |||||
| #endif | |||||
| a = gamma / (sqrt(var + afp(eps))); | |||||
| b = - mean * a + beta; | |||||
| } | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| image1d_st1(coeffs_blob, gx*2, a); | image1d_st1(coeffs_blob, gx*2, a); | ||||
| @@ -22,7 +22,8 @@ | |||||
| #endif | #endif | ||||
| layout (constant_id = 0) const float eps = 0; | layout (constant_id = 0) const float eps = 0; | ||||
| layout (constant_id = 1) const int w = 0; | |||||
| layout (constant_id = 1) const int affine = 0; | |||||
| layout (constant_id = 2) const int w = 0; | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | ||||
| @@ -50,17 +51,31 @@ void main() | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| afpvec4 mean = image1d_ld4(mean_blob, gx); | afpvec4 mean = image1d_ld4(mean_blob, gx); | ||||
| afpvec4 var = image1d_ld4(var_blob, gx); | afpvec4 var = image1d_ld4(var_blob, gx); | ||||
| afpvec4 gamma = image1d_ld4(gamma_blob, gx); | |||||
| afpvec4 beta = image1d_ld4(beta_blob, gx); | |||||
| #else | #else | ||||
| afpvec4 mean = buffer_ld4(mean_data, gx); | afpvec4 mean = buffer_ld4(mean_data, gx); | ||||
| afpvec4 var = buffer_ld4(var_data, gx); | afpvec4 var = buffer_ld4(var_data, gx); | ||||
| afpvec4 gamma = buffer_ld4(gamma_data, gx); | |||||
| afpvec4 beta = buffer_ld4(beta_data, gx); | |||||
| #endif | #endif | ||||
| afpvec4 a = gamma / (sqrt(var + afp(eps))); | |||||
| afpvec4 b = - mean * a + beta; | |||||
| afpvec4 a; | |||||
| afpvec4 b; | |||||
| if (affine == 0) | |||||
| { | |||||
| a = afp(1.f) / (sqrt(var + afp(eps))); | |||||
| b = - mean * a; | |||||
| } | |||||
| else | |||||
| { | |||||
| #if NCNN_image_shader | |||||
| afpvec4 gamma = image1d_ld4(gamma_blob, gx); | |||||
| afpvec4 beta = image1d_ld4(beta_blob, gx); | |||||
| #else | |||||
| afpvec4 gamma = buffer_ld4(gamma_data, gx); | |||||
| afpvec4 beta = buffer_ld4(beta_data, gx); | |||||
| #endif | |||||
| a = gamma / (sqrt(var + afp(eps))); | |||||
| b = - mean * a + beta; | |||||
| } | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| image1d_st4(coeffs_blob, gx*2, a); | image1d_st4(coeffs_blob, gx*2, a); | ||||
| @@ -23,7 +23,8 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; }; | |||||
| #endif | #endif | ||||
| layout (constant_id = 0) const float eps = 0; | layout (constant_id = 0) const float eps = 0; | ||||
| layout (constant_id = 1) const int w = 0; | |||||
| layout (constant_id = 1) const int affine = 0; | |||||
| layout (constant_id = 2) const int w = 0; | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob; | ||||
| @@ -51,21 +52,35 @@ void main() | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| afpvec8 mean = image1d_ld8(mean_blob, gx); | afpvec8 mean = image1d_ld8(mean_blob, gx); | ||||
| afpvec8 var = image1d_ld8(var_blob, gx); | afpvec8 var = image1d_ld8(var_blob, gx); | ||||
| afpvec8 gamma = image1d_ld8(gamma_blob, gx); | |||||
| afpvec8 beta = image1d_ld8(beta_blob, gx); | |||||
| #else | #else | ||||
| afpvec8 mean = buffer_ld8(mean_data, gx); | afpvec8 mean = buffer_ld8(mean_data, gx); | ||||
| afpvec8 var = buffer_ld8(var_data, gx); | afpvec8 var = buffer_ld8(var_data, gx); | ||||
| afpvec8 gamma = buffer_ld8(gamma_data, gx); | |||||
| afpvec8 beta = buffer_ld8(beta_data, gx); | |||||
| #endif | #endif | ||||
| afpvec8 a; | afpvec8 a; | ||||
| afpvec8 b; | afpvec8 b; | ||||
| a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); | |||||
| a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); | |||||
| b[0] = - mean[0] * a[0] + beta[0]; | |||||
| b[1] = - mean[1] * a[1] + beta[1]; | |||||
| if (affine == 0) | |||||
| { | |||||
| a[0] = afp(1.f) / (sqrt(var[0] + afp(eps))); | |||||
| a[1] = afp(1.f) / (sqrt(var[1] + afp(eps))); | |||||
| b[0] = - mean[0] * a[0]; | |||||
| b[1] = - mean[1] * a[1]; | |||||
| } | |||||
| else | |||||
| { | |||||
| #if NCNN_image_shader | |||||
| afpvec8 gamma = image1d_ld8(gamma_blob, gx); | |||||
| afpvec8 beta = image1d_ld8(beta_blob, gx); | |||||
| #else | |||||
| afpvec8 gamma = buffer_ld8(gamma_data, gx); | |||||
| afpvec8 beta = buffer_ld8(beta_data, gx); | |||||
| #endif | |||||
| a[0] = gamma[0] / (sqrt(var[0] + afp(eps))); | |||||
| a[1] = gamma[1] / (sqrt(var[1] + afp(eps))); | |||||
| b[0] = - mean[0] * a[0] + beta[0]; | |||||
| b[1] = - mean[1] * a[1] + beta[1]; | |||||
| } | |||||
| #if NCNN_image_shader | #if NCNN_image_shader | ||||
| image1d_st8(coeffs_blob, gx*2, a); | image1d_st8(coeffs_blob, gx*2, a); | ||||
| @@ -15,13 +15,14 @@ | |||||
| #include "layer/instancenorm.h" | #include "layer/instancenorm.h" | ||||
| #include "testutil.h" | #include "testutil.h" | ||||
| static int test_instancenorm(const ncnn::Mat& a, float eps) | |||||
| static int test_instancenorm(const ncnn::Mat& a, float eps, int affine) | |||||
| { | { | ||||
| int channels = a.c; | int channels = a.c; | ||||
| ncnn::ParamDict pd; | ncnn::ParamDict pd; | ||||
| pd.set(0, channels); | pd.set(0, channels); | ||||
| pd.set(1, eps); | pd.set(1, eps); | ||||
| pd.set(2, affine); | |||||
| std::vector<ncnn::Mat> weights(2); | std::vector<ncnn::Mat> weights(2); | ||||
| weights[0] = RandomMat(channels); | weights[0] = RandomMat(channels); | ||||
| @@ -39,8 +40,12 @@ static int test_instancenorm(const ncnn::Mat& a, float eps) | |||||
| static int test_instancenorm_0() | static int test_instancenorm_0() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f) | |||||
| || test_instancenorm(RandomMat(3, 3, 8), 0.002f); | |||||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 0) | |||||
| || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 0) | |||||
| || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 0) | |||||
| || test_instancenorm(RandomMat(6, 4, 2), 0.01f, 1) | |||||
| || test_instancenorm(RandomMat(3, 3, 12), 0.002f, 1) | |||||
| || test_instancenorm(RandomMat(5, 7, 16), 0.02f, 1); | |||||
| } | } | ||||
| int main() | int main() | ||||