Browse Source

instancenorm without affine

tags/20200916
nihuini 5 years ago
parent
commit
dacfccfa65
7 changed files with 103 additions and 33 deletions
  1. +20
    -5
      src/layer/instancenorm.cpp
  2. +1
    -0
      src/layer/instancenorm.h
  3. +6
    -2
      src/layer/vulkan/instancenorm_vulkan.cpp
  4. +22
    -7
      src/layer/vulkan/shader/instancenorm_coeffs.comp
  5. +22
    -7
      src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp
  6. +24
    -9
      src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp
  7. +8
    -3
      tests/test_instancenorm.cpp

+ 20
- 5
src/layer/instancenorm.cpp View File

@@ -28,12 +28,16 @@ int InstanceNorm::load_param(const ParamDict& pd)
{
channels = pd.get(0, 0);
eps = pd.get(1, 0.001f);
affine = pd.get(2, 1);

return 0;
}

int InstanceNorm::load_model(const ModelBin& mb)
{
if (affine == 0)
return 0;

gamma_data = mb.load(channels, 1);
if (gamma_data.empty())
return -100;
@@ -51,10 +55,11 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int c = bottom_top_blob.c;
int size = w * h;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
for (int q = 0; q < c; q++)
{
float* ptr = bottom_top_blob.channel(q);

@@ -77,11 +82,21 @@ int InstanceNorm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
// the var may come out negative due to limited floating-point accuracy
//float var = sqsum / size - mean * mean;

float gamma = gamma_data[q];
float beta = beta_data[q];
float a;
float b;
if (affine)
{
float gamma = gamma_data[q];
float beta = beta_data[q];

float a = static_cast<float>(gamma / (sqrt(var + eps)));
float b = -mean * a + beta;
a = static_cast<float>(gamma / (sqrt(var + eps)));
b = -mean * a + beta;
}
else
{
a = static_cast<float>(1.f / (sqrt(var + eps)));
b = -mean * a;
}

for (int i = 0; i < size; i++)
{


+ 1
- 0
src/layer/instancenorm.h View File

@@ -34,6 +34,7 @@ public:
// param
int channels;
float eps;
int affine;

// model
Mat gamma_data;


+ 6
- 2
src/layer/vulkan/instancenorm_vulkan.cpp View File

@@ -229,9 +229,10 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt)
}

{
std::vector<vk_specialization_type> specializations(2);
std::vector<vk_specialization_type> specializations(3);
specializations[0].f = eps;
specializations[1].i = channels / elempack;
specializations[1].i = affine;
specializations[2].i = channels / elempack;

Mat local_size_xyz(std::min(64, channels / elempack), 1, 1, (void*)0);
if (workspace_shape_packed.dims != 0)
@@ -371,6 +372,9 @@ int InstanceNorm_vulkan::destroy_pipeline(const Option& /*opt*/)

int InstanceNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
if (affine == 0)
return 0;

int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1;

Mat gamma_data_packed;


+ 22
- 7
src/layer/vulkan/shader/instancenorm_coeffs.comp View File

@@ -22,7 +22,8 @@
#endif

layout (constant_id = 0) const float eps = 0;
layout (constant_id = 1) const int w = 0;
layout (constant_id = 1) const int affine = 0;
layout (constant_id = 2) const int w = 0;

#if NCNN_image_shader
layout (binding = 0, imfmtc1) writeonly uniform unfp image1D coeffs_blob;
@@ -50,17 +51,31 @@ void main()
#if NCNN_image_shader
afp mean = image1d_ld1(mean_blob, gx);
afp var = image1d_ld1(var_blob, gx);
afp gamma = image1d_ld1(gamma_blob, gx);
afp beta = image1d_ld1(beta_blob, gx);
#else
afp mean = buffer_ld1(mean_data, gx);
afp var = buffer_ld1(var_data, gx);
afp gamma = buffer_ld1(gamma_data, gx);
afp beta = buffer_ld1(beta_data, gx);
#endif

afp a = gamma / (sqrt(var + afp(eps)));
afp b = - mean * a + beta;
afp a;
afp b;
if (affine == 0)
{
a = afp(1.f) / (sqrt(var + afp(eps)));
b = - mean * a;
}
else
{
#if NCNN_image_shader
afp gamma = image1d_ld1(gamma_blob, gx);
afp beta = image1d_ld1(beta_blob, gx);
#else
afp gamma = buffer_ld1(gamma_data, gx);
afp beta = buffer_ld1(beta_data, gx);
#endif

a = gamma / (sqrt(var + afp(eps)));
b = - mean * a + beta;
}

#if NCNN_image_shader
image1d_st1(coeffs_blob, gx*2, a);


+ 22
- 7
src/layer/vulkan/shader/instancenorm_coeffs_pack4.comp View File

@@ -22,7 +22,8 @@
#endif

layout (constant_id = 0) const float eps = 0;
layout (constant_id = 1) const int w = 0;
layout (constant_id = 1) const int affine = 0;
layout (constant_id = 2) const int w = 0;

#if NCNN_image_shader
layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob;
@@ -50,17 +51,31 @@ void main()
#if NCNN_image_shader
afpvec4 mean = image1d_ld4(mean_blob, gx);
afpvec4 var = image1d_ld4(var_blob, gx);
afpvec4 gamma = image1d_ld4(gamma_blob, gx);
afpvec4 beta = image1d_ld4(beta_blob, gx);
#else
afpvec4 mean = buffer_ld4(mean_data, gx);
afpvec4 var = buffer_ld4(var_data, gx);
afpvec4 gamma = buffer_ld4(gamma_data, gx);
afpvec4 beta = buffer_ld4(beta_data, gx);
#endif

afpvec4 a = gamma / (sqrt(var + afp(eps)));
afpvec4 b = - mean * a + beta;
afpvec4 a;
afpvec4 b;
if (affine == 0)
{
a = afp(1.f) / (sqrt(var + afp(eps)));
b = - mean * a;
}
else
{
#if NCNN_image_shader
afpvec4 gamma = image1d_ld4(gamma_blob, gx);
afpvec4 beta = image1d_ld4(beta_blob, gx);
#else
afpvec4 gamma = buffer_ld4(gamma_data, gx);
afpvec4 beta = buffer_ld4(beta_data, gx);
#endif

a = gamma / (sqrt(var + afp(eps)));
b = - mean * a + beta;
}

#if NCNN_image_shader
image1d_st4(coeffs_blob, gx*2, a);


+ 24
- 9
src/layer/vulkan/shader/instancenorm_coeffs_pack8.comp View File

@@ -23,7 +23,8 @@ struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif

layout (constant_id = 0) const float eps = 0;
layout (constant_id = 1) const int w = 0;
layout (constant_id = 1) const int affine = 0;
layout (constant_id = 2) const int w = 0;

#if NCNN_image_shader
layout (binding = 0, imfmtc4) writeonly uniform unfp image1D coeffs_blob;
@@ -51,21 +52,35 @@ void main()
#if NCNN_image_shader
afpvec8 mean = image1d_ld8(mean_blob, gx);
afpvec8 var = image1d_ld8(var_blob, gx);
afpvec8 gamma = image1d_ld8(gamma_blob, gx);
afpvec8 beta = image1d_ld8(beta_blob, gx);
#else
afpvec8 mean = buffer_ld8(mean_data, gx);
afpvec8 var = buffer_ld8(var_data, gx);
afpvec8 gamma = buffer_ld8(gamma_data, gx);
afpvec8 beta = buffer_ld8(beta_data, gx);
#endif

afpvec8 a;
afpvec8 b;
a[0] = gamma[0] / (sqrt(var[0] + afp(eps)));
a[1] = gamma[1] / (sqrt(var[1] + afp(eps)));
b[0] = - mean[0] * a[0] + beta[0];
b[1] = - mean[1] * a[1] + beta[1];
if (affine == 0)
{
a[0] = afp(1.f) / (sqrt(var[0] + afp(eps)));
a[1] = afp(1.f) / (sqrt(var[1] + afp(eps)));
b[0] = - mean[0] * a[0];
b[1] = - mean[1] * a[1];
}
else
{
#if NCNN_image_shader
afpvec8 gamma = image1d_ld8(gamma_blob, gx);
afpvec8 beta = image1d_ld8(beta_blob, gx);
#else
afpvec8 gamma = buffer_ld8(gamma_data, gx);
afpvec8 beta = buffer_ld8(beta_data, gx);
#endif

a[0] = gamma[0] / (sqrt(var[0] + afp(eps)));
a[1] = gamma[1] / (sqrt(var[1] + afp(eps)));
b[0] = - mean[0] * a[0] + beta[0];
b[1] = - mean[1] * a[1] + beta[1];
}

#if NCNN_image_shader
image1d_st8(coeffs_blob, gx*2, a);


+ 8
- 3
tests/test_instancenorm.cpp View File

@@ -15,13 +15,14 @@
#include "layer/instancenorm.h"
#include "testutil.h"

static int test_instancenorm(const ncnn::Mat& a, float eps)
static int test_instancenorm(const ncnn::Mat& a, float eps, int affine)
{
int channels = a.c;

ncnn::ParamDict pd;
pd.set(0, channels);
pd.set(1, eps);
pd.set(2, affine);

std::vector<ncnn::Mat> weights(2);
weights[0] = RandomMat(channels);
@@ -39,8 +40,12 @@ static int test_instancenorm(const ncnn::Mat& a, float eps)
static int test_instancenorm_0()
{
return 0
|| test_instancenorm(RandomMat(6, 4, 2), 0.01f)
|| test_instancenorm(RandomMat(3, 3, 8), 0.002f);
|| test_instancenorm(RandomMat(6, 4, 2), 0.01f, 0)
|| test_instancenorm(RandomMat(3, 3, 12), 0.002f, 0)
|| test_instancenorm(RandomMat(5, 7, 16), 0.02f, 0)
|| test_instancenorm(RandomMat(6, 4, 2), 0.01f, 1)
|| test_instancenorm(RandomMat(3, 3, 12), 0.002f, 1)
|| test_instancenorm(RandomMat(5, 7, 16), 0.02f, 1);
}

int main()


Loading…
Cancel
Save