Browse Source

conv based speed stft

pull/5989/head
futz12 1 year ago
parent
commit
c6ab155a67
3 changed files with 202 additions and 78 deletions
  1. +125
    -77
      src/layer/spectrogram.cpp
  2. +6
    -0
      src/layer/spectrogram.h
  3. +71
    -1
      tests/test_spectrogram.cpp

+ 125
- 77
src/layer/spectrogram.cpp View File

@@ -22,6 +22,11 @@ Spectrogram::Spectrogram()
support_inplace = false;
}

Spectrogram::~Spectrogram()
{
delete conv_transpose;
}

int Spectrogram::load_param(const ParamDict& pd)
{
n_fft = pd.get(0, 0);
@@ -36,7 +41,7 @@ int Spectrogram::load_param(const ParamDict& pd)

// assert winlen <= n_fft
// generate window
window_data.create(normalized == 2 ? n_fft + 1 : n_fft);
window_data.create(n_fft);
{
float* p = window_data;
for (int i = 0; i < (n_fft - winlen) / 2; i++)
@@ -80,10 +85,90 @@ int Spectrogram::load_param(const ParamDict& pd)
{
sqsum += window_data[i] * window_data[i];
}
window_data[n_fft] = 1.f / sqrt(sqsum);
float scale = 1.f / sqrt(sqsum);

for (int i = 0; i < n_fft; i++)
{
window_data[i] *= scale;
}
}
}

Mat theta;
if (onesided)
{
n_freq = n_fft / 2 + 1;
} else
{
n_freq = n_fft;
}
theta.create(n_fft,n_freq,size_t(8));

for (int i = 0; i<n_freq; i++)
{
for (int j = 0; j<n_fft; j++)
{
theta.row<double>(i)[j] = 2 * 3.14159265358979323846 * i * j / n_fft;
}
}

Mat real_basis, imag_basis;
real_basis.create(n_fft,n_freq,size_t(8));
imag_basis.create(n_fft,n_freq,size_t(8));

for (int i = 0; i<n_freq; i++)
{
for (int j = 0; j<n_fft; j++)
{
real_basis.row<double>(i)[j] = cos(theta.row<double>(i)[j]);
imag_basis.row<double>(i)[j] = -sin(theta.row<double>(i)[j]);
}
}

// multiply window
for (int i = 0; i<n_freq; i++)
{
for (int j = 0; j<n_fft; j++)
{
real_basis.row<double>(i)[j] *= window_data[j];
imag_basis.row<double>(i)[j] *= window_data[j];
}
}

if (normalized == 1)
{
double scale = 1.f / sqrt(n_fft);
for (int i = 0; i<n_freq; i++)
{
for (int j = 0; j<n_fft; j++)
{
real_basis.row<double>(i)[j] *= scale;
imag_basis.row<double>(i)[j] *= scale;
}
}
}

conv_data.create(n_fft,1,n_freq * 2);

for (int i = 0; i<n_freq; i++)
{
for (int j = 0; j<n_fft; j++)
{
conv_data.channel(i).row<float>(0)[j]= (float)real_basis.row<double>(i)[j];
conv_data.channel(i+n_freq).row<float>(0)[j] = (float)imag_basis.row<double>(i)[j];
}
}

conv_transpose = ncnn::create_layer("Convolution1D");
ncnn::ParamDict conv_transpose_pd;

conv_transpose_pd.set(0,2 * n_freq); // num_output
conv_transpose_pd.set(1,n_fft); // kernel_w
conv_transpose_pd.set(3,hoplen); // stride_w
conv_transpose_pd.set(19,1); // dynamic_weight

conv_transpose->load_param(conv_transpose_pd);

return 0;
}

@@ -110,107 +195,70 @@ int Spectrogram::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op

// const int frames = size / hoplen + 1;
const int frames = (size - n_fft) / hoplen + 1;
const int freqs_onesided = n_fft / 2 + 1;
const int freqs = onesided ? freqs_onesided : n_fft;

const size_t elemsize = bottom_blob_bordered.elemsize;

if (elemsize != sizeof(float))
{
return -100;
}

if (power == 0)
{
top_blob.create(2, frames, freqs, elemsize, opt.blob_allocator);
top_blob.create(2, frames, n_freq, elemsize, opt.blob_allocator);
}
else
{
top_blob.create(frames, freqs, elemsize, opt.blob_allocator);
top_blob.create(frames, n_freq, elemsize, opt.blob_allocator);
}
if (top_blob.empty())
return -100;

#pragma omp parallel for num_threads(opt.num_threads)
for (int i = 0; i < freqs_onesided; i++)
{
const float* ptr = bottom_blob_bordered;
float* outptr = power == 0 ? top_blob.channel(i) : top_blob.row(i);
std::vector<Mat> inputs = {bottom_blob_bordered,conv_data};
std::vector<Mat> outputs = {Mat()};

for (int j = 0; j < frames; j++)
{
float re = 0.f;
float im = 0.f;
for (int k = 0; k < n_fft; k++)
{
float v = ptr[k];
Option opt_conv = opt;
opt_conv.use_packing_layout = false;

// apply window
v *= window_data[k];
conv_transpose->create_pipeline(opt_conv);
conv_transpose->forward(inputs,outputs,opt_conv);
conv_transpose->destroy_pipeline(opt_conv);

// dft
double angle = 2 * 3.14159265358979323846 * i * k / n_fft;
Mat conv_top_blob = outputs[0]; // (2 * n_freq, frames)
float* conv_top_data = conv_top_blob;

re += v * cosf(angle); // + imag * sinf(angle);
im -= v * sinf(angle); // + imag * cosf(angle);
}

if (normalized == 1)
{
float norm = 1.f / sqrt(n_fft);
re *= norm;
im *= norm;
}
if (normalized == 2)
{
float norm = window_data[n_fft];
re *= norm;
im *= norm;
}

if (power == 0)
{
// complex as real
outptr[0] = re;
outptr[1] = im;
outptr += 2;
}
if (power == 1)
{
// magnitude
outptr[0] = sqrt(re * re + im * im);
outptr += 1;
}
if (power == 2)
if (power == 0) // as complex
{
// copy
for (int i = 0; i<frames; i++)
{
for (int j = 0; j<n_freq; j++)
{
outptr[0] = re * re + im * im;
outptr += 1;
top_blob.channel(j).row<float>(i)[0] = conv_top_data[j * frames + i];
top_blob.channel(j).row<float>(i)[1] = conv_top_data[(j + n_freq) * frames + i];
}

ptr += hoplen;
}
}

if (!onesided)
} else
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int i = freqs_onesided; i < n_fft; i++)
if (power == 1) // magnitude sqrt(re * re + im * im);
{
if (power == 0)
// copy
for (int i = 0; i < frames; i++)
{
const float* ptr = top_blob.channel(n_fft - i);
float* outptr = top_blob.channel(i);

for (int j = 0; j < frames; j++)
for (int j = 0; j < n_freq; j++)
{
// complex as real
outptr[0] = ptr[0];
outptr[1] = -ptr[1];
ptr += 2;
outptr += 2;
top_blob.row<float>(j)[i] = sqrtf(conv_top_data[j * frames + i] * conv_top_data[j * frames + i] + conv_top_data[(j + n_freq) * frames + i] * conv_top_data[(j + n_freq) * frames + i]);
}
}
else // if (power == 1 || power == 2)
} else if (power == 2) // power re * re + im * im;
{
// copy
for (int i = 0; i < frames; i++)
{
const float* ptr = top_blob.row(n_fft - i);
float* outptr = top_blob.row(i);
memcpy(outptr, ptr, frames * sizeof(float));
for (int j = 0; j< n_freq; j++)
{
top_blob.row<float>(j)[i] = conv_top_data[j * frames + i] * conv_top_data[j * frames + i] + conv_top_data[(j + n_freq) * frames + i] * conv_top_data[(j + n_freq) * frames + i];
}
}
}
}


+ 6
- 0
src/layer/spectrogram.h View File

@@ -23,6 +23,7 @@ class Spectrogram : public Layer
{
public:
Spectrogram();
~Spectrogram();

virtual int load_param(const ParamDict& pd);

@@ -39,7 +40,12 @@ public:
int normalized; // 0=disabled 1=sqrt(n_fft) 2=window-l2-energy
int onesided;

int n_freq;

Mat window_data;
Mat conv_data;

Layer* conv_transpose = 0;
};

} // namespace ncnn


+ 71
- 1
tests/test_spectrogram.cpp View File

@@ -50,9 +50,79 @@ static int test_spectrogram_0()
|| test_spectrogram(124, 55, 2, 12, 55, 1, 1, 2, 2, 0);
}

static int test_spectrogram_eval(int size, int n_fft, int power, int hoplen, int winlen, int window_type, int center, int pad_type, int normalized, int onesided,float * in,float * std)
{
ncnn::Layer * layer = ncnn::create_layer("Spectrogram");

ncnn::ParamDict pd;
pd.set(0, n_fft);
pd.set(1, power);
pd.set(2, hoplen);
pd.set(3, winlen);
pd.set(4, window_type);
pd.set(5, center);
pd.set(6, pad_type);
pd.set(7, normalized);
pd.set(8, onesided);

ncnn::Mat input = ncnn::Mat(size);
memcpy(input, in, size * sizeof(float));

ncnn::Mat output;

ncnn::Option opt;
opt.num_threads = 2;

layer->load_param(pd);
layer->create_pipeline(opt);
layer->forward(input, output, opt);
layer->destroy_pipeline(opt);

const float epsilon = 1e-6;

for (int i = 0; i < output.c; i++)
{
float * output_data = output.channel(i);
for (int j = 0; j < output.h; j++)
{
for (int k = 0; k < output.w; k++)
{
if (fabs(output_data[j * output.w + k] - std[i * output.h * output.w + j * output.w + k]) > epsilon)
{
fprintf(stderr, "test_spectrogram failed size=%d n_fft=%d power=%d hoplen=%d winlen=%d window_type=%d center=%d pad_type=%d normalized=%d onesided=%d\n", size, n_fft, power, hoplen, winlen, window_type, center, pad_type, normalized, onesided);
return 1;
}
}
}
}

delete layer;
return 0;
}

static int test_spectrogram_1()
{
float input_0[16] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f};
float std_0[] = {
0.05000000f, 0.40000001f, 0.80000001f, 1.20000005f, 1.59999990f, 2.00000000f, 2.40000010f, 2.79999995f, 0.75000000f, 0.05000000f, 0.22360681f, 0.41231057f, 0.60827625f, 0.80622578f, 1.00498760f, 1.20415950f, 1.40356684f, 0.75000000f, 0.05000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000006f, 0.00000000f, 0.00000000f, 0.00000000f, 0.75000000f
};
float std_1[] = {
0.80000001f, 1.20000005f, 1.59999990f, 2.00000000f, 2.40000010f, 0.68649411f, 1.02670193f, 1.36751485f, 1.70857072f, 2.04974818f, 0.41231057f, 0.60827625f, 0.80622578f, 1.00498760f, 1.20415950f, 0.13684234f, 0.18942842f, 0.24475159f, 0.30130789f, 0.35851428f, 0.00000000f, 0.00000000f, 0.00000006f, 0.00000000f, 0.00000000f
};
float std_2[] = {
0.28284273f, 0.49497476f, 0.70710677f, 0.24271232f, 0.42322639f, 0.60407096f, 0.14577380f, 0.25000000f, 0.35531676f, 0.04838108f, 0.07667736f, 0.10652842f, 0.00000000f, 0.00000002f, 0.00000000f, 0.04838108f, 0.07667736f, 0.10652842f, 0.14577380f, 0.25000000f, 0.35531676f, 0.24271232f, 0.42322639f, 0.60407096f
};

return
test_spectrogram_eval(16, 4, 1, 2, 4, 1, 1, 0, 0, 1, input_0, std_0)
|| test_spectrogram_eval(16, 8, 1, 2, 4, 1, 0, 0, 0, 1, input_0, std_1)
|| test_spectrogram_eval(16, 8, 1, 3, 4, 1, 0, 0, 1, 0, input_0, std_2);

}

int main()
{
SRAND(7767517);

return test_spectrogram_0();
return test_spectrogram_0() || test_spectrogram_1();
}

Loading…
Cancel
Save