|
|
|
@@ -152,14 +152,18 @@ int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const |
|
|
|
const int channels_g = channels / group; |
|
|
|
const int num_output_g = num_output / group; |
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
|
|
#pragma omp parallel for num_threads(opt.num_threads) |
|
|
|
#else // _WIN32 |
|
|
|
#pragma omp parallel for collapse(2) num_threads(opt.num_threads) |
|
|
|
#endif // _WIN32 |
|
|
|
for (int g = 0; g < group; g++) |
|
|
|
{ |
|
|
|
const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; |
|
|
|
for (int p = 0; p < num_output_g; p++) |
|
|
|
{ |
|
|
|
Mat out = top_blob_bordered.channel(g * num_output_g + p); |
|
|
|
|
|
|
|
const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; |
|
|
|
const float bias = bias_term ? bias_data[g * num_output_g + p] : 0.f; |
|
|
|
|
|
|
|
out.fill(bias); |
|
|
|
|