Browse Source

Implement the missing 2-dim (image) dequantize on armv7, prefer the NEON-optimized 3-dim dequantize, fix #547

tags/20180830
nihuini 7 years ago
parent
commit
ef36d79b7e
3 changed files with 45 additions and 8 deletions
  1. +1
    -1
      src/layer/arm/convolutiondepthwise_arm.cpp
  2. +37
    -0
      src/layer/arm/dequantize_arm.cpp
  3. +7
    -7
      src/layer/convolutiondepthwise.cpp

+ 1
- 1
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -220,7 +220,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

Mat top_blob_g = top_blob.channel(g);
Mat top_blob_g = top_blob.channel_range(g, 1);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}



+ 37
- 0
src/layer/arm/dequantize_arm.cpp View File

@@ -53,6 +53,43 @@ int Dequantize_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) con
}
}

if (dims == 2)
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;

if (bias_term)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int i=0; i<h; i++)
{
const int* intptr = bottom_top_blob.row<const int>(i);
float* ptr = bottom_top_blob.row(i);

float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];

for (int j=0; j<w; j++)
{
ptr[j] = intptr[j] * scale + bias;
}
}
}
else
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int i=0; i<h; i++)
{
const int* intptr = bottom_top_blob.row<const int>(i);
float* ptr = bottom_top_blob.row(i);

for (int j=0; j<w; j++)
{
ptr[j] = intptr[j] * scale;
}
}
}
}

if (dims == 3)
{
int w = bottom_top_blob.w;


+ 7
- 7
src/layer/convolutiondepthwise.cpp View File

@@ -142,8 +142,8 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
ncnn::Option opt = ncnn::get_default_option();
opt.blob_allocator = int8_weight_data.allocator;

const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
op->forward(weight_data_g, int8_weight_data_g, opt);

delete op;
@@ -181,7 +181,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
dequantize_ops[g]->load_param(pd);

ncnn::Mat weights[1];
weights[0] = Mat(1, (void*)((const float*)bias_data + g));
weights[0] = bias_data.range(g, 1);

dequantize_ops[g]->load_model(ModelBinFromMatArray(weights));
}
@@ -229,8 +229,8 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
opt_g.num_threads = 1;
opt_g.blob_allocator = bottom_blob_int8.allocator;

const Mat bottom_blob_g(w, h, channels_g, (void*)((const float*)bottom_blob.channel(channels_g * g)));
Mat bottom_blob_int8_g(w, h, channels_g, (void*)((signed char*)bottom_blob_int8.channel(channels_g * g)));
const Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
quantize_ops[g]->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
}

@@ -329,7 +329,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

Mat top_blob_g = top_blob.channel(g);
Mat top_blob_g = top_blob.channel_range(g, 1);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}
}
@@ -391,7 +391,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

Mat top_blob_g(outw, outh, num_output_g, (void*)((signed int*)top_blob.channel(g * num_output_g)));
Mat top_blob_g = top_blob.channel_range(num_output_g * g, num_output_g);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}
}


Loading…
Cancel
Save