implement the missing dequantize image on armv7, prefer neon-optimized 3-dim dequantize, fix #547

8 years ago · ef36d79b7e
--- a/src/layer/arm/convolutiondepthwise_arm.cpp
+++ b/src/layer/arm/convolutiondepthwise_arm.cpp
@@ -220,7 +220,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
                        opt_g.num_threads = 1;
                        opt_g.blob_allocator = top_blob.allocator;

                        Mat top_blob_g = top_blob.channel(g);
                        Mat top_blob_g = top_blob.channel_range(g, 1);
                        dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
                    }

--- a/src/layer/arm/dequantize_arm.cpp
+++ b/src/layer/arm/dequantize_arm.cpp
@@ -53,6 +53,43 @@ int Dequantize_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) con
        }
    }

    if (dims == 2)
    {
        int w = bottom_top_blob.w;
        int h = bottom_top_blob.h;

        if (bias_term)
        {
            #pragma omp parallel for num_threads(opt.num_threads)
            for (int i=0; i<h; i++)
            {
                const int* intptr = bottom_top_blob.row<const int>(i);
                float* ptr = bottom_top_blob.row(i);

                float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];

                for (int j=0; j<w; j++)
                {
                    ptr[j] = intptr[j] * scale + bias;
                }
            }
        }
        else
        {
            #pragma omp parallel for num_threads(opt.num_threads)
            for (int i=0; i<h; i++)
            {
                const int* intptr = bottom_top_blob.row<const int>(i);
                float* ptr = bottom_top_blob.row(i);

                for (int j=0; j<w; j++)
                {
                    ptr[j] = intptr[j] * scale;
                }
            }
        }
    }

    if (dims == 3)
    {
        int w = bottom_top_blob.w;
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -142,8 +142,8 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
            ncnn::Option opt = ncnn::get_default_option();
            opt.blob_allocator = int8_weight_data.allocator;

            const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
            Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
            const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
            Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
            op->forward(weight_data_g, int8_weight_data_g, opt);

            delete op;
@@ -181,7 +181,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
            dequantize_ops[g]->load_param(pd);

            ncnn::Mat weights[1];
            weights[0] = Mat(1, (void*)((const float*)bias_data + g));
            weights[0] = bias_data.range(g, 1);

            dequantize_ops[g]->load_model(ModelBinFromMatArray(weights));
        }
@@ -229,8 +229,8 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
            opt_g.num_threads = 1;
            opt_g.blob_allocator = bottom_blob_int8.allocator;

            const Mat bottom_blob_g(w, h, channels_g, (void*)((const float*)bottom_blob.channel(channels_g * g)));
            Mat bottom_blob_int8_g(w, h, channels_g, (void*)((signed char*)bottom_blob_int8.channel(channels_g * g)));
            const Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
            Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
            quantize_ops[g]->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
        }

@@ -329,7 +329,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
                    opt_g.num_threads = 1;
                    opt_g.blob_allocator = top_blob.allocator;

                    Mat top_blob_g = top_blob.channel(g);
                    Mat top_blob_g = top_blob.channel_range(g, 1);
                    dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
                }
            }
@@ -391,7 +391,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
                opt_g.num_threads = 1;
                opt_g.blob_allocator = top_blob.allocator;

                Mat top_blob_g(outw, outh, num_output_g, (void*)((signed int*)top_blob.channel(g * num_output_g)));
                Mat top_blob_g = top_blob.channel_range(num_output_g * g, num_output_g);
                dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
            }
        }