As we already have the int8_scale_term switch, we do not have to rely on the actual scale value.

8 years ago · 23de61fd07
--- a/src/layer/convolution.cpp
+++ b/src/layer/convolution.cpp
@@ -88,31 +88,23 @@ int Convolution::load_model(const ModelBin& mb)

    if (weight_data_is_float32 && use_int8_inference)
    {
        if (weight_data_int8_scale != 0.f && bottom_blob_int8_scale != 0.f)
        {
            // quantize weight to int8
            Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);
        // quantize weight to int8
        Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

            ncnn::ParamDict pd;
            pd.set(0, weight_data_int8_scale);// scale
        ncnn::ParamDict pd;
        pd.set(0, weight_data_int8_scale);// scale

            op->load_param(pd);
        op->load_param(pd);

            Mat int8_weight_data;
            op->forward(weight_data, int8_weight_data);
        Mat int8_weight_data;
        op->forward(weight_data, int8_weight_data);

            delete op;
        delete op;

            if (int8_weight_data.empty())
                return -100;
        if (int8_weight_data.empty())
            return -100;

            weight_data = int8_weight_data;
        }
        else
        {
            // plain float32 weight, fallback to float32 inference
            use_int8_inference = false;
        }
        weight_data = int8_weight_data;
    }

    if (use_int8_inference)
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -123,41 +123,33 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)

    if (weight_data_is_float32 && use_int8_inference)
    {
        if (!weight_data_int8_scales.empty() && !bottom_blob_int8_scales.empty())
        {
            // quantize weight to int8
            Mat int8_weight_data(weight_data_size, (size_t)1u);
            if (int8_weight_data.empty())
                return -100;
        // quantize weight to int8
        Mat int8_weight_data(weight_data_size, (size_t)1u);
        if (int8_weight_data.empty())
            return -100;

            const int weight_data_size_g = weight_data_size / group;
        const int weight_data_size_g = weight_data_size / group;

            for (int g=0; g<group; g++)
            {
                Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

                ncnn::ParamDict pd;
                pd.set(0, weight_data_int8_scales[g]);// scale
        for (int g=0; g<group; g++)
        {
            Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

                op->load_param(pd);
            ncnn::ParamDict pd;
            pd.set(0, weight_data_int8_scales[g]);// scale

                ncnn::Option opt = ncnn::get_default_option();
                opt.blob_allocator = int8_weight_data.allocator;
            op->load_param(pd);

                const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
                Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
                op->forward(weight_data_g, int8_weight_data_g, opt);
            ncnn::Option opt = ncnn::get_default_option();
            opt.blob_allocator = int8_weight_data.allocator;

                delete op;
            }
            const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
            Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
            op->forward(weight_data_g, int8_weight_data_g, opt);

            weight_data = int8_weight_data;
        }
        else
        {
            // plain float32 weight, fallback to float32 inference
            use_int8_inference = false;
            delete op;
        }

        weight_data = int8_weight_data;
    }

    if (use_int8_inference)
--- a/src/layer/innerproduct.cpp
+++ b/src/layer/innerproduct.cpp
@@ -86,27 +86,19 @@ int InnerProduct::load_model(const ModelBin& mb)

    if (weight_data_is_float32 && use_int8_inference)
    {
        if (weight_data_int8_scale != 0.f && bottom_blob_int8_scale != 0.f)
        {
            // quantize weight to int8
            ncnn::ParamDict pd;
            pd.set(0, weight_data_int8_scale);// scale
        // quantize weight to int8
        ncnn::ParamDict pd;
        pd.set(0, weight_data_int8_scale);// scale

            quantize->load_param(pd);
        quantize->load_param(pd);

            Mat int8_weight_data;
            quantize->forward(weight_data, int8_weight_data);
        Mat int8_weight_data;
        quantize->forward(weight_data, int8_weight_data);

            if (int8_weight_data.empty())
                return -100;
        if (int8_weight_data.empty())
            return -100;

            weight_data = int8_weight_data;
        }
        else
        {
            // plain float32 weight, fallback to float32 inference
            use_int8_inference = false;
        }
        weight_data = int8_weight_data;
    }

    return 0;