Browse Source

Since we already have the int8_scale_term switch, we do not have to rely on the actual scale value.

tags/20180830
nihuini 7 years ago
parent
commit
23de61fd07
3 changed files with 39 additions and 63 deletions
  1. +11
    -19
      src/layer/convolution.cpp
  2. +19
    -27
      src/layer/convolutiondepthwise.cpp
  3. +9
    -17
      src/layer/innerproduct.cpp

+ 11
- 19
src/layer/convolution.cpp View File

@@ -88,31 +88,23 @@ int Convolution::load_model(const ModelBin& mb)

if (weight_data_is_float32 && use_int8_inference)
{
if (weight_data_int8_scale != 0.f && bottom_blob_int8_scale != 0.f)
{
// quantize weight to int8
Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);
// quantize weight to int8
Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scale);// scale
ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scale);// scale

op->load_param(pd);
op->load_param(pd);

Mat int8_weight_data;
op->forward(weight_data, int8_weight_data);
Mat int8_weight_data;
op->forward(weight_data, int8_weight_data);

delete op;
delete op;

if (int8_weight_data.empty())
return -100;
if (int8_weight_data.empty())
return -100;

weight_data = int8_weight_data;
}
else
{
// plain float32 weight, fallback to float32 inference
use_int8_inference = false;
}
weight_data = int8_weight_data;
}

if (use_int8_inference)


+ 19
- 27
src/layer/convolutiondepthwise.cpp View File

@@ -123,41 +123,33 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)

if (weight_data_is_float32 && use_int8_inference)
{
if (!weight_data_int8_scales.empty() && !bottom_blob_int8_scales.empty())
{
// quantize weight to int8
Mat int8_weight_data(weight_data_size, (size_t)1u);
if (int8_weight_data.empty())
return -100;
// quantize weight to int8
Mat int8_weight_data(weight_data_size, (size_t)1u);
if (int8_weight_data.empty())
return -100;

const int weight_data_size_g = weight_data_size / group;
const int weight_data_size_g = weight_data_size / group;

for (int g=0; g<group; g++)
{
Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scales[g]);// scale
for (int g=0; g<group; g++)
{
Layer* op = ncnn::create_layer(ncnn::LayerType::Quantize);

op->load_param(pd);
ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scales[g]);// scale

ncnn::Option opt = ncnn::get_default_option();
opt.blob_allocator = int8_weight_data.allocator;
op->load_param(pd);

const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
op->forward(weight_data_g, int8_weight_data_g, opt);
ncnn::Option opt = ncnn::get_default_option();
opt.blob_allocator = int8_weight_data.allocator;

delete op;
}
const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
op->forward(weight_data_g, int8_weight_data_g, opt);

weight_data = int8_weight_data;
}
else
{
// plain float32 weight, fallback to float32 inference
use_int8_inference = false;
delete op;
}

weight_data = int8_weight_data;
}

if (use_int8_inference)


+ 9
- 17
src/layer/innerproduct.cpp View File

@@ -86,27 +86,19 @@ int InnerProduct::load_model(const ModelBin& mb)

if (weight_data_is_float32 && use_int8_inference)
{
if (weight_data_int8_scale != 0.f && bottom_blob_int8_scale != 0.f)
{
// quantize weight to int8
ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scale);// scale
// quantize weight to int8
ncnn::ParamDict pd;
pd.set(0, weight_data_int8_scale);// scale

quantize->load_param(pd);
quantize->load_param(pd);

Mat int8_weight_data;
quantize->forward(weight_data, int8_weight_data);
Mat int8_weight_data;
quantize->forward(weight_data, int8_weight_data);

if (int8_weight_data.empty())
return -100;
if (int8_weight_data.empty())
return -100;

weight_data = int8_weight_data;
}
else
{
// plain float32 weight, fallback to float32 inference
use_int8_inference = false;
}
weight_data = int8_weight_data;
}

return 0;


Loading…
Cancel
Save