Browse Source

a dirty hack for resolving int8 pack4 crash

tags/20191113
nihuini 6 years ago
parent
commit
567e2bd501
10 changed files with 137 additions and 70 deletions
  1. +78
    -46
      benchmark/benchncnn.cpp
  2. +18
    -2
      src/layer/arm/convolution_arm.cpp
  3. +2
    -0
      src/layer/arm/convolution_arm.h
  4. +14
    -2
      src/layer/arm/convolutiondepthwise_arm.cpp
  5. +2
    -0
      src/layer/arm/convolutiondepthwise_arm.h
  6. +12
    -2
      src/layer/arm/innerproduct_arm.cpp
  7. +2
    -0
      src/layer/arm/innerproduct_arm.h
  8. +3
    -6
      src/layer/convolution.cpp
  9. +3
    -6
      src/layer/convolutiondepthwise.cpp
  10. +3
    -6
      src/layer/innerproduct.cpp

+ 78
- 46
benchmark/benchncnn.cpp View File

@@ -98,8 +98,6 @@ public:
static int g_warmup_loop_count = 3;
static int g_loop_count = 4;

static ncnn::Option g_default_option;

static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
static ncnn::PoolAllocator g_workspace_pool_allocator;

@@ -109,11 +107,11 @@ static ncnn::VkAllocator* g_blob_vkallocator = 0;
static ncnn::VkAllocator* g_staging_vkallocator = 0;
#endif // NCNN_VULKAN

void benchmark(const char* comment, const ncnn::Mat& in)
void benchmark(const char* comment, const ncnn::Mat& in, const ncnn::Option& opt)
{
ncnn::BenchNet net;

net.opt = g_default_option;
net.opt = opt;

#if NCNN_VULKAN
if (net.opt.use_vulkan_compute)
@@ -228,24 +226,26 @@ int main(int argc, char** argv)
#endif // NCNN_VULKAN

// default option
g_default_option.lightmode = true;
g_default_option.num_threads = num_threads;
g_default_option.blob_allocator = &g_blob_pool_allocator;
g_default_option.workspace_allocator = &g_workspace_pool_allocator;
ncnn::Option opt;
opt.lightmode = true;
opt.num_threads = num_threads;
opt.blob_allocator = &g_blob_pool_allocator;
opt.workspace_allocator = &g_workspace_pool_allocator;
#if NCNN_VULKAN
g_default_option.blob_vkallocator = g_blob_vkallocator;
g_default_option.workspace_vkallocator = g_blob_vkallocator;
g_default_option.staging_vkallocator = g_staging_vkallocator;
opt.blob_vkallocator = g_blob_vkallocator;
opt.workspace_vkallocator = g_blob_vkallocator;
opt.staging_vkallocator = g_staging_vkallocator;
#endif // NCNN_VULKAN
g_default_option.use_winograd_convolution = true;
g_default_option.use_sgemm_convolution = true;
g_default_option.use_int8_inference = true;
g_default_option.use_vulkan_compute = use_vulkan_compute;
g_default_option.use_fp16_packed = true;
g_default_option.use_fp16_storage = true;
g_default_option.use_fp16_arithmetic = true;
g_default_option.use_int8_storage = true;
g_default_option.use_int8_arithmetic = true;
opt.use_winograd_convolution = true;
opt.use_sgemm_convolution = true;
opt.use_int8_inference = true;
opt.use_vulkan_compute = use_vulkan_compute;
opt.use_fp16_packed = true;
opt.use_fp16_storage = true;
opt.use_fp16_arithmetic = true;
opt.use_int8_storage = true;
opt.use_int8_arithmetic = true;
opt.use_packing_layout = true;

ncnn::set_cpu_powersave(powersave);

@@ -258,84 +258,116 @@ int main(int argc, char** argv)
fprintf(stderr, "gpu_device = %d\n", gpu_device);

// run
benchmark("squeezenet", ncnn::Mat(227, 227, 3));
benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3));
{
opt.use_packing_layout = false;
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt);
opt.use_packing_layout = true;
}

benchmark("mobilenet", ncnn::Mat(224, 224, 3));
benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3));
{
opt.use_packing_layout = false;
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt);
opt.use_packing_layout = true;
}

benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3));
benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt);

// #if NCNN_VULKAN
// if (!use_vulkan_compute)
// #endif // NCNN_VULKAN
// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3));
// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3), opt);

benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3));
benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt);

benchmark("shufflenet", ncnn::Mat(224, 224, 3));
benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt);

benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3));
benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt);

benchmark("mnasnet", ncnn::Mat(224, 224, 3));
benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt);

benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3));
benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt);

benchmark("googlenet", ncnn::Mat(224, 224, 3));
benchmark("googlenet", ncnn::Mat(224, 224, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3));
{
opt.use_packing_layout = false;
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt);
opt.use_packing_layout = true;
}

benchmark("resnet18", ncnn::Mat(224, 224, 3));
benchmark("resnet18", ncnn::Mat(224, 224, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3));
{
opt.use_packing_layout = false;
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt);
opt.use_packing_layout = true;
}

benchmark("alexnet", ncnn::Mat(227, 227, 3));
benchmark("alexnet", ncnn::Mat(227, 227, 3), opt);

benchmark("vgg16", ncnn::Mat(224, 224, 3));
benchmark("vgg16", ncnn::Mat(224, 224, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3));
{
opt.use_packing_layout = false;
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt);
opt.use_packing_layout = true;
}

benchmark("resnet50", ncnn::Mat(224, 224, 3));
benchmark("resnet50", ncnn::Mat(224, 224, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3));
{
opt.use_packing_layout = false;
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt);
opt.use_packing_layout = true;
}

benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3));
benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3));
{
opt.use_packing_layout = false;
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
opt.use_packing_layout = true;
}

benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3));
benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt);

#if NCNN_VULKAN
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3));
{
opt.use_packing_layout = false;
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
opt.use_packing_layout = true;
}

benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3));
benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt);

benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3));
benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt);

#if NCNN_VULKAN
delete g_blob_vkallocator;


+ 18
- 2
src/layer/arm/convolution_arm.cpp View File

@@ -54,6 +54,7 @@ Convolution_arm::Convolution_arm()
{
#if __ARM_NEON
support_packing = true;
use_fp32_packing_inference = false;
#endif // __ARM_NEON

activation = 0;
@@ -102,7 +103,16 @@ int Convolution_arm::create_pipeline(const Option& opt)
int num_input = weight_data_size / maxk / num_output;

#if __ARM_NEON
if (opt.use_packing_layout)
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference;

if (use_int8_inference)
{
support_packing = false;
}

if (use_fp32_packing_inference)
{

// pack4
@@ -188,6 +198,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
}
}
}

return 0;
}

// pack1to4
@@ -230,6 +242,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
}
}
}

return 0;
}

// pack4to1
@@ -281,6 +295,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
}
}
}

return 0;
}

} // use_fp32_packing_inference
@@ -525,7 +541,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
// value = value + bias

#if __ARM_NEON
if (opt.use_packing_layout)
if (use_fp32_packing_inference)
{

int w = bottom_blob.w;


+ 2
- 0
src/layer/arm/convolution_arm.h View File

@@ -46,6 +46,8 @@ public:
Mat weight_sgemm_data;
std::vector<Mat> weight_3x3_winograd23_int8_data;

bool use_fp32_packing_inference;

// pack4
Mat weight_data_pack4;
Mat weight_data_pack1to4;


+ 14
- 2
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -40,6 +40,7 @@ ConvolutionDepthWise_arm::ConvolutionDepthWise_arm()
{
#if __ARM_NEON
support_packing = true;
use_fp32_packing_inference = false;
#endif // __ARM_NEON

activation = 0;
@@ -89,7 +90,16 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
int channels = (weight_data_size / group) / maxk / (num_output / group) * group;

#if __ARM_NEON
if (opt.use_packing_layout)
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference;

if (use_int8_inference)
{
support_packing = false;
}

if (use_fp32_packing_inference)
{

// depth-wise
@@ -100,6 +110,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
{
Mat weight_data_r2 = weight_data.reshape(maxk, group);
convert_packing(weight_data_r2, weight_data_pack4, 4);

return 0;
}
}

@@ -298,7 +310,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
size_t out_elemsize = elemsize / elempack * out_elempack;

#if __ARM_NEON
if (opt.use_packing_layout)
if (use_fp32_packing_inference)
{

const int maxk = kernel_w * kernel_h;


+ 2
- 0
src/layer/arm/convolutiondepthwise_arm.h View File

@@ -33,6 +33,8 @@ public:
Layer* activation;
std::vector<ncnn::Layer*> group_ops;

bool use_fp32_packing_inference;

// packing
Mat weight_data_pack4;
};


+ 12
- 2
src/layer/arm/innerproduct_arm.cpp View File

@@ -30,6 +30,7 @@ InnerProduct_arm::InnerProduct_arm()
{
#if __ARM_NEON
support_packing = true;
use_fp32_packing_inference = false;
#endif // __ARM_NEON

flatten = 0;
@@ -38,7 +39,16 @@ InnerProduct_arm::InnerProduct_arm()
int InnerProduct_arm::create_pipeline(const Option& opt)
{
#if __ARM_NEON
if (opt.use_packing_layout)
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

use_fp32_packing_inference = opt.use_packing_layout && weight_data_is_float32 && !use_int8_inference;

if (use_int8_inference)
{
support_packing = false;
}

if (use_fp32_packing_inference)
{

{
@@ -85,7 +95,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
int size = w * h;

#if __ARM_NEON
if (opt.use_packing_layout)
if (use_fp32_packing_inference)
{

if (elempack == 4)


+ 2
- 0
src/layer/arm/innerproduct_arm.h View File

@@ -30,6 +30,8 @@ public:
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;

public:
bool use_fp32_packing_inference;

ncnn::Layer* flatten;
};



+ 3
- 6
src/layer/convolution.cpp View File

@@ -77,20 +77,17 @@ int Convolution::load_model(const ModelBin& mb)

int Convolution::create_pipeline(const Option& opt)
{
use_int8_inference = opt.use_int8_inference;

if (int8_scale_term == 0)
use_int8_inference = false;

bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u);
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

if (weight_data_is_int8 && !use_int8_inference)
if (weight_data_is_int8 && !opt.use_int8_inference)
{
fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n");
return -1;
}

use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term));

// runtime quantize the weight data
if (weight_data_is_float32 && use_int8_inference)
{


+ 3
- 6
src/layer/convolutiondepthwise.cpp View File

@@ -100,20 +100,17 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)

int ConvolutionDepthWise::create_pipeline(const Option& opt)
{
use_int8_inference = opt.use_int8_inference;

if (int8_scale_term == 0)
use_int8_inference = false;

bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u);
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

if (weight_data_is_int8 && !use_int8_inference)
if (weight_data_is_int8 && !opt.use_int8_inference)
{
fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n");
return -1;
}

use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term));

if (weight_data_is_float32 && use_int8_inference)
{
// quantize weight to int8


+ 3
- 6
src/layer/innerproduct.cpp View File

@@ -64,20 +64,17 @@ int InnerProduct::load_model(const ModelBin& mb)

int InnerProduct::create_pipeline(const Option& opt)
{
use_int8_inference = opt.use_int8_inference;

if (int8_scale_term == 0)
use_int8_inference = false;

bool weight_data_is_int8 = (weight_data.elemsize == (size_t)1u);
bool weight_data_is_float32 = (weight_data.elemsize == (size_t)4u);

if (weight_data_is_int8 && !use_int8_inference)
if (weight_data_is_int8 && !opt.use_int8_inference)
{
fprintf(stderr, "quantized int8 weight loaded but use_int8_inference disabled\n");
return -1;
}

use_int8_inference = opt.use_int8_inference && (weight_data_is_int8 || (weight_data_is_float32 && int8_scale_term));

// initialize the quantize and dequantize op layers
if (use_int8_inference)
{


Loading…
Cancel
Save