option api changes

7 years ago · 838c5df839
--- a/benchmark/benchncnn.cpp
+++ b/benchmark/benchncnn.cpp
@@ -57,9 +57,6 @@ public:
        // load file
        int ret = 0;

        Option opt;
        opt.vulkan_compute = use_vulkan_compute;

        ModelBinFromEmpty mb;
        for (size_t i=0; i<layers.size(); i++)
        {
@@ -83,7 +80,7 @@ public:
        }

 #if NCNN_VULKAN
        if (use_vulkan_compute)
        if (opt.use_vulkan_compute)
        {
            upload_model();

@@ -100,12 +97,12 @@ public:
 static int g_warmup_loop_count = 3;
 static int g_loop_count = 4;

 static ncnn::Option g_default_option;

 static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
 static ncnn::PoolAllocator g_workspace_pool_allocator;

 #if NCNN_VULKAN
 static bool g_use_vulkan_compute = false;

 static ncnn::VulkanDevice* g_vkdev = 0;
 static ncnn::VkAllocator* g_blob_vkallocator = 0;
 static ncnn::VkAllocator* g_staging_vkallocator = 0;
@@ -115,11 +112,11 @@ void benchmark(const char* comment, const ncnn::Mat& in)
 {
    ncnn::BenchNet net;

    net.opt = g_default_option;

 #if NCNN_VULKAN
    if (g_use_vulkan_compute)
    if (net.opt.use_vulkan_compute)
    {
        net.use_vulkan_compute = g_use_vulkan_compute;

        net.set_vulkan_device(g_vkdev);
    }
 #endif // NCNN_VULKAN
@@ -134,7 +131,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
    g_workspace_pool_allocator.clear();

 #if NCNN_VULKAN
    if (g_use_vulkan_compute)
    if (net.opt.use_vulkan_compute)
    {
        g_blob_vkallocator->clear();
        g_staging_vkallocator->clear();
@@ -145,7 +142,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
 #ifdef _WIN32
    Sleep(10 * 1000);
 #else
    sleep(10);
 //     sleep(10);
 #endif

    ncnn::Mat out;
@@ -210,14 +207,15 @@ int main(int argc, char** argv)
        gpu_device = atoi(argv[4]);
    }

    bool use_vulkan_compute = gpu_device != -1;

    g_loop_count = loop_count;

    g_blob_pool_allocator.set_size_compare_ratio(0.0f);
    g_workspace_pool_allocator.set_size_compare_ratio(0.5f);

 #if NCNN_VULKAN
    g_use_vulkan_compute = gpu_device != -1;
    if (g_use_vulkan_compute)
    if (use_vulkan_compute)
    {
        g_warmup_loop_count = 10;

@@ -228,20 +226,25 @@ int main(int argc, char** argv)
    }
 #endif // NCNN_VULKAN

    ncnn::Option opt;
    opt.lightmode = true;
    opt.num_threads = num_threads;
    opt.blob_allocator = &g_blob_pool_allocator;
    opt.workspace_allocator = &g_workspace_pool_allocator;

    // default option
    g_default_option.lightmode = true;
    g_default_option.num_threads = num_threads;
    g_default_option.blob_allocator = &g_blob_pool_allocator;
    g_default_option.workspace_allocator = &g_workspace_pool_allocator;
 #if NCNN_VULKAN
    opt.vulkan_compute = g_use_vulkan_compute;
    opt.blob_vkallocator = g_blob_vkallocator;
    opt.workspace_vkallocator = g_blob_vkallocator;
    opt.staging_vkallocator = g_staging_vkallocator;
    g_default_option.blob_vkallocator = g_blob_vkallocator;
    g_default_option.workspace_vkallocator = g_blob_vkallocator;
    g_default_option.staging_vkallocator = g_staging_vkallocator;
 #endif // NCNN_VULKAN

    ncnn::set_default_option(opt);
    g_default_option.use_winograd_convolution = true;
    g_default_option.use_sgemm_convolution = true;
    g_default_option.use_int8_inference = true;
    g_default_option.use_vulkan_compute = use_vulkan_compute;
    g_default_option.use_fp16_packed = true;
    g_default_option.use_fp16_storage = true;
    g_default_option.use_fp16_arithmetic = true;
    g_default_option.use_int8_storage = true;
    g_default_option.use_int8_arithmetic = true;

    ncnn::set_cpu_powersave(powersave);

@@ -257,21 +260,21 @@ int main(int argc, char** argv)
    benchmark("squeezenet", ncnn::Mat(227, 227, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3));

    benchmark("mobilenet", ncnn::Mat(224, 224, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3));

    benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3));

 // #if NCNN_VULKAN
 //     if (!g_use_vulkan_compute)
 //     if (!use_vulkan_compute)
 // #endif // NCNN_VULKAN
 //     benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3));

@@ -284,14 +287,14 @@ int main(int argc, char** argv)
    benchmark("googlenet", ncnn::Mat(224, 224, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("googlenet_int8", ncnn::Mat(224, 224, 3));

    benchmark("resnet18", ncnn::Mat(224, 224, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("resnet18_int8", ncnn::Mat(224, 224, 3));

@@ -300,28 +303,28 @@ int main(int argc, char** argv)
    benchmark("vgg16", ncnn::Mat(224, 224, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("vgg16_int8", ncnn::Mat(224, 224, 3));

    benchmark("resnet50", ncnn::Mat(224, 224, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("resnet50_int8", ncnn::Mat(224, 224, 3));

    benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3));

    benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3));

 #if NCNN_VULKAN
    if (!g_use_vulkan_compute)
    if (!use_vulkan_compute)
 #endif // NCNN_VULKAN
    benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3));

--- a/examples/fasterrcnn.cpp
+++ b/examples/fasterrcnn.cpp
@@ -107,7 +107,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net fasterrcnn;

 #if NCNN_VULKAN
    fasterrcnn.use_vulkan_compute = true;
    fasterrcnn.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn
--- a/examples/mobilenetssd.cpp
+++ b/examples/mobilenetssd.cpp
@@ -36,7 +36,7 @@ static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net mobilenet;

 #if NCNN_VULKAN
    mobilenet.use_vulkan_compute = true;
    mobilenet.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // model is converted from https://github.com/chuanqi305/MobileNet-SSD
--- a/examples/mobilenetv2ssdlite.cpp
+++ b/examples/mobilenetv2ssdlite.cpp
@@ -39,7 +39,7 @@ static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net mobilenetv2;

 #if NCNN_VULKAN
    mobilenetv2.use_vulkan_compute = true;
    mobilenetv2.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    mobilenetv2.register_custom_layer("Silence", Noop_layer_creator);
--- a/examples/peleenetssd_seg.cpp
+++ b/examples/peleenetssd_seg.cpp
@@ -36,7 +36,7 @@ static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects,ncnn
    ncnn::Net peleenet;

 #if NCNN_VULKAN
    peleenet.use_vulkan_compute = true;
    peleenet.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // model is converted from https://github.com/eric612/MobileNet-YOLO
@@ -94,7 +94,8 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
        "traffic light","traffic sign","train"};

    cv::Mat image = bgr.clone();
    std::vector<int> color = {128,255,128,244,35,232};
    const int color[] = {128,255,128,244,35,232};
    const int color_count = sizeof(color) / sizeof(int);
    
    for (size_t i = 0; i < objects.size(); i++)
    {
@@ -147,7 +148,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
            }
            if(index > -1) {
                int color_index = (index)*3;
                if(color_index<color.size()) {
                if(color_index<color_count) {
                    int b = color[color_index];
                    int g = color[color_index+1];
                    int r = color[color_index+2];
--- a/examples/rfcn.cpp
+++ b/examples/rfcn.cpp
@@ -121,7 +121,7 @@ static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net rfcn;

 #if NCNN_VULKAN
    rfcn.use_vulkan_compute = true;
    rfcn.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // original pretrained model from https://github.com/YuwenXiong/py-R-FCN
--- a/examples/shufflenetv2.cpp
+++ b/examples/shufflenetv2.cpp
@@ -29,7 +29,7 @@ static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_score
    ncnn::Net shufflenetv2;

 #if NCNN_VULKAN
    shufflenetv2.use_vulkan_compute = true;
    shufflenetv2.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe
--- a/examples/squeezenet.cpp
+++ b/examples/squeezenet.cpp
@@ -29,7 +29,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
    ncnn::Net squeezenet;

 #if NCNN_VULKAN
    squeezenet.use_vulkan_compute = true;
    squeezenet.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    squeezenet.load_param("squeezenet_v1.1.param");
--- a/examples/squeezenetssd.cpp
+++ b/examples/squeezenetssd.cpp
@@ -36,7 +36,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net squeezenet;

 #if NCNN_VULKAN
    squeezenet.use_vulkan_compute = true;
    squeezenet.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD
--- a/examples/yolov2.cpp
+++ b/examples/yolov2.cpp
@@ -36,7 +36,7 @@ static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net yolov2;

 #if NCNN_VULKAN
    yolov2.use_vulkan_compute = true;
    yolov2.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
--- a/examples/yolov3.cpp
+++ b/examples/yolov3.cpp
@@ -36,7 +36,7 @@ static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
    ncnn::Net yolov3;

 #if NCNN_VULKAN
    yolov3.use_vulkan_compute = true;
    yolov3.opt.use_vulkan_compute = true;
 #endif // NCNN_VULKAN

    // original pretrained model from https://github.com/eric612/MobileNet-YOLO
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -25,6 +25,7 @@ set(ncnn_SRCS
    modelbin.cpp
    net.cpp
    opencv.cpp
    option.cpp
    paramdict.cpp
    pipeline.cpp
    benchmark.cpp
@@ -87,8 +88,8 @@ macro(ncnn_add_layer class)
    if(WITH_LAYER_${name}_vulkan)
        set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n")
        set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan")
        set(create_pipeline_content "${create_pipeline_content}        if (opt.vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
        set(destroy_pipeline_content "        if (opt.vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
        set(create_pipeline_content "${create_pipeline_content}        if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
        set(destroy_pipeline_content "        if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")

        file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp")
        file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp")
@@ -297,6 +298,7 @@ if(NCNN_INSTALL_SDK)
    modelbin.h
    net.h
    opencv.h
    option.h
    paramdict.h
    pipeline.h
    benchmark.h
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -645,7 +645,7 @@ int create_gpu_instance()
        }

        // check features
        gpu_info.support_fp16_packed = false;// TODO
        gpu_info.support_fp16_packed = true;
        gpu_info.support_fp16_storage = false;
        gpu_info.support_fp16_arithmetic = false;
        gpu_info.support_int8_storage = false;
@@ -698,11 +698,11 @@ int create_gpu_instance()
            {
                gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess;
            }
 //             if (gpu_info.support_VK_KHR_shader_float16_int8)
 //             {
 //                 gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
 //                 gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
 //             }
            if (gpu_info.support_VK_KHR_shader_float16_int8)
            {
                gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
                gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
            }
        }
        else
        {
--- a/src/layer.cpp
+++ b/src/layer.cpp
@@ -27,46 +27,6 @@

 namespace ncnn {

 // Default constructor: gives every Option field its initial value.
 Option::Option()
 {
    // light mode is on by default
    lightmode = true;
    // default thread count is whatever get_cpu_count() reports
    num_threads = get_cpu_count();
    // no blob/workspace allocator is supplied by default (null pointers)
    blob_allocator = 0;
    workspace_allocator = 0;

    // vulkan compute is off by default
    vulkan_compute = false;

    // the vulkan allocator fields are only declared in NCNN_VULKAN builds
 #if NCNN_VULKAN
    blob_vkallocator = 0;
    workspace_vkallocator = 0;
    staging_vkallocator = 0;
 #endif // NCNN_VULKAN

    // the three int flags below all start at 1 (enabled)
    use_winograd_convolution = 1;
    use_sgemm_convolution = 1;
    use_int8_inference = 1;
 }

 // File-local storage for the global default option; it is default-constructed
 // and is overwritten by set_default_option().
 static Option g_default_option;

 // Return a const reference to the global default option.
 const Option& get_default_option()
 {
    return g_default_option;
 }

 // Replace the global default option with a copy of opt.
 // Returns 0 on success. Returns -1, after printing a message to stderr and
 // leaving the current default untouched, when opt.num_threads is not positive.
 int set_default_option(const Option& opt)
 {
    // num_threads is the only field validated here
    if (opt.num_threads <= 0)
    {
        fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
        return -1;
    }

    g_default_option = opt;

    return 0;
 }

 Layer::Layer()
 {
    one_blob_only = false;
--- a/src/layer.h
+++ b/src/layer.h
@@ -19,10 +19,11 @@
 #include <string>
 #include <vector>
 #include <math.h>
 #include "platform.h"
 #include "mat.h"
 #include "modelbin.h"
 #include "option.h"
 #include "paramdict.h"
 #include "platform.h"

 #if NCNN_VULKAN
 #include <vulkan/vulkan.h>
@@ -32,57 +33,6 @@

 namespace ncnn {

 #if NCNN_VULKAN
 class VkAllocator;
 #endif // NCNN_VULKAN

 class Allocator;
 // Bundle of runtime settings: threading, memory allocators, vulkan compute
 // and convolution/int8 switches. All members are public and are written
 // directly by callers.
 class Option
 {
 public:
    // default option
    Option();

 public:
    // light mode
    // intermediate blob will be recycled when enabled
    // enabled by default
    bool lightmode;

    // thread count
    // default value is the one returned by get_cpu_count()
    int num_threads;

    // blob memory allocator
    Allocator* blob_allocator;

    // workspace memory allocator
    Allocator* workspace_allocator;

    // enable vulkan compute
    // declared regardless of NCNN_VULKAN; only the vulkan allocators below are conditional
    bool vulkan_compute;

 #if NCNN_VULKAN
    // blob memory allocator
    VkAllocator* blob_vkallocator;

    // workspace memory allocator
    VkAllocator* workspace_vkallocator;

    // staging memory allocator
    VkAllocator* staging_vkallocator;
 #endif // NCNN_VULKAN

 public:
    // int on/off switches
    // NOTE(review): presumably 1 = enabled, 0 = disabled, and each one selects
    // the optimization named in its identifier; confirm against the layer code
    int use_winograd_convolution;
    int use_sgemm_convolution;
    int use_int8_inference;
 };

 // the global default option
 const Option& get_default_option();
 int set_default_option(const Option& opt);

 class Layer
 {
 public:
@@ -100,10 +50,10 @@ public:
    virtual int load_model(const ModelBin& mb);

    //
    virtual int create_pipeline(const Option& opt = get_default_option());
    virtual int create_pipeline(const Option& opt = Option());

    //
    virtual int destroy_pipeline(const Option& opt = get_default_option());
    virtual int destroy_pipeline(const Option& opt = Option());

 public:
    // one input and one output blob
@@ -118,13 +68,13 @@ public:
 public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = get_default_option()) const;
    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = get_default_option()) const;
    virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = Option()) const;
    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = get_default_option()) const;
    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = get_default_option()) const;
    virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = Option()) const;
    virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const;

 #if NCNN_VULKAN
 public:
@@ -134,13 +84,13 @@ public:
 public:
    // implement inference
    // return 0 if success
    virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
    virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
    virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const;

    // implement inplace inference
    // return 0 if success
    virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
    virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
    virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
    virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const;

 public:
    // assigned immediately after creating this layer
--- a/src/layer/convolution.cpp
+++ b/src/layer/convolution.cpp
@@ -74,7 +74,7 @@ int Convolution::load_model(const ModelBin& mb)
 int Convolution::create_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    use_int8_inference = opt.use_int8_inference;

@@ -111,7 +111,7 @@ int Convolution::create_pipeline(const Option& opt)

            op->create_pipeline(opt_cpu);

            ncnn::Option opt = ncnn::get_default_option();
            ncnn::Option opt;
            opt.blob_allocator = int8_weight_data.allocator;

            const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -173,7 +173,7 @@ int Convolution::create_pipeline(const Option& opt)
 int Convolution::destroy_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (quantize)
    {
@@ -286,7 +286,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
            op->load_model(ModelBinFromMatArray(weights));

            Option opt_cpu = opt;
            opt_cpu.vulkan_compute = false;
            opt_cpu.use_vulkan_compute = false;
            op->create_pipeline(opt_cpu);

            // forward
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -98,7 +98,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
 int ConvolutionDepthWise::create_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    use_int8_inference = opt.use_int8_inference;

@@ -134,7 +134,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)

            op->create_pipeline(opt_cpu);

            ncnn::Option opt = ncnn::get_default_option();
            ncnn::Option opt;
            opt.blob_allocator = int8_weight_data.allocator;

            const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
@@ -198,7 +198,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)
 int ConvolutionDepthWise::destroy_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    for (int i=0; i<(int)quantize_ops.size(); i++)
    {
--- a/src/layer/innerproduct.cpp
+++ b/src/layer/innerproduct.cpp
@@ -65,7 +65,7 @@ int InnerProduct::load_model(const ModelBin& mb)
 int InnerProduct::create_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    use_int8_inference = opt.use_int8_inference;

@@ -143,7 +143,7 @@ int InnerProduct::create_pipeline(const Option& opt)

            op->create_pipeline(opt_cpu);

            ncnn::Option opt = ncnn::get_default_option();
            ncnn::Option opt;
            opt.blob_allocator = int8_weight_data.allocator;

            const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -162,7 +162,7 @@ int InnerProduct::create_pipeline(const Option& opt)
 int InnerProduct::destroy_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (quantize)
    {
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -45,7 +45,7 @@ Convolution_x86::Convolution_x86()
 int Convolution_x86::create_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (activation_type == 1)
    {
@@ -120,7 +120,7 @@ int Convolution_x86::create_pipeline(const Option& opt)
 int Convolution_x86::destroy_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (activation)
    {
--- a/src/layer/x86/convolutiondepthwise_x86.cpp
+++ b/src/layer/x86/convolutiondepthwise_x86.cpp
@@ -36,7 +36,7 @@ ConvolutionDepthWise_x86::ConvolutionDepthWise_x86()
 int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (activation_type == 1)
    {
@@ -167,7 +167,7 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
 int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt)
 {
    Option opt_cpu = opt;
    opt_cpu.vulkan_compute = false;
    opt_cpu.use_vulkan_compute = false;

    if (activation)
    {
--- a/src/layer/yolodetectionoutput.cpp
+++ b/src/layer/yolodetectionoutput.cpp
@@ -49,7 +49,7 @@ int YoloDetectionOutput::create_pipeline(const Option& opt)
        softmax->load_param(pd);

        Option opt_cpu = opt;
        opt_cpu.vulkan_compute = false;
        opt_cpu.use_vulkan_compute = false;
        softmax->create_pipeline(opt_cpu);
    }

@@ -61,7 +61,7 @@ int YoloDetectionOutput::destroy_pipeline(const Option& opt)
    if (softmax)
    {
        Option opt_cpu = opt;
        opt_cpu.vulkan_compute = false;
        opt_cpu.use_vulkan_compute = false;
        softmax->destroy_pipeline(opt_cpu);
        delete softmax;
        softmax = 0;
--- a/src/mat.cpp
+++ b/src/mat.cpp
@@ -95,7 +95,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val
        return;
    }

    op->forward_inplace(*this, ncnn::get_default_option());
    op->forward_inplace(*this);

    delete op;
 }
@@ -234,7 +234,7 @@ void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, i

    padding->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -257,7 +257,7 @@ void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, in

    crop->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -277,7 +277,7 @@ void resize_bilinear(const Mat& src, Mat& dst, int w, int h, Allocator* allocato

    interp->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -297,7 +297,7 @@ void resize_bicubic(const Mat& src, Mat& dst, int w, int h, Allocator* allocator

    interp->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -315,7 +315,7 @@ void convert_packing(const Mat& src, Mat& dst, int _packing, Allocator* allocato

    packing->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -334,7 +334,7 @@ void cast_float32_to_float16(const Mat& src, Mat& dst, Allocator* allocator, int

    cast->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

@@ -353,7 +353,7 @@ void cast_float16_to_float32(const Mat& src, Mat& dst, Allocator* allocator, int

    cast->load_param(pd);

    ncnn::Option opt = ncnn::get_default_option();
    ncnn::Option opt;
    opt.num_threads = num_threads;
    opt.blob_allocator = allocator;

--- a/src/net.cpp
+++ b/src/net.cpp
@@ -40,11 +40,6 @@ namespace ncnn {

 Net::Net()
 {
    use_winograd_convolution = 1;
    use_sgemm_convolution = 1;
    use_int8_inference = 1;
    use_vulkan_compute = 0;

 #if NCNN_VULKAN
    vkdev = 0;
    weight_vkallocator = 0;
@@ -155,11 +150,8 @@ int Net::load_param(FILE* fp)
    blobs.resize((size_t)blob_count);

 #if NCNN_VULKAN
    if (use_vulkan_compute && !vkdev)
    {
        // use default vulkan device
    if (opt.use_vulkan_compute && !vkdev)
        vkdev = get_default_gpu_device();
    }
 #endif // NCNN_VULKAN

    ParamDict pd;
@@ -192,7 +184,7 @@ int Net::load_param(FILE* fp)
        }

 #if NCNN_VULKAN
        if (use_vulkan_compute)
        if (opt.use_vulkan_compute)
            layer->vkdev = vkdev;
 #endif // NCNN_VULKAN

@@ -320,11 +312,8 @@ int Net::load_param_mem(const char* _mem)
    blobs.resize(blob_count);

 #if NCNN_VULKAN
    if (use_vulkan_compute && !vkdev)
    {
        // use default vulkan device
    if (opt.use_vulkan_compute && !vkdev)
        vkdev = get_default_gpu_device();
    }
 #endif // NCNN_VULKAN

    ParamDict pd;
@@ -357,7 +346,7 @@ int Net::load_param_mem(const char* _mem)
        }

 #if NCNN_VULKAN
        if (use_vulkan_compute)
        if (opt.use_vulkan_compute)
            layer->vkdev = vkdev;
 #endif // NCNN_VULKAN

@@ -489,11 +478,8 @@ int Net::load_param_bin(FILE* fp)
    blobs.resize(blob_count);

 #if NCNN_VULKAN
    if (use_vulkan_compute && !vkdev)
    {
        // use default vulkan device
    if (opt.use_vulkan_compute && !vkdev)
        vkdev = get_default_gpu_device();
    }
 #endif // NCNN_VULKAN

    ParamDict pd;
@@ -526,7 +512,7 @@ int Net::load_param_bin(FILE* fp)
        }

 #if NCNN_VULKAN
        if (use_vulkan_compute)
        if (opt.use_vulkan_compute)
            layer->vkdev = vkdev;
 #endif // NCNN_VULKAN

@@ -613,12 +599,6 @@ int Net::load_model(FILE* fp)
    // load file
    int ret = 0;

    Option opt;
    opt.vulkan_compute = use_vulkan_compute;
    opt.use_winograd_convolution = use_winograd_convolution;
    opt.use_sgemm_convolution = use_sgemm_convolution;
    opt.use_int8_inference = use_int8_inference;

    ModelBinFromStdio mb(fp);
    for (size_t i=0; i<layers.size(); i++)
    {
@@ -649,7 +629,7 @@ int Net::load_model(FILE* fp)
    }

 #if NCNN_VULKAN
    if (use_vulkan_compute)
    if (opt.use_vulkan_compute)
    {
        create_pipeline();

@@ -709,11 +689,8 @@ int Net::load_param(const unsigned char* _mem)
    blobs.resize(blob_count);

 #if NCNN_VULKAN
    if (use_vulkan_compute && !vkdev)
    {
        // use default vulkan device
    if (opt.use_vulkan_compute && !vkdev)
        vkdev = get_default_gpu_device();
    }
 #endif // NCNN_VULKAN

    ParamDict pd;
@@ -743,7 +720,7 @@ int Net::load_param(const unsigned char* _mem)
        }

 #if NCNN_VULKAN
        if (use_vulkan_compute)
        if (opt.use_vulkan_compute)
            layer->vkdev = vkdev;
 #endif // NCNN_VULKAN

@@ -816,12 +793,6 @@ int Net::load_model(const unsigned char* _mem)
        return -1;
    }

    Option opt;
    opt.vulkan_compute = use_vulkan_compute;
    opt.use_winograd_convolution = use_winograd_convolution;
    opt.use_sgemm_convolution = use_sgemm_convolution;
    opt.use_int8_inference = use_int8_inference;

    const unsigned char* mem = _mem;
    ModelBinFromMemory mb(mem);
    for (size_t i=0; i<layers.size(); i++)
@@ -850,7 +821,7 @@ int Net::load_model(const unsigned char* _mem)
    }

 #if NCNN_VULKAN
    if (use_vulkan_compute)
    if (opt.use_vulkan_compute)
    {
        create_pipeline();

@@ -971,12 +942,6 @@ void Net::clear()
    destroy_pipeline();
 #endif // NCNN_VULKAN

    Option opt;
    opt.vulkan_compute = use_vulkan_compute;
    opt.use_winograd_convolution = use_winograd_convolution;
    opt.use_sgemm_convolution = use_sgemm_convolution;
    opt.use_int8_inference = use_int8_inference;

    blobs.clear();
    for (size_t i=0; i<layers.size(); i++)
    {
@@ -1053,12 +1018,6 @@ int Net::upload_model()

 int Net::create_pipeline()
 {
    Option opt;
    opt.vulkan_compute = use_vulkan_compute;
    opt.use_winograd_convolution = use_winograd_convolution;
    opt.use_sgemm_convolution = use_sgemm_convolution;
    opt.use_int8_inference = use_int8_inference;

    if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage)
    {
        {
@@ -1117,12 +1076,6 @@ int Net::create_pipeline()

 int Net::destroy_pipeline()
 {
    Option opt;
    opt.vulkan_compute = use_vulkan_compute;
    opt.use_winograd_convolution = use_winograd_convolution;
    opt.use_sgemm_convolution = use_sgemm_convolution;
    opt.use_int8_inference = use_int8_inference;

    if (cast_float32_to_float16)
        cast_float32_to_float16->destroy_pipeline(opt);

@@ -1887,19 +1840,20 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector
 Extractor::Extractor(const Net* _net, int blob_count) : net(_net)
 {
    blob_mats.resize(blob_count);
    opt = get_default_option();
    opt = net->opt;

 #if NCNN_VULKAN
    opt.vulkan_compute = net->use_vulkan_compute;

    if (net->use_vulkan_compute)
    if (net->opt.use_vulkan_compute)
    {
        blob_mats_gpu.resize(blob_count);

        // set default vulkan blob/workspace/staging allocator
        opt.blob_vkallocator = net->vkdev->allocator();
        opt.workspace_vkallocator = net->vkdev->allocator();
        opt.staging_vkallocator = net->vkdev->staging_allocator();
        if (!opt.blob_vkallocator)
            opt.blob_vkallocator = net->vkdev->allocator();
        if (!opt.workspace_vkallocator)
            opt.workspace_vkallocator = net->vkdev->allocator();
        if (!opt.staging_vkallocator)
            opt.staging_vkallocator = net->vkdev->staging_allocator();

        blob_mats_gpu.resize(blob_count);
    }
 #endif // NCNN_VULKAN
 }
@@ -1927,9 +1881,9 @@ void Extractor::set_workspace_allocator(Allocator* allocator)
 #if NCNN_VULKAN
 void Extractor::set_vulkan_compute(bool enable)
 {
    if (net->use_vulkan_compute)
    if (net->opt.use_vulkan_compute)
    {
        opt.vulkan_compute = enable;
        opt.use_vulkan_compute = enable;
    }
    else
    {
@@ -1995,7 +1949,7 @@ int Extractor::extract(int blob_index, Mat& feat)
        int layer_index = net->blobs[blob_index].producer;

 #if NCNN_VULKAN
        if (opt.vulkan_compute)
        if (opt.use_vulkan_compute)
        {
            ncnn::VkCompute cmd(net->vkdev);
 #if NCNN_BENCHMARK
--- a/src/net.h
+++ b/src/net.h
@@ -17,10 +17,11 @@

 #include <stdio.h>
 #include <vector>
 #include "platform.h"
 #include "blob.h"
 #include "layer.h"
 #include "mat.h"
 #include "platform.h"
 #include "option.h"

 namespace ncnn {

@@ -36,6 +37,16 @@ public:
    // clear and destroy
    ~Net();

 public:
    // option
    Option opt;

 #if NCNN_VULKAN

    void set_vulkan_device(const VulkanDevice* vkdev);

 #endif // NCNN_VULKAN

 #if NCNN_STRING
    // register custom layer by layer type name
    // return 0 if success
@@ -82,34 +93,6 @@ public:
    // construct an Extractor from network
    Extractor create_extractor() const;

 public:
    // enable winograd convolution optimization
    // improve convolution 3x3 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    int use_winograd_convolution;

    // enable sgemm convolution optimization
    // improve convolution 1x1 stride1 performance, may consume more memory
    // changes should be applied before loading network structure and weight
    // enabled by default
    int use_sgemm_convolution;

    // enable quantized int8 inference
    // use low-precision int8 path for quantized model
    // changes should be applied before loading network structure and weight
    // enabled by default
    int use_int8_inference;

    // enable vulkan compute
    int use_vulkan_compute;

 #if NCNN_VULKAN

    void set_vulkan_device(const VulkanDevice* vkdev);

 #endif // NCNN_VULKAN

 protected:
    // parse the structure of network
    // fuse int8 op dequantize and quantize by requantize
--- a/src/option.cpp
+++ b/src/option.cpp
@@ -0,0 +1,49 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #include "option.h"
 #include "cpu.h"

 namespace ncnn {

 // Fill every field of the option with its default value.
 Option::Option()
 {
    // thread count comes from get_cpu_count(); a non-positive result is
    // replaced by a single thread
    const int cpu_count = get_cpu_count();
    num_threads = cpu_count > 0 ? cpu_count : 1;

    lightmode = true;

    // host allocators start out null
    workspace_allocator = 0;
    blob_allocator = 0;

 #if NCNN_VULKAN
    // vulkan allocators start out null as well
    staging_vkallocator = 0;
    workspace_vkallocator = 0;
    blob_vkallocator = 0;
 #endif // NCNN_VULKAN

    // convolution optimization and int8 inference switches
    use_int8_inference = true;
    use_sgemm_convolution = true;
    use_winograd_convolution = true;

    // vulkan compute stays off for now
    use_vulkan_compute = false;// TODO enable me

    // options for gpu inference
    use_int8_arithmetic = false;
    use_int8_storage = true;
    use_fp16_arithmetic = false;
    use_fp16_storage = true;
    use_fp16_packed = false;// TODO enable me
 }

 } // namespace ncnn
--- a/src/option.h
+++ b/src/option.h
@@ -0,0 +1,91 @@
 // Tencent is pleased to support the open source community by making ncnn available.
 //
 // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 //
 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 // in compliance with the License. You may obtain a copy of the License at
 //
 // https://opensource.org/licenses/BSD-3-Clause
 //
 // Unless required by applicable law or agreed to in writing, software distributed
 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.

 #ifndef NCNN_OPTION_H
 #define NCNN_OPTION_H

 #include "platform.h"

 namespace ncnn {

 #if NCNN_VULKAN
 class VkAllocator;
 #endif // NCNN_VULKAN

 class Allocator;
 // Bundle of settings for a network: threading, memory allocators and the
 // switches selecting optimized / low-precision code paths.
 class Option
 {
 public:
    // construct an option holding the default values
    Option();

    // light mode switch
    // when enabled, intermediate blobs are recycled
    // enabled by default
    bool lightmode;

    // number of threads
    // defaults to the value returned by get_cpu_count()
    int num_threads;

    // allocator for blob memory
    Allocator* blob_allocator;

    // allocator for workspace memory
    Allocator* workspace_allocator;

 #if NCNN_VULKAN
    // vulkan allocator for blob memory
    VkAllocator* blob_vkallocator;

    // vulkan allocator for workspace memory
    VkAllocator* workspace_vkallocator;

    // vulkan allocator for staging memory
    VkAllocator* staging_vkallocator;
 #endif // NCNN_VULKAN

    // winograd convolution optimization switch
    // improves convolution 3x3 stride1 performance, may consume more memory
    // must be changed before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // sgemm convolution optimization switch
    // improves convolution 1x1 stride1 performance, may consume more memory
    // must be changed before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // quantized int8 inference switch
    // uses the low-precision int8 path for quantized models
    // must be changed before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // vulkan compute switch
    bool use_vulkan_compute;

    // switches for gpu inference
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_storage;
    bool use_int8_arithmetic;
 };

 } // namespace ncnn

 #endif // NCNN_OPTION_H
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -47,18 +47,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const char*

 //     fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name);

    create_descriptorset_layout(binding_count);

    create_pipeline_layout(push_constant_count);

    create_pipeline(local_shader_module, entry_name, specializations);

    if (vkdev->info.support_VK_KHR_descriptor_update_template)
    {
        create_descriptor_update_template(binding_count);
    }

    return 0;
    return create(local_shader_module, entry_name, specializations, binding_count, push_constant_count);
 }

 int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count)