diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 34a24763c..d58a7e478 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -57,9 +57,6 @@ public: // load file int ret = 0; - Option opt; - opt.vulkan_compute = use_vulkan_compute; - ModelBinFromEmpty mb; for (size_t i=0; iclear(); g_staging_vkallocator->clear(); @@ -145,7 +142,7 @@ void benchmark(const char* comment, const ncnn::Mat& in) #ifdef _WIN32 Sleep(10 * 1000); #else - sleep(10); +// sleep(10); #endif ncnn::Mat out; @@ -210,14 +207,15 @@ int main(int argc, char** argv) gpu_device = atoi(argv[4]); } + bool use_vulkan_compute = gpu_device != -1; + g_loop_count = loop_count; g_blob_pool_allocator.set_size_compare_ratio(0.0f); g_workspace_pool_allocator.set_size_compare_ratio(0.5f); #if NCNN_VULKAN - g_use_vulkan_compute = gpu_device != -1; - if (g_use_vulkan_compute) + if (use_vulkan_compute) { g_warmup_loop_count = 10; @@ -228,20 +226,25 @@ int main(int argc, char** argv) } #endif // NCNN_VULKAN - ncnn::Option opt; - opt.lightmode = true; - opt.num_threads = num_threads; - opt.blob_allocator = &g_blob_pool_allocator; - opt.workspace_allocator = &g_workspace_pool_allocator; - + // default option + g_default_option.lightmode = true; + g_default_option.num_threads = num_threads; + g_default_option.blob_allocator = &g_blob_pool_allocator; + g_default_option.workspace_allocator = &g_workspace_pool_allocator; #if NCNN_VULKAN - opt.vulkan_compute = g_use_vulkan_compute; - opt.blob_vkallocator = g_blob_vkallocator; - opt.workspace_vkallocator = g_blob_vkallocator; - opt.staging_vkallocator = g_staging_vkallocator; + g_default_option.blob_vkallocator = g_blob_vkallocator; + g_default_option.workspace_vkallocator = g_blob_vkallocator; + g_default_option.staging_vkallocator = g_staging_vkallocator; #endif // NCNN_VULKAN - - ncnn::set_default_option(opt); + g_default_option.use_winograd_convolution = true; + g_default_option.use_sgemm_convolution = true; + 
g_default_option.use_int8_inference = true; + g_default_option.use_vulkan_compute = use_vulkan_compute; + g_default_option.use_fp16_packed = true; + g_default_option.use_fp16_storage = true; + g_default_option.use_fp16_arithmetic = true; + g_default_option.use_int8_storage = true; + g_default_option.use_int8_arithmetic = true; ncnn::set_cpu_powersave(powersave); @@ -257,21 +260,21 @@ int main(int argc, char** argv) benchmark("squeezenet", ncnn::Mat(227, 227, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3)); benchmark("mobilenet", ncnn::Mat(224, 224, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3)); benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3)); // #if NCNN_VULKAN -// if (!g_use_vulkan_compute) +// if (!use_vulkan_compute) // #endif // NCNN_VULKAN // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3)); @@ -284,14 +287,14 @@ int main(int argc, char** argv) benchmark("googlenet", ncnn::Mat(224, 224, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("googlenet_int8", ncnn::Mat(224, 224, 3)); benchmark("resnet18", ncnn::Mat(224, 224, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("resnet18_int8", ncnn::Mat(224, 224, 3)); @@ -300,28 +303,28 @@ int main(int argc, char** argv) benchmark("vgg16", ncnn::Mat(224, 224, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("vgg16_int8", ncnn::Mat(224, 224, 3)); benchmark("resnet50", ncnn::Mat(224, 224, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("resnet50_int8", ncnn::Mat(224, 224, 3)); benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) 
+ if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3)); benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3)); #if NCNN_VULKAN - if (!g_use_vulkan_compute) + if (!use_vulkan_compute) #endif // NCNN_VULKAN benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3)); diff --git a/examples/fasterrcnn.cpp b/examples/fasterrcnn.cpp index 6f8f72830..fcd78dff0 100644 --- a/examples/fasterrcnn.cpp +++ b/examples/fasterrcnn.cpp @@ -107,7 +107,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector& objects) ncnn::Net fasterrcnn; #if NCNN_VULKAN - fasterrcnn.use_vulkan_compute = true; + fasterrcnn.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn diff --git a/examples/mobilenetssd.cpp b/examples/mobilenetssd.cpp index 5689033f0..e1870cf64 100755 --- a/examples/mobilenetssd.cpp +++ b/examples/mobilenetssd.cpp @@ -36,7 +36,7 @@ static int detect_mobilenet(const cv::Mat& bgr, std::vector& objects) ncnn::Net mobilenet; #if NCNN_VULKAN - mobilenet.use_vulkan_compute = true; + mobilenet.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // model is converted from https://github.com/chuanqi305/MobileNet-SSD diff --git a/examples/mobilenetv2ssdlite.cpp b/examples/mobilenetv2ssdlite.cpp index 998312da3..5cf0861f0 100755 --- a/examples/mobilenetv2ssdlite.cpp +++ b/examples/mobilenetv2ssdlite.cpp @@ -39,7 +39,7 @@ static int detect_mobilenetv2(const cv::Mat& bgr, std::vector& objects) ncnn::Net mobilenetv2; #if NCNN_VULKAN - mobilenetv2.use_vulkan_compute = true; + mobilenetv2.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN mobilenetv2.register_custom_layer("Silence", Noop_layer_creator); diff --git a/examples/peleenetssd_seg.cpp b/examples/peleenetssd_seg.cpp index ca6e06564..d002b8eee 100644 --- a/examples/peleenetssd_seg.cpp +++ b/examples/peleenetssd_seg.cpp @@ -36,7 +36,7 @@ static int detect_peleenet(const cv::Mat& bgr, std::vector& 
objects,ncnn ncnn::Net peleenet; #if NCNN_VULKAN - peleenet.use_vulkan_compute = true; + peleenet.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // model is converted from https://github.com/eric612/MobileNet-YOLO @@ -94,7 +94,8 @@ static void draw_objects(const cv::Mat& bgr, const std::vector& objects, "traffic light","traffic sign","train"}; cv::Mat image = bgr.clone(); - std::vector color = {128,255,128,244,35,232}; + const int color[] = {128,255,128,244,35,232}; + const int color_count = sizeof(color) / sizeof(int); for (size_t i = 0; i < objects.size(); i++) { @@ -147,7 +148,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector& objects, } if(index > -1) { int color_index = (index)*3; - if(color_index& objects) ncnn::Net rfcn; #if NCNN_VULKAN - rfcn.use_vulkan_compute = true; + rfcn.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // original pretrained model from https://github.com/YuwenXiong/py-R-FCN diff --git a/examples/shufflenetv2.cpp b/examples/shufflenetv2.cpp index f3df82305..29e6cd6cd 100644 --- a/examples/shufflenetv2.cpp +++ b/examples/shufflenetv2.cpp @@ -29,7 +29,7 @@ static int detect_shufflenetv2(const cv::Mat& bgr, std::vector& cls_score ncnn::Net shufflenetv2; #if NCNN_VULKAN - shufflenetv2.use_vulkan_compute = true; + shufflenetv2.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe diff --git a/examples/squeezenet.cpp b/examples/squeezenet.cpp index e33ae6f74..8e602cc66 100644 --- a/examples/squeezenet.cpp +++ b/examples/squeezenet.cpp @@ -29,7 +29,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector& cls_scores) ncnn::Net squeezenet; #if NCNN_VULKAN - squeezenet.use_vulkan_compute = true; + squeezenet.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN squeezenet.load_param("squeezenet_v1.1.param"); diff --git a/examples/squeezenetssd.cpp b/examples/squeezenetssd.cpp index c93e25252..40791d9b4 100755 --- a/examples/squeezenetssd.cpp +++ 
b/examples/squeezenetssd.cpp @@ -36,7 +36,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector& objects) ncnn::Net squeezenet; #if NCNN_VULKAN - squeezenet.use_vulkan_compute = true; + squeezenet.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD diff --git a/examples/yolov2.cpp b/examples/yolov2.cpp index 3967e5b65..cce7fe8a7 100644 --- a/examples/yolov2.cpp +++ b/examples/yolov2.cpp @@ -36,7 +36,7 @@ static int detect_yolov2(const cv::Mat& bgr, std::vector& objects) ncnn::Net yolov2; #if NCNN_VULKAN - yolov2.use_vulkan_compute = true; + yolov2.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // original pretrained model from https://github.com/eric612/MobileNet-YOLO diff --git a/examples/yolov3.cpp b/examples/yolov3.cpp index 4c707f34c..92722b522 100644 --- a/examples/yolov3.cpp +++ b/examples/yolov3.cpp @@ -36,7 +36,7 @@ static int detect_yolov3(const cv::Mat& bgr, std::vector& objects) ncnn::Net yolov3; #if NCNN_VULKAN - yolov3.use_vulkan_compute = true; + yolov3.opt.use_vulkan_compute = true; #endif // NCNN_VULKAN // original pretrained model from https://github.com/eric612/MobileNet-YOLO diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c3dbea978..4fbea3504 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,6 +25,7 @@ set(ncnn_SRCS modelbin.cpp net.cpp opencv.cpp + option.cpp paramdict.cpp pipeline.cpp benchmark.cpp @@ -87,8 +88,8 @@ macro(ncnn_add_layer class) if(WITH_LAYER_${name}_vulkan) set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n") set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan") - set(create_pipeline_content "${create_pipeline_content} if (opt.vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n") - set(destroy_pipeline_content " if (opt.vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) 
return ret; }\n${destroy_pipeline_content}") + set(create_pipeline_content "${create_pipeline_content} if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n") + set(destroy_pipeline_content " if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}") file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp") file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp") @@ -297,6 +298,7 @@ if(NCNN_INSTALL_SDK) modelbin.h net.h opencv.h + option.h paramdict.h pipeline.h benchmark.h diff --git a/src/gpu.cpp b/src/gpu.cpp index d1c64aa8a..460ccb03c 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -645,7 +645,7 @@ int create_gpu_instance() } // check features - gpu_info.support_fp16_packed = false;// TODO + gpu_info.support_fp16_packed = true; gpu_info.support_fp16_storage = false; gpu_info.support_fp16_arithmetic = false; gpu_info.support_int8_storage = false; @@ -698,11 +698,11 @@ int create_gpu_instance() { gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess; } -// if (gpu_info.support_VK_KHR_shader_float16_int8) -// { -// gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16; -// gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8; -// } + if (gpu_info.support_VK_KHR_shader_float16_int8) + { + gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16; + gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8; + } } else { diff --git a/src/layer.cpp b/src/layer.cpp index d2acd8bd8..103a12c36 100644 --- a/src/layer.cpp +++ b/src/layer.cpp @@ -27,46 +27,6 @@ namespace ncnn { -Option::Option() -{ - lightmode = true; - num_threads = get_cpu_count(); - blob_allocator = 0; - workspace_allocator = 0; - - vulkan_compute = false; - -#if NCNN_VULKAN - blob_vkallocator = 0; - 
workspace_vkallocator = 0; - staging_vkallocator = 0; -#endif // NCNN_VULKAN - - use_winograd_convolution = 1; - use_sgemm_convolution = 1; - use_int8_inference = 1; -} - -static Option g_default_option; - -const Option& get_default_option() -{ - return g_default_option; -} - -int set_default_option(const Option& opt) -{ - if (opt.num_threads <= 0) - { - fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads); - return -1; - } - - g_default_option = opt; - - return 0; -} - Layer::Layer() { one_blob_only = false; diff --git a/src/layer.h b/src/layer.h index 6e1408539..56f334249 100644 --- a/src/layer.h +++ b/src/layer.h @@ -19,10 +19,11 @@ #include #include #include +#include "platform.h" #include "mat.h" #include "modelbin.h" +#include "option.h" #include "paramdict.h" -#include "platform.h" #if NCNN_VULKAN #include @@ -32,57 +33,6 @@ namespace ncnn { -#if NCNN_VULKAN -class VkAllocator; -#endif // NCNN_VULKAN - -class Allocator; -class Option -{ -public: - // default option - Option(); - -public: - // light mode - // intermediate blob will be recycled when enabled - // enabled by default - bool lightmode; - - // thread count - // default value is the one returned by get_cpu_count() - int num_threads; - - // blob memory allocator - Allocator* blob_allocator; - - // workspace memory allocator - Allocator* workspace_allocator; - - // enable vulkan compute - bool vulkan_compute; - -#if NCNN_VULKAN - // blob memory allocator - VkAllocator* blob_vkallocator; - - // workspace memory allocator - VkAllocator* workspace_vkallocator; - - // staging memory allocator - VkAllocator* staging_vkallocator; -#endif // NCNN_VULKAN - -public: - int use_winograd_convolution; - int use_sgemm_convolution; - int use_int8_inference; -}; - -// the global default option -const Option& get_default_option(); -int set_default_option(const Option& opt); - class Layer { public: @@ -100,10 +50,10 @@ public: virtual int load_model(const ModelBin& mb); // - virtual int 
create_pipeline(const Option& opt = get_default_option()); + virtual int create_pipeline(const Option& opt = Option()); // - virtual int destroy_pipeline(const Option& opt = get_default_option()); + virtual int destroy_pipeline(const Option& opt = Option()); public: // one input and one output blob @@ -118,13 +68,13 @@ public: public: // implement inference // return 0 if success - virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, const Option& opt = get_default_option()) const; - virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = get_default_option()) const; + virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, const Option& opt = Option()) const; + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const; // implement inplace inference // return 0 if success - virtual int forward_inplace(std::vector& bottom_top_blobs, const Option& opt = get_default_option()) const; - virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = get_default_option()) const; + virtual int forward_inplace(std::vector& bottom_top_blobs, const Option& opt = Option()) const; + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const; #if NCNN_VULKAN public: @@ -134,13 +84,13 @@ public: public: // implement inference // return 0 if success - virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const; - virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const; + virtual int forward(const std::vector& bottom_blobs, std::vector& top_blobs, VkCompute& cmd, const Option& opt = Option()) const; + virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const; // implement inplace inference // return 0 if success - virtual int 
forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const; - virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const; + virtual int forward_inplace(std::vector& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const; + virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const; public: // assigned immediately after creating this layer diff --git a/src/layer/convolution.cpp b/src/layer/convolution.cpp index cac836b9c..c01fd02c3 100644 --- a/src/layer/convolution.cpp +++ b/src/layer/convolution.cpp @@ -74,7 +74,7 @@ int Convolution::load_model(const ModelBin& mb) int Convolution::create_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; use_int8_inference = opt.use_int8_inference; @@ -111,7 +111,7 @@ int Convolution::create_pipeline(const Option& opt) op->create_pipeline(opt_cpu); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.blob_allocator = int8_weight_data.allocator; const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); @@ -173,7 +173,7 @@ int Convolution::create_pipeline(const Option& opt) int Convolution::destroy_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (quantize) { @@ -286,7 +286,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op op->load_model(ModelBinFromMatArray(weights)); Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; op->create_pipeline(opt_cpu); // forward diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp index 6ec4daef0..a1405e9d1 100644 --- a/src/layer/convolutiondepthwise.cpp +++ b/src/layer/convolutiondepthwise.cpp @@ -98,7 +98,7 @@ int 
ConvolutionDepthWise::load_model(const ModelBin& mb) int ConvolutionDepthWise::create_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; use_int8_inference = opt.use_int8_inference; @@ -134,7 +134,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) op->create_pipeline(opt_cpu); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.blob_allocator = int8_weight_data.allocator; const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g); @@ -198,7 +198,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) int ConvolutionDepthWise::destroy_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; for (int i=0; i<(int)quantize_ops.size(); i++) { diff --git a/src/layer/innerproduct.cpp b/src/layer/innerproduct.cpp index 979903d6d..200d120ad 100644 --- a/src/layer/innerproduct.cpp +++ b/src/layer/innerproduct.cpp @@ -65,7 +65,7 @@ int InnerProduct::load_model(const ModelBin& mb) int InnerProduct::create_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; use_int8_inference = opt.use_int8_inference; @@ -143,7 +143,7 @@ int InnerProduct::create_pipeline(const Option& opt) op->create_pipeline(opt_cpu); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.blob_allocator = int8_weight_data.allocator; const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); @@ -162,7 +162,7 @@ int InnerProduct::create_pipeline(const Option& opt) int InnerProduct::destroy_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (quantize) { diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp index 80ffa919a..1375eecc4 100644 --- 
a/src/layer/x86/convolution_x86.cpp +++ b/src/layer/x86/convolution_x86.cpp @@ -45,7 +45,7 @@ Convolution_x86::Convolution_x86() int Convolution_x86::create_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (activation_type == 1) { @@ -120,7 +120,7 @@ int Convolution_x86::create_pipeline(const Option& opt) int Convolution_x86::destroy_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (activation) { diff --git a/src/layer/x86/convolutiondepthwise_x86.cpp b/src/layer/x86/convolutiondepthwise_x86.cpp index 0adaad109..8011ad654 100644 --- a/src/layer/x86/convolutiondepthwise_x86.cpp +++ b/src/layer/x86/convolutiondepthwise_x86.cpp @@ -36,7 +36,7 @@ ConvolutionDepthWise_x86::ConvolutionDepthWise_x86() int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (activation_type == 1) { @@ -167,7 +167,7 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; if (activation) { diff --git a/src/layer/yolodetectionoutput.cpp b/src/layer/yolodetectionoutput.cpp index 7e74cf4e1..244a4bb27 100644 --- a/src/layer/yolodetectionoutput.cpp +++ b/src/layer/yolodetectionoutput.cpp @@ -49,7 +49,7 @@ int YoloDetectionOutput::create_pipeline(const Option& opt) softmax->load_param(pd); Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; softmax->create_pipeline(opt_cpu); } @@ -61,7 +61,7 @@ int YoloDetectionOutput::destroy_pipeline(const Option& opt) if (softmax) { Option opt_cpu = opt; - opt_cpu.vulkan_compute = false; + opt_cpu.use_vulkan_compute = false; softmax->destroy_pipeline(opt_cpu); delete softmax; softmax = 0; 
diff --git a/src/mat.cpp b/src/mat.cpp index 84b645870..2e7886b76 100644 --- a/src/mat.cpp +++ b/src/mat.cpp @@ -95,7 +95,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val return; } - op->forward_inplace(*this, ncnn::get_default_option()); + op->forward_inplace(*this); delete op; } @@ -234,7 +234,7 @@ void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, i padding->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -257,7 +257,7 @@ void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, in crop->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -277,7 +277,7 @@ void resize_bilinear(const Mat& src, Mat& dst, int w, int h, Allocator* allocato interp->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -297,7 +297,7 @@ void resize_bicubic(const Mat& src, Mat& dst, int w, int h, Allocator* allocator interp->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -315,7 +315,7 @@ void convert_packing(const Mat& src, Mat& dst, int _packing, Allocator* allocato packing->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -334,7 +334,7 @@ void cast_float32_to_float16(const Mat& src, Mat& dst, Allocator* allocator, int cast->load_param(pd); - ncnn::Option opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; @@ -353,7 +353,7 @@ void cast_float16_to_float32(const Mat& src, Mat& dst, Allocator* allocator, int cast->load_param(pd); - ncnn::Option 
opt = ncnn::get_default_option(); + ncnn::Option opt; opt.num_threads = num_threads; opt.blob_allocator = allocator; diff --git a/src/net.cpp b/src/net.cpp index 18c25ddd1..4c6237ffd 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -40,11 +40,6 @@ namespace ncnn { Net::Net() { - use_winograd_convolution = 1; - use_sgemm_convolution = 1; - use_int8_inference = 1; - use_vulkan_compute = 0; - #if NCNN_VULKAN vkdev = 0; weight_vkallocator = 0; @@ -155,11 +150,8 @@ int Net::load_param(FILE* fp) blobs.resize((size_t)blob_count); #if NCNN_VULKAN - if (use_vulkan_compute && !vkdev) - { - // use default vulkan device + if (opt.use_vulkan_compute && !vkdev) vkdev = get_default_gpu_device(); - } #endif // NCNN_VULKAN ParamDict pd; @@ -192,7 +184,7 @@ int Net::load_param(FILE* fp) } #if NCNN_VULKAN - if (use_vulkan_compute) + if (opt.use_vulkan_compute) layer->vkdev = vkdev; #endif // NCNN_VULKAN @@ -320,11 +312,8 @@ int Net::load_param_mem(const char* _mem) blobs.resize(blob_count); #if NCNN_VULKAN - if (use_vulkan_compute && !vkdev) - { - // use default vulkan device + if (opt.use_vulkan_compute && !vkdev) vkdev = get_default_gpu_device(); - } #endif // NCNN_VULKAN ParamDict pd; @@ -357,7 +346,7 @@ int Net::load_param_mem(const char* _mem) } #if NCNN_VULKAN - if (use_vulkan_compute) + if (opt.use_vulkan_compute) layer->vkdev = vkdev; #endif // NCNN_VULKAN @@ -489,11 +478,8 @@ int Net::load_param_bin(FILE* fp) blobs.resize(blob_count); #if NCNN_VULKAN - if (use_vulkan_compute && !vkdev) - { - // use default vulkan device + if (opt.use_vulkan_compute && !vkdev) vkdev = get_default_gpu_device(); - } #endif // NCNN_VULKAN ParamDict pd; @@ -526,7 +512,7 @@ int Net::load_param_bin(FILE* fp) } #if NCNN_VULKAN - if (use_vulkan_compute) + if (opt.use_vulkan_compute) layer->vkdev = vkdev; #endif // NCNN_VULKAN @@ -613,12 +599,6 @@ int Net::load_model(FILE* fp) // load file int ret = 0; - Option opt; - opt.vulkan_compute = use_vulkan_compute; - opt.use_winograd_convolution = 
use_winograd_convolution; - opt.use_sgemm_convolution = use_sgemm_convolution; - opt.use_int8_inference = use_int8_inference; - ModelBinFromStdio mb(fp); for (size_t i=0; ivkdev = vkdev; #endif // NCNN_VULKAN @@ -816,12 +793,6 @@ int Net::load_model(const unsigned char* _mem) return -1; } - Option opt; - opt.vulkan_compute = use_vulkan_compute; - opt.use_winograd_convolution = use_winograd_convolution; - opt.use_sgemm_convolution = use_sgemm_convolution; - opt.use_int8_inference = use_int8_inference; - const unsigned char* mem = _mem; ModelBinFromMemory mb(mem); for (size_t i=0; iinfo.support_fp16_packed || vkdev->info.support_fp16_storage) { { @@ -1117,12 +1076,6 @@ int Net::create_pipeline() int Net::destroy_pipeline() { - Option opt; - opt.vulkan_compute = use_vulkan_compute; - opt.use_winograd_convolution = use_winograd_convolution; - opt.use_sgemm_convolution = use_sgemm_convolution; - opt.use_int8_inference = use_int8_inference; - if (cast_float32_to_float16) cast_float32_to_float16->destroy_pipeline(opt); @@ -1887,19 +1840,20 @@ int Net::forward_layer(int layer_index, std::vector& blob_mats, std::vector Extractor::Extractor(const Net* _net, int blob_count) : net(_net) { blob_mats.resize(blob_count); - opt = get_default_option(); + opt = net->opt; #if NCNN_VULKAN - opt.vulkan_compute = net->use_vulkan_compute; - - if (net->use_vulkan_compute) + if (net->opt.use_vulkan_compute) { - blob_mats_gpu.resize(blob_count); - // set default vulkan blob/workspace/staging allocator - opt.blob_vkallocator = net->vkdev->allocator(); - opt.workspace_vkallocator = net->vkdev->allocator(); - opt.staging_vkallocator = net->vkdev->staging_allocator(); + if (!opt.blob_vkallocator) + opt.blob_vkallocator = net->vkdev->allocator(); + if (!opt.workspace_vkallocator) + opt.workspace_vkallocator = net->vkdev->allocator(); + if (!opt.staging_vkallocator) + opt.staging_vkallocator = net->vkdev->staging_allocator(); + + blob_mats_gpu.resize(blob_count); } #endif // NCNN_VULKAN } @@ 
-1927,9 +1881,9 @@ void Extractor::set_workspace_allocator(Allocator* allocator) #if NCNN_VULKAN void Extractor::set_vulkan_compute(bool enable) { - if (net->use_vulkan_compute) + if (net->opt.use_vulkan_compute) { - opt.vulkan_compute = enable; + opt.use_vulkan_compute = enable; } else { @@ -1995,7 +1949,7 @@ int Extractor::extract(int blob_index, Mat& feat) int layer_index = net->blobs[blob_index].producer; #if NCNN_VULKAN - if (opt.vulkan_compute) + if (opt.use_vulkan_compute) { ncnn::VkCompute cmd(net->vkdev); #if NCNN_BENCHMARK diff --git a/src/net.h b/src/net.h index 8b9278833..5d5df01fc 100644 --- a/src/net.h +++ b/src/net.h @@ -17,10 +17,11 @@ #include #include +#include "platform.h" #include "blob.h" #include "layer.h" #include "mat.h" -#include "platform.h" +#include "option.h" namespace ncnn { @@ -36,6 +37,16 @@ public: // clear and destroy ~Net(); +public: + // option + Option opt; + +#if NCNN_VULKAN + + void set_vulkan_device(const VulkanDevice* vkdev); + +#endif // NCNN_VULKAN + #if NCNN_STRING // register custom layer by layer type name // return 0 if success @@ -82,34 +93,6 @@ public: // construct an Extractor from network Extractor create_extractor() const; -public: - // enable winograd convolution optimization - // improve convolution 3x3 stride1 performace, may consume more memory - // changes should be applied before loading network structure and weight - // enabled by default - int use_winograd_convolution; - - // enable sgemm convolution optimization - // improve convolution 1x1 stride1 performace, may consume more memory - // changes should be applied before loading network structure and weight - // enabled by default - int use_sgemm_convolution; - - // enable quantized int8 inference - // use low-precision int8 path for quantized model - // changes should be applied before loading network structure and weight - // enabled by default - int use_int8_inference; - - // enable vulkan compute - int use_vulkan_compute; - -#if NCNN_VULKAN - - void 
set_vulkan_device(const VulkanDevice* vkdev); - -#endif // NCNN_VULKAN - protected: // parse the structure of network // fuse int8 op dequantize and quantize by requantize diff --git a/src/option.cpp b/src/option.cpp new file mode 100644 index 000000000..249306b30 --- /dev/null +++ b/src/option.cpp @@ -0,0 +1,49 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "option.h" +#include "cpu.h" + +namespace ncnn { + +Option::Option() +{ + lightmode = true; + num_threads = get_cpu_count(); + blob_allocator = 0; + workspace_allocator = 0; + +#if NCNN_VULKAN + blob_vkallocator = 0; + workspace_vkallocator = 0; + staging_vkallocator = 0; +#endif // NCNN_VULKAN + + use_winograd_convolution = true; + use_sgemm_convolution = true; + use_int8_inference = true; + use_vulkan_compute = false;// TODO enable me + + use_fp16_packed = false;// TODO enable me + use_fp16_storage = true; + use_fp16_arithmetic = false; + use_int8_storage = true; + use_int8_arithmetic = false; + + // sanitize + if (num_threads <= 0) + num_threads = 1; +} + +} // namespace ncnn diff --git a/src/option.h b/src/option.h new file mode 100644 index 000000000..54c7e57af --- /dev/null +++ b/src/option.h @@ -0,0 +1,91 @@ +// Tencent is pleased to support the open source community by making ncnn available. 
+// +// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef NCNN_OPTION_H +#define NCNN_OPTION_H + +#include "platform.h" + +namespace ncnn { + +#if NCNN_VULKAN +class VkAllocator; +#endif // NCNN_VULKAN + +class Allocator; +class Option +{ +public: + // default option + Option(); + +public: + // light mode + // intermediate blob will be recycled when enabled + // enabled by default + bool lightmode; + + // thread count + // default value is the one returned by get_cpu_count() + int num_threads; + + // blob memory allocator + Allocator* blob_allocator; + + // workspace memory allocator + Allocator* workspace_allocator; + +#if NCNN_VULKAN + // blob memory allocator + VkAllocator* blob_vkallocator; + + // workspace memory allocator + VkAllocator* workspace_vkallocator; + + // staging memory allocator + VkAllocator* staging_vkallocator; +#endif // NCNN_VULKAN + + // enable winograd convolution optimization + // improve convolution 3x3 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_winograd_convolution; + + // enable sgemm convolution optimization + // improve convolution 1x1 stride1 performance, may consume more memory + // changes should be applied before loading network structure and weight + // enabled by default + bool use_sgemm_convolution; + + // enable quantized int8 inference +
// use low-precision int8 path for quantized model + // changes should be applied before loading network structure and weight + // enabled by default + bool use_int8_inference; + + // enable vulkan compute + bool use_vulkan_compute; + + // enable options for gpu inference + bool use_fp16_packed; + bool use_fp16_storage; + bool use_fp16_arithmetic; + bool use_int8_storage; + bool use_int8_arithmetic; +}; + +} // namespace ncnn + +#endif // NCNN_OPTION_H diff --git a/src/pipeline.cpp b/src/pipeline.cpp index fe2ec5c41..9ec7e0431 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -47,18 +47,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const char* // fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name); - create_descriptorset_layout(binding_count); - - create_pipeline_layout(push_constant_count); - - create_pipeline(local_shader_module, entry_name, specializations); - - if (vkdev->info.support_VK_KHR_descriptor_update_template) - { - create_descriptor_update_template(binding_count); - } - - return 0; + return create(local_shader_module, entry_name, specializations, binding_count, push_constant_count); } int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector& specializations, int binding_count, int push_constant_count)