| @@ -57,9 +57,6 @@ public: | |||||
| // load file | // load file | ||||
| int ret = 0; | int ret = 0; | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| ModelBinFromEmpty mb; | ModelBinFromEmpty mb; | ||||
| for (size_t i=0; i<layers.size(); i++) | for (size_t i=0; i<layers.size(); i++) | ||||
| { | { | ||||
| @@ -83,7 +80,7 @@ public: | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| { | { | ||||
| upload_model(); | upload_model(); | ||||
| @@ -100,12 +97,12 @@ public: | |||||
| static int g_warmup_loop_count = 3; | static int g_warmup_loop_count = 3; | ||||
| static int g_loop_count = 4; | static int g_loop_count = 4; | ||||
| static ncnn::Option g_default_option; | |||||
| static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; | static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; | ||||
| static ncnn::PoolAllocator g_workspace_pool_allocator; | static ncnn::PoolAllocator g_workspace_pool_allocator; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| static bool g_use_vulkan_compute = false; | |||||
| static ncnn::VulkanDevice* g_vkdev = 0; | static ncnn::VulkanDevice* g_vkdev = 0; | ||||
| static ncnn::VkAllocator* g_blob_vkallocator = 0; | static ncnn::VkAllocator* g_blob_vkallocator = 0; | ||||
| static ncnn::VkAllocator* g_staging_vkallocator = 0; | static ncnn::VkAllocator* g_staging_vkallocator = 0; | ||||
| @@ -115,11 +112,11 @@ void benchmark(const char* comment, const ncnn::Mat& in) | |||||
| { | { | ||||
| ncnn::BenchNet net; | ncnn::BenchNet net; | ||||
| net.opt = g_default_option; | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (g_use_vulkan_compute) | |||||
| if (net.opt.use_vulkan_compute) | |||||
| { | { | ||||
| net.use_vulkan_compute = g_use_vulkan_compute; | |||||
| net.set_vulkan_device(g_vkdev); | net.set_vulkan_device(g_vkdev); | ||||
| } | } | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| @@ -134,7 +131,7 @@ void benchmark(const char* comment, const ncnn::Mat& in) | |||||
| g_workspace_pool_allocator.clear(); | g_workspace_pool_allocator.clear(); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (g_use_vulkan_compute) | |||||
| if (net.opt.use_vulkan_compute) | |||||
| { | { | ||||
| g_blob_vkallocator->clear(); | g_blob_vkallocator->clear(); | ||||
| g_staging_vkallocator->clear(); | g_staging_vkallocator->clear(); | ||||
| @@ -145,7 +142,7 @@ void benchmark(const char* comment, const ncnn::Mat& in) | |||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||
| Sleep(10 * 1000); | Sleep(10 * 1000); | ||||
| #else | #else | ||||
| sleep(10); | |||||
| // sleep(10); | |||||
| #endif | #endif | ||||
| ncnn::Mat out; | ncnn::Mat out; | ||||
| @@ -210,14 +207,15 @@ int main(int argc, char** argv) | |||||
| gpu_device = atoi(argv[4]); | gpu_device = atoi(argv[4]); | ||||
| } | } | ||||
| bool use_vulkan_compute = gpu_device != -1; | |||||
| g_loop_count = loop_count; | g_loop_count = loop_count; | ||||
| g_blob_pool_allocator.set_size_compare_ratio(0.0f); | g_blob_pool_allocator.set_size_compare_ratio(0.0f); | ||||
| g_workspace_pool_allocator.set_size_compare_ratio(0.5f); | g_workspace_pool_allocator.set_size_compare_ratio(0.5f); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| g_use_vulkan_compute = gpu_device != -1; | |||||
| if (g_use_vulkan_compute) | |||||
| if (use_vulkan_compute) | |||||
| { | { | ||||
| g_warmup_loop_count = 10; | g_warmup_loop_count = 10; | ||||
| @@ -228,20 +226,25 @@ int main(int argc, char** argv) | |||||
| } | } | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ncnn::Option opt; | |||||
| opt.lightmode = true; | |||||
| opt.num_threads = num_threads; | |||||
| opt.blob_allocator = &g_blob_pool_allocator; | |||||
| opt.workspace_allocator = &g_workspace_pool_allocator; | |||||
| // default option | |||||
| g_default_option.lightmode = true; | |||||
| g_default_option.num_threads = num_threads; | |||||
| g_default_option.blob_allocator = &g_blob_pool_allocator; | |||||
| g_default_option.workspace_allocator = &g_workspace_pool_allocator; | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| opt.vulkan_compute = g_use_vulkan_compute; | |||||
| opt.blob_vkallocator = g_blob_vkallocator; | |||||
| opt.workspace_vkallocator = g_blob_vkallocator; | |||||
| opt.staging_vkallocator = g_staging_vkallocator; | |||||
| g_default_option.blob_vkallocator = g_blob_vkallocator; | |||||
| g_default_option.workspace_vkallocator = g_blob_vkallocator; | |||||
| g_default_option.staging_vkallocator = g_staging_vkallocator; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ncnn::set_default_option(opt); | |||||
| g_default_option.use_winograd_convolution = true; | |||||
| g_default_option.use_sgemm_convolution = true; | |||||
| g_default_option.use_int8_inference = true; | |||||
| g_default_option.use_vulkan_compute = use_vulkan_compute; | |||||
| g_default_option.use_fp16_packed = true; | |||||
| g_default_option.use_fp16_storage = true; | |||||
| g_default_option.use_fp16_arithmetic = true; | |||||
| g_default_option.use_int8_storage = true; | |||||
| g_default_option.use_int8_arithmetic = true; | |||||
| ncnn::set_cpu_powersave(powersave); | ncnn::set_cpu_powersave(powersave); | ||||
| @@ -257,21 +260,21 @@ int main(int argc, char** argv) | |||||
| benchmark("squeezenet", ncnn::Mat(227, 227, 3)); | benchmark("squeezenet", ncnn::Mat(227, 227, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3)); | benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3)); | ||||
| benchmark("mobilenet", ncnn::Mat(224, 224, 3)); | benchmark("mobilenet", ncnn::Mat(224, 224, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3)); | benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3)); | ||||
| benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3)); | benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3)); | ||||
| // #if NCNN_VULKAN | // #if NCNN_VULKAN | ||||
| // if (!g_use_vulkan_compute) | |||||
| // if (!use_vulkan_compute) | |||||
| // #endif // NCNN_VULKAN | // #endif // NCNN_VULKAN | ||||
| // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3)); | // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3)); | ||||
| @@ -284,14 +287,14 @@ int main(int argc, char** argv) | |||||
| benchmark("googlenet", ncnn::Mat(224, 224, 3)); | benchmark("googlenet", ncnn::Mat(224, 224, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("googlenet_int8", ncnn::Mat(224, 224, 3)); | benchmark("googlenet_int8", ncnn::Mat(224, 224, 3)); | ||||
| benchmark("resnet18", ncnn::Mat(224, 224, 3)); | benchmark("resnet18", ncnn::Mat(224, 224, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("resnet18_int8", ncnn::Mat(224, 224, 3)); | benchmark("resnet18_int8", ncnn::Mat(224, 224, 3)); | ||||
| @@ -300,28 +303,28 @@ int main(int argc, char** argv) | |||||
| benchmark("vgg16", ncnn::Mat(224, 224, 3)); | benchmark("vgg16", ncnn::Mat(224, 224, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("vgg16_int8", ncnn::Mat(224, 224, 3)); | benchmark("vgg16_int8", ncnn::Mat(224, 224, 3)); | ||||
| benchmark("resnet50", ncnn::Mat(224, 224, 3)); | benchmark("resnet50", ncnn::Mat(224, 224, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("resnet50_int8", ncnn::Mat(224, 224, 3)); | benchmark("resnet50_int8", ncnn::Mat(224, 224, 3)); | ||||
| benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3)); | benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3)); | benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3)); | ||||
| benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3)); | benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3)); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (!g_use_vulkan_compute) | |||||
| if (!use_vulkan_compute) | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3)); | benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3)); | ||||
| @@ -107,7 +107,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net fasterrcnn; | ncnn::Net fasterrcnn; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| fasterrcnn.use_vulkan_compute = true; | |||||
| fasterrcnn.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn | // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn | ||||
| @@ -36,7 +36,7 @@ static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net mobilenet; | ncnn::Net mobilenet; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| mobilenet.use_vulkan_compute = true; | |||||
| mobilenet.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // model is converted from https://github.com/chuanqi305/MobileNet-SSD | // model is converted from https://github.com/chuanqi305/MobileNet-SSD | ||||
| @@ -39,7 +39,7 @@ static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net mobilenetv2; | ncnn::Net mobilenetv2; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| mobilenetv2.use_vulkan_compute = true; | |||||
| mobilenetv2.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| mobilenetv2.register_custom_layer("Silence", Noop_layer_creator); | mobilenetv2.register_custom_layer("Silence", Noop_layer_creator); | ||||
| @@ -36,7 +36,7 @@ static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects,ncnn | |||||
| ncnn::Net peleenet; | ncnn::Net peleenet; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| peleenet.use_vulkan_compute = true; | |||||
| peleenet.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // model is converted from https://github.com/eric612/MobileNet-YOLO | // model is converted from https://github.com/eric612/MobileNet-YOLO | ||||
| @@ -94,7 +94,8 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, | |||||
| "traffic light","traffic sign","train"}; | "traffic light","traffic sign","train"}; | ||||
| cv::Mat image = bgr.clone(); | cv::Mat image = bgr.clone(); | ||||
| std::vector<int> color = {128,255,128,244,35,232}; | |||||
| const int color[] = {128,255,128,244,35,232}; | |||||
| const int color_count = sizeof(color) / sizeof(int); | |||||
| for (size_t i = 0; i < objects.size(); i++) | for (size_t i = 0; i < objects.size(); i++) | ||||
| { | { | ||||
| @@ -147,7 +148,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, | |||||
| } | } | ||||
| if(index > -1) { | if(index > -1) { | ||||
| int color_index = (index)*3; | int color_index = (index)*3; | ||||
| if(color_index<color.size()) { | |||||
| if(color_index<color_count) { | |||||
| int b = color[color_index]; | int b = color[color_index]; | ||||
| int g = color[color_index+1]; | int g = color[color_index+1]; | ||||
| int r = color[color_index+2]; | int r = color[color_index+2]; | ||||
| @@ -121,7 +121,7 @@ static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net rfcn; | ncnn::Net rfcn; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| rfcn.use_vulkan_compute = true; | |||||
| rfcn.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // original pretrained model from https://github.com/YuwenXiong/py-R-FCN | // original pretrained model from https://github.com/YuwenXiong/py-R-FCN | ||||
| @@ -29,7 +29,7 @@ static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_score | |||||
| ncnn::Net shufflenetv2; | ncnn::Net shufflenetv2; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| shufflenetv2.use_vulkan_compute = true; | |||||
| shufflenetv2.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe | // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe | ||||
| @@ -29,7 +29,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores) | |||||
| ncnn::Net squeezenet; | ncnn::Net squeezenet; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| squeezenet.use_vulkan_compute = true; | |||||
| squeezenet.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| squeezenet.load_param("squeezenet_v1.1.param"); | squeezenet.load_param("squeezenet_v1.1.param"); | ||||
| @@ -36,7 +36,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net squeezenet; | ncnn::Net squeezenet; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| squeezenet.use_vulkan_compute = true; | |||||
| squeezenet.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD | // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD | ||||
| @@ -36,7 +36,7 @@ static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net yolov2; | ncnn::Net yolov2; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| yolov2.use_vulkan_compute = true; | |||||
| yolov2.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // original pretrained model from https://github.com/eric612/MobileNet-YOLO | // original pretrained model from https://github.com/eric612/MobileNet-YOLO | ||||
| @@ -36,7 +36,7 @@ static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects) | |||||
| ncnn::Net yolov3; | ncnn::Net yolov3; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| yolov3.use_vulkan_compute = true; | |||||
| yolov3.opt.use_vulkan_compute = true; | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| // original pretrained model from https://github.com/eric612/MobileNet-YOLO | // original pretrained model from https://github.com/eric612/MobileNet-YOLO | ||||
| @@ -25,6 +25,7 @@ set(ncnn_SRCS | |||||
| modelbin.cpp | modelbin.cpp | ||||
| net.cpp | net.cpp | ||||
| opencv.cpp | opencv.cpp | ||||
| option.cpp | |||||
| paramdict.cpp | paramdict.cpp | ||||
| pipeline.cpp | pipeline.cpp | ||||
| benchmark.cpp | benchmark.cpp | ||||
| @@ -87,8 +88,8 @@ macro(ncnn_add_layer class) | |||||
| if(WITH_LAYER_${name}_vulkan) | if(WITH_LAYER_${name}_vulkan) | ||||
| set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n") | set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n") | ||||
| set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan") | set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan") | ||||
| set(create_pipeline_content "${create_pipeline_content} if (opt.vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n") | |||||
| set(destroy_pipeline_content " if (opt.vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}") | |||||
| set(create_pipeline_content "${create_pipeline_content} if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n") | |||||
| set(destroy_pipeline_content " if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}") | |||||
| file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp") | file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp") | ||||
| file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp") | file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp") | ||||
| @@ -297,6 +298,7 @@ if(NCNN_INSTALL_SDK) | |||||
| modelbin.h | modelbin.h | ||||
| net.h | net.h | ||||
| opencv.h | opencv.h | ||||
| option.h | |||||
| paramdict.h | paramdict.h | ||||
| pipeline.h | pipeline.h | ||||
| benchmark.h | benchmark.h | ||||
| @@ -645,7 +645,7 @@ int create_gpu_instance() | |||||
| } | } | ||||
| // check features | // check features | ||||
| gpu_info.support_fp16_packed = false;// TODO | |||||
| gpu_info.support_fp16_packed = true; | |||||
| gpu_info.support_fp16_storage = false; | gpu_info.support_fp16_storage = false; | ||||
| gpu_info.support_fp16_arithmetic = false; | gpu_info.support_fp16_arithmetic = false; | ||||
| gpu_info.support_int8_storage = false; | gpu_info.support_int8_storage = false; | ||||
| @@ -698,11 +698,11 @@ int create_gpu_instance() | |||||
| { | { | ||||
| gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess; | gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess; | ||||
| } | } | ||||
| // if (gpu_info.support_VK_KHR_shader_float16_int8) | |||||
| // { | |||||
| // gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16; | |||||
| // gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8; | |||||
| // } | |||||
| if (gpu_info.support_VK_KHR_shader_float16_int8) | |||||
| { | |||||
| gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16; | |||||
| gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8; | |||||
| } | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| @@ -27,46 +27,6 @@ | |||||
| namespace ncnn { | namespace ncnn { | ||||
| Option::Option() | |||||
| { | |||||
| lightmode = true; | |||||
| num_threads = get_cpu_count(); | |||||
| blob_allocator = 0; | |||||
| workspace_allocator = 0; | |||||
| vulkan_compute = false; | |||||
| #if NCNN_VULKAN | |||||
| blob_vkallocator = 0; | |||||
| workspace_vkallocator = 0; | |||||
| staging_vkallocator = 0; | |||||
| #endif // NCNN_VULKAN | |||||
| use_winograd_convolution = 1; | |||||
| use_sgemm_convolution = 1; | |||||
| use_int8_inference = 1; | |||||
| } | |||||
| static Option g_default_option; | |||||
| const Option& get_default_option() | |||||
| { | |||||
| return g_default_option; | |||||
| } | |||||
| int set_default_option(const Option& opt) | |||||
| { | |||||
| if (opt.num_threads <= 0) | |||||
| { | |||||
| fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads); | |||||
| return -1; | |||||
| } | |||||
| g_default_option = opt; | |||||
| return 0; | |||||
| } | |||||
| Layer::Layer() | Layer::Layer() | ||||
| { | { | ||||
| one_blob_only = false; | one_blob_only = false; | ||||
| @@ -19,10 +19,11 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <math.h> | #include <math.h> | ||||
| #include "platform.h" | |||||
| #include "mat.h" | #include "mat.h" | ||||
| #include "modelbin.h" | #include "modelbin.h" | ||||
| #include "option.h" | |||||
| #include "paramdict.h" | #include "paramdict.h" | ||||
| #include "platform.h" | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| #include <vulkan/vulkan.h> | #include <vulkan/vulkan.h> | ||||
| @@ -32,57 +33,6 @@ | |||||
| namespace ncnn { | namespace ncnn { | ||||
| #if NCNN_VULKAN | |||||
| class VkAllocator; | |||||
| #endif // NCNN_VULKAN | |||||
| class Allocator; | |||||
| class Option | |||||
| { | |||||
| public: | |||||
| // default option | |||||
| Option(); | |||||
| public: | |||||
| // light mode | |||||
| // intermediate blob will be recycled when enabled | |||||
| // enabled by default | |||||
| bool lightmode; | |||||
| // thread count | |||||
| // default value is the one returned by get_cpu_count() | |||||
| int num_threads; | |||||
| // blob memory allocator | |||||
| Allocator* blob_allocator; | |||||
| // workspace memory allocator | |||||
| Allocator* workspace_allocator; | |||||
| // enable vulkan compute | |||||
| bool vulkan_compute; | |||||
| #if NCNN_VULKAN | |||||
| // blob memory allocator | |||||
| VkAllocator* blob_vkallocator; | |||||
| // workspace memory allocator | |||||
| VkAllocator* workspace_vkallocator; | |||||
| // staging memory allocator | |||||
| VkAllocator* staging_vkallocator; | |||||
| #endif // NCNN_VULKAN | |||||
| public: | |||||
| int use_winograd_convolution; | |||||
| int use_sgemm_convolution; | |||||
| int use_int8_inference; | |||||
| }; | |||||
| // the global default option | |||||
| const Option& get_default_option(); | |||||
| int set_default_option(const Option& opt); | |||||
| class Layer | class Layer | ||||
| { | { | ||||
| public: | public: | ||||
| @@ -100,10 +50,10 @@ public: | |||||
| virtual int load_model(const ModelBin& mb); | virtual int load_model(const ModelBin& mb); | ||||
| // | // | ||||
| virtual int create_pipeline(const Option& opt = get_default_option()); | |||||
| virtual int create_pipeline(const Option& opt = Option()); | |||||
| // | // | ||||
| virtual int destroy_pipeline(const Option& opt = get_default_option()); | |||||
| virtual int destroy_pipeline(const Option& opt = Option()); | |||||
| public: | public: | ||||
| // one input and one output blob | // one input and one output blob | ||||
| @@ -118,13 +68,13 @@ public: | |||||
| public: | public: | ||||
| // implement inference | // implement inference | ||||
| // return 0 if success | // return 0 if success | ||||
| virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = get_default_option()) const; | |||||
| virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = get_default_option()) const; | |||||
| virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = Option()) const; | |||||
| virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const; | |||||
| // implement inplace inference | // implement inplace inference | ||||
| // return 0 if success | // return 0 if success | ||||
| virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = get_default_option()) const; | |||||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = get_default_option()) const; | |||||
| virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = Option()) const; | |||||
| virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const; | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| public: | public: | ||||
| @@ -134,13 +84,13 @@ public: | |||||
| public: | public: | ||||
| // implement inference | // implement inference | ||||
| // return 0 if success | // return 0 if success | ||||
| virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const; | |||||
| virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const; | |||||
| virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = Option()) const; | |||||
| virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const; | |||||
| // implement inplace inference | // implement inplace inference | ||||
| // return 0 if success | // return 0 if success | ||||
| virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const; | |||||
| virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const; | |||||
| virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const; | |||||
| virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const; | |||||
| public: | public: | ||||
| // assigned immediately after creating this layer | // assigned immediately after creating this layer | ||||
| @@ -74,7 +74,7 @@ int Convolution::load_model(const ModelBin& mb) | |||||
| int Convolution::create_pipeline(const Option& opt) | int Convolution::create_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| use_int8_inference = opt.use_int8_inference; | use_int8_inference = opt.use_int8_inference; | ||||
| @@ -111,7 +111,7 @@ int Convolution::create_pipeline(const Option& opt) | |||||
| op->create_pipeline(opt_cpu); | op->create_pipeline(opt_cpu); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.blob_allocator = int8_weight_data.allocator; | opt.blob_allocator = int8_weight_data.allocator; | ||||
| const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); | const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); | ||||
| @@ -173,7 +173,7 @@ int Convolution::create_pipeline(const Option& opt) | |||||
| int Convolution::destroy_pipeline(const Option& opt) | int Convolution::destroy_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (quantize) | if (quantize) | ||||
| { | { | ||||
| @@ -286,7 +286,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op | |||||
| op->load_model(ModelBinFromMatArray(weights)); | op->load_model(ModelBinFromMatArray(weights)); | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| op->create_pipeline(opt_cpu); | op->create_pipeline(opt_cpu); | ||||
| // forward | // forward | ||||
| @@ -98,7 +98,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb) | |||||
| int ConvolutionDepthWise::create_pipeline(const Option& opt) | int ConvolutionDepthWise::create_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| use_int8_inference = opt.use_int8_inference; | use_int8_inference = opt.use_int8_inference; | ||||
| @@ -134,7 +134,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) | |||||
| op->create_pipeline(opt_cpu); | op->create_pipeline(opt_cpu); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.blob_allocator = int8_weight_data.allocator; | opt.blob_allocator = int8_weight_data.allocator; | ||||
| const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g); | const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g); | ||||
| @@ -198,7 +198,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt) | |||||
| int ConvolutionDepthWise::destroy_pipeline(const Option& opt) | int ConvolutionDepthWise::destroy_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| for (int i=0; i<(int)quantize_ops.size(); i++) | for (int i=0; i<(int)quantize_ops.size(); i++) | ||||
| { | { | ||||
| @@ -65,7 +65,7 @@ int InnerProduct::load_model(const ModelBin& mb) | |||||
| int InnerProduct::create_pipeline(const Option& opt) | int InnerProduct::create_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| use_int8_inference = opt.use_int8_inference; | use_int8_inference = opt.use_int8_inference; | ||||
| @@ -143,7 +143,7 @@ int InnerProduct::create_pipeline(const Option& opt) | |||||
| op->create_pipeline(opt_cpu); | op->create_pipeline(opt_cpu); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.blob_allocator = int8_weight_data.allocator; | opt.blob_allocator = int8_weight_data.allocator; | ||||
| const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); | const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); | ||||
| @@ -162,7 +162,7 @@ int InnerProduct::create_pipeline(const Option& opt) | |||||
| int InnerProduct::destroy_pipeline(const Option& opt) | int InnerProduct::destroy_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (quantize) | if (quantize) | ||||
| { | { | ||||
| @@ -45,7 +45,7 @@ Convolution_x86::Convolution_x86() | |||||
| int Convolution_x86::create_pipeline(const Option& opt) | int Convolution_x86::create_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (activation_type == 1) | if (activation_type == 1) | ||||
| { | { | ||||
| @@ -120,7 +120,7 @@ int Convolution_x86::create_pipeline(const Option& opt) | |||||
| int Convolution_x86::destroy_pipeline(const Option& opt) | int Convolution_x86::destroy_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (activation) | if (activation) | ||||
| { | { | ||||
| @@ -36,7 +36,7 @@ ConvolutionDepthWise_x86::ConvolutionDepthWise_x86() | |||||
| int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (activation_type == 1) | if (activation_type == 1) | ||||
| { | { | ||||
| @@ -167,7 +167,7 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||||
| int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) | int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| if (activation) | if (activation) | ||||
| { | { | ||||
| @@ -49,7 +49,7 @@ int YoloDetectionOutput::create_pipeline(const Option& opt) | |||||
| softmax->load_param(pd); | softmax->load_param(pd); | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| softmax->create_pipeline(opt_cpu); | softmax->create_pipeline(opt_cpu); | ||||
| } | } | ||||
| @@ -61,7 +61,7 @@ int YoloDetectionOutput::destroy_pipeline(const Option& opt) | |||||
| if (softmax) | if (softmax) | ||||
| { | { | ||||
| Option opt_cpu = opt; | Option opt_cpu = opt; | ||||
| opt_cpu.vulkan_compute = false; | |||||
| opt_cpu.use_vulkan_compute = false; | |||||
| softmax->destroy_pipeline(opt_cpu); | softmax->destroy_pipeline(opt_cpu); | ||||
| delete softmax; | delete softmax; | ||||
| softmax = 0; | softmax = 0; | ||||
| @@ -95,7 +95,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val | |||||
| return; | return; | ||||
| } | } | ||||
| op->forward_inplace(*this, ncnn::get_default_option()); | |||||
| op->forward_inplace(*this); | |||||
| delete op; | delete op; | ||||
| } | } | ||||
| @@ -234,7 +234,7 @@ void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, i | |||||
| padding->load_param(pd); | padding->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -257,7 +257,7 @@ void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, in | |||||
| crop->load_param(pd); | crop->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -277,7 +277,7 @@ void resize_bilinear(const Mat& src, Mat& dst, int w, int h, Allocator* allocato | |||||
| interp->load_param(pd); | interp->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -297,7 +297,7 @@ void resize_bicubic(const Mat& src, Mat& dst, int w, int h, Allocator* allocator | |||||
| interp->load_param(pd); | interp->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -315,7 +315,7 @@ void convert_packing(const Mat& src, Mat& dst, int _packing, Allocator* allocato | |||||
| packing->load_param(pd); | packing->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -334,7 +334,7 @@ void cast_float32_to_float16(const Mat& src, Mat& dst, Allocator* allocator, int | |||||
| cast->load_param(pd); | cast->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -353,7 +353,7 @@ void cast_float16_to_float32(const Mat& src, Mat& dst, Allocator* allocator, int | |||||
| cast->load_param(pd); | cast->load_param(pd); | ||||
| ncnn::Option opt = ncnn::get_default_option(); | |||||
| ncnn::Option opt; | |||||
| opt.num_threads = num_threads; | opt.num_threads = num_threads; | ||||
| opt.blob_allocator = allocator; | opt.blob_allocator = allocator; | ||||
| @@ -40,11 +40,6 @@ namespace ncnn { | |||||
| Net::Net() | Net::Net() | ||||
| { | { | ||||
| use_winograd_convolution = 1; | |||||
| use_sgemm_convolution = 1; | |||||
| use_int8_inference = 1; | |||||
| use_vulkan_compute = 0; | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| vkdev = 0; | vkdev = 0; | ||||
| weight_vkallocator = 0; | weight_vkallocator = 0; | ||||
| @@ -155,11 +150,8 @@ int Net::load_param(FILE* fp) | |||||
| blobs.resize((size_t)blob_count); | blobs.resize((size_t)blob_count); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute && !vkdev) | |||||
| { | |||||
| // use default vulkan device | |||||
| if (opt.use_vulkan_compute && !vkdev) | |||||
| vkdev = get_default_gpu_device(); | vkdev = get_default_gpu_device(); | ||||
| } | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ParamDict pd; | ParamDict pd; | ||||
| @@ -192,7 +184,7 @@ int Net::load_param(FILE* fp) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| layer->vkdev = vkdev; | layer->vkdev = vkdev; | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| @@ -320,11 +312,8 @@ int Net::load_param_mem(const char* _mem) | |||||
| blobs.resize(blob_count); | blobs.resize(blob_count); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute && !vkdev) | |||||
| { | |||||
| // use default vulkan device | |||||
| if (opt.use_vulkan_compute && !vkdev) | |||||
| vkdev = get_default_gpu_device(); | vkdev = get_default_gpu_device(); | ||||
| } | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ParamDict pd; | ParamDict pd; | ||||
| @@ -357,7 +346,7 @@ int Net::load_param_mem(const char* _mem) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| layer->vkdev = vkdev; | layer->vkdev = vkdev; | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| @@ -489,11 +478,8 @@ int Net::load_param_bin(FILE* fp) | |||||
| blobs.resize(blob_count); | blobs.resize(blob_count); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute && !vkdev) | |||||
| { | |||||
| // use default vulkan device | |||||
| if (opt.use_vulkan_compute && !vkdev) | |||||
| vkdev = get_default_gpu_device(); | vkdev = get_default_gpu_device(); | ||||
| } | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ParamDict pd; | ParamDict pd; | ||||
| @@ -526,7 +512,7 @@ int Net::load_param_bin(FILE* fp) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| layer->vkdev = vkdev; | layer->vkdev = vkdev; | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| @@ -613,12 +599,6 @@ int Net::load_model(FILE* fp) | |||||
| // load file | // load file | ||||
| int ret = 0; | int ret = 0; | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| opt.use_winograd_convolution = use_winograd_convolution; | |||||
| opt.use_sgemm_convolution = use_sgemm_convolution; | |||||
| opt.use_int8_inference = use_int8_inference; | |||||
| ModelBinFromStdio mb(fp); | ModelBinFromStdio mb(fp); | ||||
| for (size_t i=0; i<layers.size(); i++) | for (size_t i=0; i<layers.size(); i++) | ||||
| { | { | ||||
| @@ -649,7 +629,7 @@ int Net::load_model(FILE* fp) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| { | { | ||||
| create_pipeline(); | create_pipeline(); | ||||
| @@ -709,11 +689,8 @@ int Net::load_param(const unsigned char* _mem) | |||||
| blobs.resize(blob_count); | blobs.resize(blob_count); | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute && !vkdev) | |||||
| { | |||||
| // use default vulkan device | |||||
| if (opt.use_vulkan_compute && !vkdev) | |||||
| vkdev = get_default_gpu_device(); | vkdev = get_default_gpu_device(); | ||||
| } | |||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| ParamDict pd; | ParamDict pd; | ||||
| @@ -743,7 +720,7 @@ int Net::load_param(const unsigned char* _mem) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| layer->vkdev = vkdev; | layer->vkdev = vkdev; | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| @@ -816,12 +793,6 @@ int Net::load_model(const unsigned char* _mem) | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| opt.use_winograd_convolution = use_winograd_convolution; | |||||
| opt.use_sgemm_convolution = use_sgemm_convolution; | |||||
| opt.use_int8_inference = use_int8_inference; | |||||
| const unsigned char* mem = _mem; | const unsigned char* mem = _mem; | ||||
| ModelBinFromMemory mb(mem); | ModelBinFromMemory mb(mem); | ||||
| for (size_t i=0; i<layers.size(); i++) | for (size_t i=0; i<layers.size(); i++) | ||||
| @@ -850,7 +821,7 @@ int Net::load_model(const unsigned char* _mem) | |||||
| } | } | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (use_vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| { | { | ||||
| create_pipeline(); | create_pipeline(); | ||||
| @@ -971,12 +942,6 @@ void Net::clear() | |||||
| destroy_pipeline(); | destroy_pipeline(); | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| opt.use_winograd_convolution = use_winograd_convolution; | |||||
| opt.use_sgemm_convolution = use_sgemm_convolution; | |||||
| opt.use_int8_inference = use_int8_inference; | |||||
| blobs.clear(); | blobs.clear(); | ||||
| for (size_t i=0; i<layers.size(); i++) | for (size_t i=0; i<layers.size(); i++) | ||||
| { | { | ||||
| @@ -1053,12 +1018,6 @@ int Net::upload_model() | |||||
| int Net::create_pipeline() | int Net::create_pipeline() | ||||
| { | { | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| opt.use_winograd_convolution = use_winograd_convolution; | |||||
| opt.use_sgemm_convolution = use_sgemm_convolution; | |||||
| opt.use_int8_inference = use_int8_inference; | |||||
| if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage) | if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage) | ||||
| { | { | ||||
| { | { | ||||
| @@ -1117,12 +1076,6 @@ int Net::create_pipeline() | |||||
| int Net::destroy_pipeline() | int Net::destroy_pipeline() | ||||
| { | { | ||||
| Option opt; | |||||
| opt.vulkan_compute = use_vulkan_compute; | |||||
| opt.use_winograd_convolution = use_winograd_convolution; | |||||
| opt.use_sgemm_convolution = use_sgemm_convolution; | |||||
| opt.use_int8_inference = use_int8_inference; | |||||
| if (cast_float32_to_float16) | if (cast_float32_to_float16) | ||||
| cast_float32_to_float16->destroy_pipeline(opt); | cast_float32_to_float16->destroy_pipeline(opt); | ||||
| @@ -1887,19 +1840,20 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector | |||||
| Extractor::Extractor(const Net* _net, int blob_count) : net(_net) | Extractor::Extractor(const Net* _net, int blob_count) : net(_net) | ||||
| { | { | ||||
| blob_mats.resize(blob_count); | blob_mats.resize(blob_count); | ||||
| opt = get_default_option(); | |||||
| opt = net->opt; | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| opt.vulkan_compute = net->use_vulkan_compute; | |||||
| if (net->use_vulkan_compute) | |||||
| if (net->opt.use_vulkan_compute) | |||||
| { | { | ||||
| blob_mats_gpu.resize(blob_count); | |||||
| // set default vulkan blob/workspace/staging allocator | // set default vulkan blob/workspace/staging allocator | ||||
| opt.blob_vkallocator = net->vkdev->allocator(); | |||||
| opt.workspace_vkallocator = net->vkdev->allocator(); | |||||
| opt.staging_vkallocator = net->vkdev->staging_allocator(); | |||||
| if (!opt.blob_vkallocator) | |||||
| opt.blob_vkallocator = net->vkdev->allocator(); | |||||
| if (!opt.workspace_vkallocator) | |||||
| opt.workspace_vkallocator = net->vkdev->allocator(); | |||||
| if (!opt.staging_vkallocator) | |||||
| opt.staging_vkallocator = net->vkdev->staging_allocator(); | |||||
| blob_mats_gpu.resize(blob_count); | |||||
| } | } | ||||
| #endif // NCNN_VULKAN | #endif // NCNN_VULKAN | ||||
| } | } | ||||
| @@ -1927,9 +1881,9 @@ void Extractor::set_workspace_allocator(Allocator* allocator) | |||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| void Extractor::set_vulkan_compute(bool enable) | void Extractor::set_vulkan_compute(bool enable) | ||||
| { | { | ||||
| if (net->use_vulkan_compute) | |||||
| if (net->opt.use_vulkan_compute) | |||||
| { | { | ||||
| opt.vulkan_compute = enable; | |||||
| opt.use_vulkan_compute = enable; | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| @@ -1995,7 +1949,7 @@ int Extractor::extract(int blob_index, Mat& feat) | |||||
| int layer_index = net->blobs[blob_index].producer; | int layer_index = net->blobs[blob_index].producer; | ||||
| #if NCNN_VULKAN | #if NCNN_VULKAN | ||||
| if (opt.vulkan_compute) | |||||
| if (opt.use_vulkan_compute) | |||||
| { | { | ||||
| ncnn::VkCompute cmd(net->vkdev); | ncnn::VkCompute cmd(net->vkdev); | ||||
| #if NCNN_BENCHMARK | #if NCNN_BENCHMARK | ||||
| @@ -17,10 +17,11 @@ | |||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <vector> | #include <vector> | ||||
| #include "platform.h" | |||||
| #include "blob.h" | #include "blob.h" | ||||
| #include "layer.h" | #include "layer.h" | ||||
| #include "mat.h" | #include "mat.h" | ||||
| #include "platform.h" | |||||
| #include "option.h" | |||||
| namespace ncnn { | namespace ncnn { | ||||
| @@ -36,6 +37,16 @@ public: | |||||
| // clear and destroy | // clear and destroy | ||||
| ~Net(); | ~Net(); | ||||
| public: | |||||
| // option | |||||
| Option opt; | |||||
| #if NCNN_VULKAN | |||||
| void set_vulkan_device(const VulkanDevice* vkdev); | |||||
| #endif // NCNN_VULKAN | |||||
| #if NCNN_STRING | #if NCNN_STRING | ||||
| // register custom layer by layer type name | // register custom layer by layer type name | ||||
| // return 0 if success | // return 0 if success | ||||
| @@ -82,34 +93,6 @@ public: | |||||
| // construct an Extractor from network | // construct an Extractor from network | ||||
| Extractor create_extractor() const; | Extractor create_extractor() const; | ||||
| public: | |||||
| // enable winograd convolution optimization | |||||
| // improve convolution 3x3 stride1 performace, may consume more memory | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| int use_winograd_convolution; | |||||
| // enable sgemm convolution optimization | |||||
| // improve convolution 1x1 stride1 performace, may consume more memory | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| int use_sgemm_convolution; | |||||
| // enable quantized int8 inference | |||||
| // use low-precision int8 path for quantized model | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| int use_int8_inference; | |||||
| // enable vulkan compute | |||||
| int use_vulkan_compute; | |||||
| #if NCNN_VULKAN | |||||
| void set_vulkan_device(const VulkanDevice* vkdev); | |||||
| #endif // NCNN_VULKAN | |||||
| protected: | protected: | ||||
| // parse the structure of network | // parse the structure of network | ||||
| // fuse int8 op dequantize and quantize by requantize | // fuse int8 op dequantize and quantize by requantize | ||||
| @@ -0,0 +1,49 @@ | |||||
| // Tencent is pleased to support the open source community by making ncnn available. | |||||
| // | |||||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||||
| // | |||||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||||
| // in compliance with the License. You may obtain a copy of the License at | |||||
| // | |||||
| // https://opensource.org/licenses/BSD-3-Clause | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software distributed | |||||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||||
| // specific language governing permissions and limitations under the License. | |||||
| #include "option.h" | |||||
| #include "cpu.h" | |||||
| namespace ncnn { | |||||
| Option::Option() | |||||
| { | |||||
| lightmode = true; | |||||
| num_threads = get_cpu_count(); | |||||
| blob_allocator = 0; | |||||
| workspace_allocator = 0; | |||||
| #if NCNN_VULKAN | |||||
| blob_vkallocator = 0; | |||||
| workspace_vkallocator = 0; | |||||
| staging_vkallocator = 0; | |||||
| #endif // NCNN_VULKAN | |||||
| use_winograd_convolution = true; | |||||
| use_sgemm_convolution = true; | |||||
| use_int8_inference = true; | |||||
| use_vulkan_compute = false;// TODO enable me | |||||
| use_fp16_packed = false;// TODO enable me | |||||
| use_fp16_storage = true; | |||||
| use_fp16_arithmetic = false; | |||||
| use_int8_storage = true; | |||||
| use_int8_arithmetic = false; | |||||
| // sanitize | |||||
| if (num_threads <= 0) | |||||
| num_threads = 1; | |||||
| } | |||||
| } // namespace ncnn | |||||
| @@ -0,0 +1,91 @@ | |||||
| // Tencent is pleased to support the open source community by making ncnn available. | |||||
| // | |||||
| // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. | |||||
| // | |||||
| // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except | |||||
| // in compliance with the License. You may obtain a copy of the License at | |||||
| // | |||||
| // https://opensource.org/licenses/BSD-3-Clause | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software distributed | |||||
| // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | |||||
| // CONDITIONS OF ANY KIND, either express or implied. See the License for the | |||||
| // specific language governing permissions and limitations under the License. | |||||
| #ifndef NCNN_OPTION_H | |||||
| #define NCNN_OPTION_H | |||||
| #include "platform.h" | |||||
| namespace ncnn { | |||||
| #if NCNN_VULKAN | |||||
| class VkAllocator; | |||||
| #endif // NCNN_VULKAN | |||||
| class Allocator; | |||||
| class Option | |||||
| { | |||||
| public: | |||||
| // default option | |||||
| Option(); | |||||
| public: | |||||
| // light mode | |||||
| // intermediate blob will be recycled when enabled | |||||
| // enabled by default | |||||
| bool lightmode; | |||||
| // thread count | |||||
| // default value is the one returned by get_cpu_count() | |||||
| int num_threads; | |||||
| // blob memory allocator | |||||
| Allocator* blob_allocator; | |||||
| // workspace memory allocator | |||||
| Allocator* workspace_allocator; | |||||
| #if NCNN_VULKAN | |||||
| // blob memory allocator | |||||
| VkAllocator* blob_vkallocator; | |||||
| // workspace memory allocator | |||||
| VkAllocator* workspace_vkallocator; | |||||
| // staging memory allocator | |||||
| VkAllocator* staging_vkallocator; | |||||
| #endif // NCNN_VULKAN | |||||
| // enable winograd convolution optimization | |||||
| // improve convolution 3x3 stride1 performance, may consume more memory | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| bool use_winograd_convolution; | |||||
| // enable sgemm convolution optimization | |||||
| // improve convolution 1x1 stride1 performance, may consume more memory | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| bool use_sgemm_convolution; | |||||
| // enable quantized int8 inference | |||||
| // use low-precision int8 path for quantized model | |||||
| // changes should be applied before loading network structure and weight | |||||
| // enabled by default | |||||
| bool use_int8_inference; | |||||
| // enable vulkan compute | |||||
| bool use_vulkan_compute; | |||||
| // enable options for gpu inference | |||||
| bool use_fp16_packed; | |||||
| bool use_fp16_storage; | |||||
| bool use_fp16_arithmetic; | |||||
| bool use_int8_storage; | |||||
| bool use_int8_arithmetic; | |||||
| }; | |||||
| } // namespace ncnn | |||||
| #endif // NCNN_OPTION_H | |||||
| @@ -47,18 +47,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const char* | |||||
| // fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name); | // fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name); | ||||
| create_descriptorset_layout(binding_count); | |||||
| create_pipeline_layout(push_constant_count); | |||||
| create_pipeline(local_shader_module, entry_name, specializations); | |||||
| if (vkdev->info.support_VK_KHR_descriptor_update_template) | |||||
| { | |||||
| create_descriptor_update_template(binding_count); | |||||
| } | |||||
| return 0; | |||||
| return create(local_shader_module, entry_name, specializations, binding_count, push_constant_count); | |||||
| } | } | ||||
| int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count) | int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count) | ||||