Browse Source

option api changes

tags/20190611
nihuini 7 years ago
parent
commit
838c5df839
27 changed files with 272 additions and 290 deletions
  1. +37
    -34
      benchmark/benchncnn.cpp
  2. +1
    -1
      examples/fasterrcnn.cpp
  3. +1
    -1
      examples/mobilenetssd.cpp
  4. +1
    -1
      examples/mobilenetv2ssdlite.cpp
  5. +4
    -3
      examples/peleenetssd_seg.cpp
  6. +1
    -1
      examples/rfcn.cpp
  7. +1
    -1
      examples/shufflenetv2.cpp
  8. +1
    -1
      examples/squeezenet.cpp
  9. +1
    -1
      examples/squeezenetssd.cpp
  10. +1
    -1
      examples/yolov2.cpp
  11. +1
    -1
      examples/yolov3.cpp
  12. +4
    -2
      src/CMakeLists.txt
  13. +6
    -6
      src/gpu.cpp
  14. +0
    -40
      src/layer.cpp
  15. +12
    -62
      src/layer.h
  16. +4
    -4
      src/layer/convolution.cpp
  17. +3
    -3
      src/layer/convolutiondepthwise.cpp
  18. +3
    -3
      src/layer/innerproduct.cpp
  19. +2
    -2
      src/layer/x86/convolution_x86.cpp
  20. +2
    -2
      src/layer/x86/convolutiondepthwise_x86.cpp
  21. +2
    -2
      src/layer/yolodetectionoutput.cpp
  22. +8
    -8
      src/mat.cpp
  23. +23
    -69
      src/net.cpp
  24. +12
    -29
      src/net.h
  25. +49
    -0
      src/option.cpp
  26. +91
    -0
      src/option.h
  27. +1
    -12
      src/pipeline.cpp

+ 37
- 34
benchmark/benchncnn.cpp View File

@@ -57,9 +57,6 @@ public:
// load file // load file
int ret = 0; int ret = 0;


Option opt;
opt.vulkan_compute = use_vulkan_compute;

ModelBinFromEmpty mb; ModelBinFromEmpty mb;
for (size_t i=0; i<layers.size(); i++) for (size_t i=0; i<layers.size(); i++)
{ {
@@ -83,7 +80,7 @@ public:
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{ {
upload_model(); upload_model();


@@ -100,12 +97,12 @@ public:
static int g_warmup_loop_count = 3; static int g_warmup_loop_count = 3;
static int g_loop_count = 4; static int g_loop_count = 4;


static ncnn::Option g_default_option;

static ncnn::UnlockedPoolAllocator g_blob_pool_allocator; static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
static ncnn::PoolAllocator g_workspace_pool_allocator; static ncnn::PoolAllocator g_workspace_pool_allocator;


#if NCNN_VULKAN #if NCNN_VULKAN
static bool g_use_vulkan_compute = false;

static ncnn::VulkanDevice* g_vkdev = 0; static ncnn::VulkanDevice* g_vkdev = 0;
static ncnn::VkAllocator* g_blob_vkallocator = 0; static ncnn::VkAllocator* g_blob_vkallocator = 0;
static ncnn::VkAllocator* g_staging_vkallocator = 0; static ncnn::VkAllocator* g_staging_vkallocator = 0;
@@ -115,11 +112,11 @@ void benchmark(const char* comment, const ncnn::Mat& in)
{ {
ncnn::BenchNet net; ncnn::BenchNet net;


net.opt = g_default_option;

#if NCNN_VULKAN #if NCNN_VULKAN
if (g_use_vulkan_compute)
if (net.opt.use_vulkan_compute)
{ {
net.use_vulkan_compute = g_use_vulkan_compute;

net.set_vulkan_device(g_vkdev); net.set_vulkan_device(g_vkdev);
} }
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
@@ -134,7 +131,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
g_workspace_pool_allocator.clear(); g_workspace_pool_allocator.clear();


#if NCNN_VULKAN #if NCNN_VULKAN
if (g_use_vulkan_compute)
if (net.opt.use_vulkan_compute)
{ {
g_blob_vkallocator->clear(); g_blob_vkallocator->clear();
g_staging_vkallocator->clear(); g_staging_vkallocator->clear();
@@ -145,7 +142,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
#ifdef _WIN32 #ifdef _WIN32
Sleep(10 * 1000); Sleep(10 * 1000);
#else #else
sleep(10);
// sleep(10);
#endif #endif


ncnn::Mat out; ncnn::Mat out;
@@ -210,14 +207,15 @@ int main(int argc, char** argv)
gpu_device = atoi(argv[4]); gpu_device = atoi(argv[4]);
} }


bool use_vulkan_compute = gpu_device != -1;

g_loop_count = loop_count; g_loop_count = loop_count;


g_blob_pool_allocator.set_size_compare_ratio(0.0f); g_blob_pool_allocator.set_size_compare_ratio(0.0f);
g_workspace_pool_allocator.set_size_compare_ratio(0.5f); g_workspace_pool_allocator.set_size_compare_ratio(0.5f);


#if NCNN_VULKAN #if NCNN_VULKAN
g_use_vulkan_compute = gpu_device != -1;
if (g_use_vulkan_compute)
if (use_vulkan_compute)
{ {
g_warmup_loop_count = 10; g_warmup_loop_count = 10;


@@ -228,20 +226,25 @@ int main(int argc, char** argv)
} }
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


ncnn::Option opt;
opt.lightmode = true;
opt.num_threads = num_threads;
opt.blob_allocator = &g_blob_pool_allocator;
opt.workspace_allocator = &g_workspace_pool_allocator;

// default option
g_default_option.lightmode = true;
g_default_option.num_threads = num_threads;
g_default_option.blob_allocator = &g_blob_pool_allocator;
g_default_option.workspace_allocator = &g_workspace_pool_allocator;
#if NCNN_VULKAN #if NCNN_VULKAN
opt.vulkan_compute = g_use_vulkan_compute;
opt.blob_vkallocator = g_blob_vkallocator;
opt.workspace_vkallocator = g_blob_vkallocator;
opt.staging_vkallocator = g_staging_vkallocator;
g_default_option.blob_vkallocator = g_blob_vkallocator;
g_default_option.workspace_vkallocator = g_blob_vkallocator;
g_default_option.staging_vkallocator = g_staging_vkallocator;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN

ncnn::set_default_option(opt);
g_default_option.use_winograd_convolution = true;
g_default_option.use_sgemm_convolution = true;
g_default_option.use_int8_inference = true;
g_default_option.use_vulkan_compute = use_vulkan_compute;
g_default_option.use_fp16_packed = true;
g_default_option.use_fp16_storage = true;
g_default_option.use_fp16_arithmetic = true;
g_default_option.use_int8_storage = true;
g_default_option.use_int8_arithmetic = true;


ncnn::set_cpu_powersave(powersave); ncnn::set_cpu_powersave(powersave);


@@ -257,21 +260,21 @@ int main(int argc, char** argv)
benchmark("squeezenet", ncnn::Mat(227, 227, 3)); benchmark("squeezenet", ncnn::Mat(227, 227, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3)); benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3));


benchmark("mobilenet", ncnn::Mat(224, 224, 3)); benchmark("mobilenet", ncnn::Mat(224, 224, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3)); benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3));


benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3)); benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3));


// #if NCNN_VULKAN // #if NCNN_VULKAN
// if (!g_use_vulkan_compute)
// if (!use_vulkan_compute)
// #endif // NCNN_VULKAN // #endif // NCNN_VULKAN
// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3)); // benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3));


@@ -284,14 +287,14 @@ int main(int argc, char** argv)
benchmark("googlenet", ncnn::Mat(224, 224, 3)); benchmark("googlenet", ncnn::Mat(224, 224, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3)); benchmark("googlenet_int8", ncnn::Mat(224, 224, 3));


benchmark("resnet18", ncnn::Mat(224, 224, 3)); benchmark("resnet18", ncnn::Mat(224, 224, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3)); benchmark("resnet18_int8", ncnn::Mat(224, 224, 3));


@@ -300,28 +303,28 @@ int main(int argc, char** argv)
benchmark("vgg16", ncnn::Mat(224, 224, 3)); benchmark("vgg16", ncnn::Mat(224, 224, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3)); benchmark("vgg16_int8", ncnn::Mat(224, 224, 3));


benchmark("resnet50", ncnn::Mat(224, 224, 3)); benchmark("resnet50", ncnn::Mat(224, 224, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3)); benchmark("resnet50_int8", ncnn::Mat(224, 224, 3));


benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3)); benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3)); benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3));


benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3)); benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3));


#if NCNN_VULKAN #if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3)); benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3));




+ 1
- 1
examples/fasterrcnn.cpp View File

@@ -107,7 +107,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net fasterrcnn; ncnn::Net fasterrcnn;


#if NCNN_VULKAN #if NCNN_VULKAN
fasterrcnn.use_vulkan_compute = true;
fasterrcnn.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// original pretrained model from https://github.com/rbgirshick/py-faster-rcnn // original pretrained model from https://github.com/rbgirshick/py-faster-rcnn


+ 1
- 1
examples/mobilenetssd.cpp View File

@@ -36,7 +36,7 @@ static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net mobilenet; ncnn::Net mobilenet;


#if NCNN_VULKAN #if NCNN_VULKAN
mobilenet.use_vulkan_compute = true;
mobilenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// model is converted from https://github.com/chuanqi305/MobileNet-SSD // model is converted from https://github.com/chuanqi305/MobileNet-SSD


+ 1
- 1
examples/mobilenetv2ssdlite.cpp View File

@@ -39,7 +39,7 @@ static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net mobilenetv2; ncnn::Net mobilenetv2;


#if NCNN_VULKAN #if NCNN_VULKAN
mobilenetv2.use_vulkan_compute = true;
mobilenetv2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


mobilenetv2.register_custom_layer("Silence", Noop_layer_creator); mobilenetv2.register_custom_layer("Silence", Noop_layer_creator);


+ 4
- 3
examples/peleenetssd_seg.cpp View File

@@ -36,7 +36,7 @@ static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects,ncnn
ncnn::Net peleenet; ncnn::Net peleenet;


#if NCNN_VULKAN #if NCNN_VULKAN
peleenet.use_vulkan_compute = true;
peleenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// model is converted from https://github.com/eric612/MobileNet-YOLO // model is converted from https://github.com/eric612/MobileNet-YOLO
@@ -94,7 +94,8 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
"traffic light","traffic sign","train"}; "traffic light","traffic sign","train"};


cv::Mat image = bgr.clone(); cv::Mat image = bgr.clone();
std::vector<int> color = {128,255,128,244,35,232};
const int color[] = {128,255,128,244,35,232};
const int color_count = sizeof(color) / sizeof(int);
for (size_t i = 0; i < objects.size(); i++) for (size_t i = 0; i < objects.size(); i++)
{ {
@@ -147,7 +148,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
} }
if(index > -1) { if(index > -1) {
int color_index = (index)*3; int color_index = (index)*3;
if(color_index<color.size()) {
if(color_index<color_count) {
int b = color[color_index]; int b = color[color_index];
int g = color[color_index+1]; int g = color[color_index+1];
int r = color[color_index+2]; int r = color[color_index+2];


+ 1
- 1
examples/rfcn.cpp View File

@@ -121,7 +121,7 @@ static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net rfcn; ncnn::Net rfcn;


#if NCNN_VULKAN #if NCNN_VULKAN
rfcn.use_vulkan_compute = true;
rfcn.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// original pretrained model from https://github.com/YuwenXiong/py-R-FCN // original pretrained model from https://github.com/YuwenXiong/py-R-FCN


+ 1
- 1
examples/shufflenetv2.cpp View File

@@ -29,7 +29,7 @@ static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_score
ncnn::Net shufflenetv2; ncnn::Net shufflenetv2;


#if NCNN_VULKAN #if NCNN_VULKAN
shufflenetv2.use_vulkan_compute = true;
shufflenetv2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe // https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe


+ 1
- 1
examples/squeezenet.cpp View File

@@ -29,7 +29,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
ncnn::Net squeezenet; ncnn::Net squeezenet;


#if NCNN_VULKAN #if NCNN_VULKAN
squeezenet.use_vulkan_compute = true;
squeezenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


squeezenet.load_param("squeezenet_v1.1.param"); squeezenet.load_param("squeezenet_v1.1.param");


+ 1
- 1
examples/squeezenetssd.cpp View File

@@ -36,7 +36,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net squeezenet; ncnn::Net squeezenet;


#if NCNN_VULKAN #if NCNN_VULKAN
squeezenet.use_vulkan_compute = true;
squeezenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD // original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD


+ 1
- 1
examples/yolov2.cpp View File

@@ -36,7 +36,7 @@ static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net yolov2; ncnn::Net yolov2;


#if NCNN_VULKAN #if NCNN_VULKAN
yolov2.use_vulkan_compute = true;
yolov2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// original pretrained model from https://github.com/eric612/MobileNet-YOLO // original pretrained model from https://github.com/eric612/MobileNet-YOLO


+ 1
- 1
examples/yolov3.cpp View File

@@ -36,7 +36,7 @@ static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net yolov3; ncnn::Net yolov3;


#if NCNN_VULKAN #if NCNN_VULKAN
yolov3.use_vulkan_compute = true;
yolov3.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


// original pretrained model from https://github.com/eric612/MobileNet-YOLO // original pretrained model from https://github.com/eric612/MobileNet-YOLO


+ 4
- 2
src/CMakeLists.txt View File

@@ -25,6 +25,7 @@ set(ncnn_SRCS
modelbin.cpp modelbin.cpp
net.cpp net.cpp
opencv.cpp opencv.cpp
option.cpp
paramdict.cpp paramdict.cpp
pipeline.cpp pipeline.cpp
benchmark.cpp benchmark.cpp
@@ -87,8 +88,8 @@ macro(ncnn_add_layer class)
if(WITH_LAYER_${name}_vulkan) if(WITH_LAYER_${name}_vulkan)
set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n") set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n")
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan") set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan")
set(create_pipeline_content "${create_pipeline_content} if (opt.vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
set(destroy_pipeline_content " if (opt.vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
set(create_pipeline_content "${create_pipeline_content} if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
set(destroy_pipeline_content " if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")


file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp") file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp")
file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp") file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp")
@@ -297,6 +298,7 @@ if(NCNN_INSTALL_SDK)
modelbin.h modelbin.h
net.h net.h
opencv.h opencv.h
option.h
paramdict.h paramdict.h
pipeline.h pipeline.h
benchmark.h benchmark.h


+ 6
- 6
src/gpu.cpp View File

@@ -645,7 +645,7 @@ int create_gpu_instance()
} }


// check features // check features
gpu_info.support_fp16_packed = false;// TODO
gpu_info.support_fp16_packed = true;
gpu_info.support_fp16_storage = false; gpu_info.support_fp16_storage = false;
gpu_info.support_fp16_arithmetic = false; gpu_info.support_fp16_arithmetic = false;
gpu_info.support_int8_storage = false; gpu_info.support_int8_storage = false;
@@ -698,11 +698,11 @@ int create_gpu_instance()
{ {
gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess; gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess;
} }
// if (gpu_info.support_VK_KHR_shader_float16_int8)
// {
// gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
// gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
// }
if (gpu_info.support_VK_KHR_shader_float16_int8)
{
gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
}
} }
else else
{ {


+ 0
- 40
src/layer.cpp View File

@@ -27,46 +27,6 @@


namespace ncnn { namespace ncnn {


Option::Option()
{
lightmode = true;
num_threads = get_cpu_count();
blob_allocator = 0;
workspace_allocator = 0;

vulkan_compute = false;

#if NCNN_VULKAN
blob_vkallocator = 0;
workspace_vkallocator = 0;
staging_vkallocator = 0;
#endif // NCNN_VULKAN

use_winograd_convolution = 1;
use_sgemm_convolution = 1;
use_int8_inference = 1;
}

static Option g_default_option;

const Option& get_default_option()
{
return g_default_option;
}

int set_default_option(const Option& opt)
{
if (opt.num_threads <= 0)
{
fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
return -1;
}

g_default_option = opt;

return 0;
}

Layer::Layer() Layer::Layer()
{ {
one_blob_only = false; one_blob_only = false;


+ 12
- 62
src/layer.h View File

@@ -19,10 +19,11 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <math.h> #include <math.h>
#include "platform.h"
#include "mat.h" #include "mat.h"
#include "modelbin.h" #include "modelbin.h"
#include "option.h"
#include "paramdict.h" #include "paramdict.h"
#include "platform.h"


#if NCNN_VULKAN #if NCNN_VULKAN
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
@@ -32,57 +33,6 @@


namespace ncnn { namespace ncnn {


#if NCNN_VULKAN
class VkAllocator;
#endif // NCNN_VULKAN

class Allocator;
class Option
{
public:
// default option
Option();

public:
// light mode
// intermediate blob will be recycled when enabled
// enabled by default
bool lightmode;

// thread count
// default value is the one returned by get_cpu_count()
int num_threads;

// blob memory allocator
Allocator* blob_allocator;

// workspace memory allocator
Allocator* workspace_allocator;

// enable vulkan compute
bool vulkan_compute;

#if NCNN_VULKAN
// blob memory allocator
VkAllocator* blob_vkallocator;

// workspace memory allocator
VkAllocator* workspace_vkallocator;

// staging memory allocator
VkAllocator* staging_vkallocator;
#endif // NCNN_VULKAN

public:
int use_winograd_convolution;
int use_sgemm_convolution;
int use_int8_inference;
};

// the global default option
const Option& get_default_option();
int set_default_option(const Option& opt);

class Layer class Layer
{ {
public: public:
@@ -100,10 +50,10 @@ public:
virtual int load_model(const ModelBin& mb); virtual int load_model(const ModelBin& mb);


// //
virtual int create_pipeline(const Option& opt = get_default_option());
virtual int create_pipeline(const Option& opt = Option());


// //
virtual int destroy_pipeline(const Option& opt = get_default_option());
virtual int destroy_pipeline(const Option& opt = Option());


public: public:
// one input and one output blob // one input and one output blob
@@ -118,13 +68,13 @@ public:
public: public:
// implement inference // implement inference
// return 0 if success // return 0 if success
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = get_default_option()) const;
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = get_default_option()) const;
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = Option()) const;
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const;


// implement inplace inference // implement inplace inference
// return 0 if success // return 0 if success
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = get_default_option()) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = get_default_option()) const;
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = Option()) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const;


#if NCNN_VULKAN #if NCNN_VULKAN
public: public:
@@ -134,13 +84,13 @@ public:
public: public:
// implement inference // implement inference
// return 0 if success // return 0 if success
virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const;


// implement inplace inference // implement inplace inference
// return 0 if success // return 0 if success
virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const;


public: public:
// assigned immediately after creating this layer // assigned immediately after creating this layer


+ 4
- 4
src/layer/convolution.cpp View File

@@ -74,7 +74,7 @@ int Convolution::load_model(const ModelBin& mb)
int Convolution::create_pipeline(const Option& opt) int Convolution::create_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


use_int8_inference = opt.use_int8_inference; use_int8_inference = opt.use_int8_inference;


@@ -111,7 +111,7 @@ int Convolution::create_pipeline(const Option& opt)


op->create_pipeline(opt_cpu); op->create_pipeline(opt_cpu);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator; opt.blob_allocator = int8_weight_data.allocator;


const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -173,7 +173,7 @@ int Convolution::create_pipeline(const Option& opt)
int Convolution::destroy_pipeline(const Option& opt) int Convolution::destroy_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (quantize) if (quantize)
{ {
@@ -286,7 +286,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
op->load_model(ModelBinFromMatArray(weights)); op->load_model(ModelBinFromMatArray(weights));


Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
op->create_pipeline(opt_cpu); op->create_pipeline(opt_cpu);


// forward // forward


+ 3
- 3
src/layer/convolutiondepthwise.cpp View File

@@ -98,7 +98,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
int ConvolutionDepthWise::create_pipeline(const Option& opt) int ConvolutionDepthWise::create_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


use_int8_inference = opt.use_int8_inference; use_int8_inference = opt.use_int8_inference;


@@ -134,7 +134,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)


op->create_pipeline(opt_cpu); op->create_pipeline(opt_cpu);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator; opt.blob_allocator = int8_weight_data.allocator;


const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g); const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
@@ -198,7 +198,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)
int ConvolutionDepthWise::destroy_pipeline(const Option& opt) int ConvolutionDepthWise::destroy_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


for (int i=0; i<(int)quantize_ops.size(); i++) for (int i=0; i<(int)quantize_ops.size(); i++)
{ {


+ 3
- 3
src/layer/innerproduct.cpp View File

@@ -65,7 +65,7 @@ int InnerProduct::load_model(const ModelBin& mb)
int InnerProduct::create_pipeline(const Option& opt) int InnerProduct::create_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


use_int8_inference = opt.use_int8_inference; use_int8_inference = opt.use_int8_inference;


@@ -143,7 +143,7 @@ int InnerProduct::create_pipeline(const Option& opt)


op->create_pipeline(opt_cpu); op->create_pipeline(opt_cpu);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator; opt.blob_allocator = int8_weight_data.allocator;


const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output); const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -162,7 +162,7 @@ int InnerProduct::create_pipeline(const Option& opt)
int InnerProduct::destroy_pipeline(const Option& opt) int InnerProduct::destroy_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (quantize) if (quantize)
{ {


+ 2
- 2
src/layer/x86/convolution_x86.cpp View File

@@ -45,7 +45,7 @@ Convolution_x86::Convolution_x86()
int Convolution_x86::create_pipeline(const Option& opt) int Convolution_x86::create_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (activation_type == 1) if (activation_type == 1)
{ {
@@ -120,7 +120,7 @@ int Convolution_x86::create_pipeline(const Option& opt)
int Convolution_x86::destroy_pipeline(const Option& opt) int Convolution_x86::destroy_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (activation) if (activation)
{ {


+ 2
- 2
src/layer/x86/convolutiondepthwise_x86.cpp View File

@@ -36,7 +36,7 @@ ConvolutionDepthWise_x86::ConvolutionDepthWise_x86()
int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (activation_type == 1) if (activation_type == 1)
{ {
@@ -167,7 +167,7 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt) int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;


if (activation) if (activation)
{ {


+ 2
- 2
src/layer/yolodetectionoutput.cpp View File

@@ -49,7 +49,7 @@ int YoloDetectionOutput::create_pipeline(const Option& opt)
softmax->load_param(pd); softmax->load_param(pd);


Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
softmax->create_pipeline(opt_cpu); softmax->create_pipeline(opt_cpu);
} }


@@ -61,7 +61,7 @@ int YoloDetectionOutput::destroy_pipeline(const Option& opt)
if (softmax) if (softmax)
{ {
Option opt_cpu = opt; Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
softmax->destroy_pipeline(opt_cpu); softmax->destroy_pipeline(opt_cpu);
delete softmax; delete softmax;
softmax = 0; softmax = 0;


+ 8
- 8
src/mat.cpp View File

@@ -95,7 +95,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val
return; return;
} }


op->forward_inplace(*this, ncnn::get_default_option());
op->forward_inplace(*this);


delete op; delete op;
} }
@@ -234,7 +234,7 @@ void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, i


padding->load_param(pd); padding->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -257,7 +257,7 @@ void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, in


crop->load_param(pd); crop->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -277,7 +277,7 @@ void resize_bilinear(const Mat& src, Mat& dst, int w, int h, Allocator* allocato


interp->load_param(pd); interp->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -297,7 +297,7 @@ void resize_bicubic(const Mat& src, Mat& dst, int w, int h, Allocator* allocator


interp->load_param(pd); interp->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -315,7 +315,7 @@ void convert_packing(const Mat& src, Mat& dst, int _packing, Allocator* allocato


packing->load_param(pd); packing->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -334,7 +334,7 @@ void cast_float32_to_float16(const Mat& src, Mat& dst, Allocator* allocator, int


cast->load_param(pd); cast->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;


@@ -353,7 +353,7 @@ void cast_float16_to_float32(const Mat& src, Mat& dst, Allocator* allocator, int


cast->load_param(pd); cast->load_param(pd);


ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads; opt.num_threads = num_threads;
opt.blob_allocator = allocator; opt.blob_allocator = allocator;




+ 23
- 69
src/net.cpp View File

@@ -40,11 +40,6 @@ namespace ncnn {


Net::Net() Net::Net()
{ {
use_winograd_convolution = 1;
use_sgemm_convolution = 1;
use_int8_inference = 1;
use_vulkan_compute = 0;

#if NCNN_VULKAN #if NCNN_VULKAN
vkdev = 0; vkdev = 0;
weight_vkallocator = 0; weight_vkallocator = 0;
@@ -155,11 +150,8 @@ int Net::load_param(FILE* fp)
blobs.resize((size_t)blob_count); blobs.resize((size_t)blob_count);


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device(); vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


ParamDict pd; ParamDict pd;
@@ -192,7 +184,7 @@ int Net::load_param(FILE* fp)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev; layer->vkdev = vkdev;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


@@ -320,11 +312,8 @@ int Net::load_param_mem(const char* _mem)
blobs.resize(blob_count); blobs.resize(blob_count);


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device(); vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


ParamDict pd; ParamDict pd;
@@ -357,7 +346,7 @@ int Net::load_param_mem(const char* _mem)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev; layer->vkdev = vkdev;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


@@ -489,11 +478,8 @@ int Net::load_param_bin(FILE* fp)
blobs.resize(blob_count); blobs.resize(blob_count);


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device(); vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


ParamDict pd; ParamDict pd;
@@ -526,7 +512,7 @@ int Net::load_param_bin(FILE* fp)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev; layer->vkdev = vkdev;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


@@ -613,12 +599,6 @@ int Net::load_model(FILE* fp)
// load file // load file
int ret = 0; int ret = 0;


Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

ModelBinFromStdio mb(fp); ModelBinFromStdio mb(fp);
for (size_t i=0; i<layers.size(); i++) for (size_t i=0; i<layers.size(); i++)
{ {
@@ -649,7 +629,7 @@ int Net::load_model(FILE* fp)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{ {
create_pipeline(); create_pipeline();


@@ -709,11 +689,8 @@ int Net::load_param(const unsigned char* _mem)
blobs.resize(blob_count); blobs.resize(blob_count);


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device(); vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


ParamDict pd; ParamDict pd;
@@ -743,7 +720,7 @@ int Net::load_param(const unsigned char* _mem)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev; layer->vkdev = vkdev;
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


@@ -816,12 +793,6 @@ int Net::load_model(const unsigned char* _mem)
return -1; return -1;
} }


Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

const unsigned char* mem = _mem; const unsigned char* mem = _mem;
ModelBinFromMemory mb(mem); ModelBinFromMemory mb(mem);
for (size_t i=0; i<layers.size(); i++) for (size_t i=0; i<layers.size(); i++)
@@ -850,7 +821,7 @@ int Net::load_model(const unsigned char* _mem)
} }


#if NCNN_VULKAN #if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{ {
create_pipeline(); create_pipeline();


@@ -971,12 +942,6 @@ void Net::clear()
destroy_pipeline(); destroy_pipeline();
#endif // NCNN_VULKAN #endif // NCNN_VULKAN


Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

blobs.clear(); blobs.clear();
for (size_t i=0; i<layers.size(); i++) for (size_t i=0; i<layers.size(); i++)
{ {
@@ -1053,12 +1018,6 @@ int Net::upload_model()


int Net::create_pipeline() int Net::create_pipeline()
{ {
Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage) if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage)
{ {
{ {
@@ -1117,12 +1076,6 @@ int Net::create_pipeline()


int Net::destroy_pipeline() int Net::destroy_pipeline()
{ {
Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

if (cast_float32_to_float16) if (cast_float32_to_float16)
cast_float32_to_float16->destroy_pipeline(opt); cast_float32_to_float16->destroy_pipeline(opt);


@@ -1887,19 +1840,20 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector
Extractor::Extractor(const Net* _net, int blob_count) : net(_net) Extractor::Extractor(const Net* _net, int blob_count) : net(_net)
{ {
blob_mats.resize(blob_count); blob_mats.resize(blob_count);
opt = get_default_option();
opt = net->opt;


#if NCNN_VULKAN #if NCNN_VULKAN
opt.vulkan_compute = net->use_vulkan_compute;

if (net->use_vulkan_compute)
if (net->opt.use_vulkan_compute)
{ {
blob_mats_gpu.resize(blob_count);

// set default vulkan blob/workspace/staging allocator // set default vulkan blob/workspace/staging allocator
opt.blob_vkallocator = net->vkdev->allocator();
opt.workspace_vkallocator = net->vkdev->allocator();
opt.staging_vkallocator = net->vkdev->staging_allocator();
if (!opt.blob_vkallocator)
opt.blob_vkallocator = net->vkdev->allocator();
if (!opt.workspace_vkallocator)
opt.workspace_vkallocator = net->vkdev->allocator();
if (!opt.staging_vkallocator)
opt.staging_vkallocator = net->vkdev->staging_allocator();

blob_mats_gpu.resize(blob_count);
} }
#endif // NCNN_VULKAN #endif // NCNN_VULKAN
} }
@@ -1927,9 +1881,9 @@ void Extractor::set_workspace_allocator(Allocator* allocator)
#if NCNN_VULKAN #if NCNN_VULKAN
void Extractor::set_vulkan_compute(bool enable) void Extractor::set_vulkan_compute(bool enable)
{ {
if (net->use_vulkan_compute)
if (net->opt.use_vulkan_compute)
{ {
opt.vulkan_compute = enable;
opt.use_vulkan_compute = enable;
} }
else else
{ {
@@ -1995,7 +1949,7 @@ int Extractor::extract(int blob_index, Mat& feat)
int layer_index = net->blobs[blob_index].producer; int layer_index = net->blobs[blob_index].producer;


#if NCNN_VULKAN #if NCNN_VULKAN
if (opt.vulkan_compute)
if (opt.use_vulkan_compute)
{ {
ncnn::VkCompute cmd(net->vkdev); ncnn::VkCompute cmd(net->vkdev);
#if NCNN_BENCHMARK #if NCNN_BENCHMARK


+ 12
- 29
src/net.h View File

@@ -17,10 +17,11 @@


#include <stdio.h> #include <stdio.h>
#include <vector> #include <vector>
#include "platform.h"
#include "blob.h" #include "blob.h"
#include "layer.h" #include "layer.h"
#include "mat.h" #include "mat.h"
#include "platform.h"
#include "option.h"


namespace ncnn { namespace ncnn {


@@ -36,6 +37,16 @@ public:
// clear and destroy // clear and destroy
~Net(); ~Net();


public:
// option
Option opt;

#if NCNN_VULKAN

void set_vulkan_device(const VulkanDevice* vkdev);

#endif // NCNN_VULKAN

#if NCNN_STRING #if NCNN_STRING
// register custom layer by layer type name // register custom layer by layer type name
// return 0 if success // return 0 if success
@@ -82,34 +93,6 @@ public:
// construct an Extractor from network // construct an Extractor from network
Extractor create_extractor() const; Extractor create_extractor() const;


public:
// enable winograd convolution optimization
// improve convolution 3x3 stride1 performace, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
int use_winograd_convolution;

// enable sgemm convolution optimization
// improve convolution 1x1 stride1 performace, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
int use_sgemm_convolution;

// enable quantized int8 inference
// use low-precision int8 path for quantized model
// changes should be applied before loading network structure and weight
// enabled by default
int use_int8_inference;

// enable vulkan compute
int use_vulkan_compute;

#if NCNN_VULKAN

void set_vulkan_device(const VulkanDevice* vkdev);

#endif // NCNN_VULKAN

protected: protected:
// parse the structure of network // parse the structure of network
// fuse int8 op dequantize and quantize by requantize // fuse int8 op dequantize and quantize by requantize


+ 49
- 0
src/option.cpp View File

@@ -0,0 +1,49 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "option.h"
#include "cpu.h"

namespace ncnn {

// Build an Option carrying the library defaults.
// Members are initialized in declaration order; the thread count is
// resolved in the body because it needs a runtime query.
Option::Option()
    : lightmode(true),
      num_threads(1),
      blob_allocator(0),
      workspace_allocator(0),
#if NCNN_VULKAN
      blob_vkallocator(0),
      workspace_vkallocator(0),
      staging_vkallocator(0),
#endif // NCNN_VULKAN
      use_winograd_convolution(true),
      use_sgemm_convolution(true),
      use_int8_inference(true),
      use_vulkan_compute(false), // TODO enable me
      use_fp16_packed(false),    // TODO enable me
      use_fp16_storage(true),
      use_fp16_arithmetic(false),
      use_int8_storage(true),
      use_int8_arithmetic(false)
{
    // Take the detected cpu count when it is usable; otherwise keep the
    // safe fallback of a single thread set in the initializer list.
    const int detected_cpu_count = get_cpu_count();
    if (detected_cpu_count > 0)
        num_threads = detected_cpu_count;
}

} // namespace ncnn

+ 91
- 0
src/option.h View File

@@ -0,0 +1,91 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
class VkAllocator;
#endif // NCNN_VULKAN

class Allocator;
// Option groups the runtime settings in one object: threading, cpu
// allocators, vulkan allocators (when NCNN_VULKAN is set) and feature toggles.
class Option
{
public:
// default option
// fills every member with its default value
Option();

public:
// light mode
// intermediate blob will be recycled when enabled
// enabled by default
bool lightmode;

// thread count
// default value is the one returned by get_cpu_count()
// the default constructor falls back to 1 when that value is not positive
int num_threads;

// blob memory allocator
// initialized to 0 by the default constructor
Allocator* blob_allocator;

// workspace memory allocator
// initialized to 0 by the default constructor
Allocator* workspace_allocator;

#if NCNN_VULKAN
// vulkan blob memory allocator
// initialized to 0 by the default constructor
VkAllocator* blob_vkallocator;

// vulkan workspace memory allocator
// initialized to 0 by the default constructor
VkAllocator* workspace_vkallocator;

// vulkan staging memory allocator
// initialized to 0 by the default constructor
VkAllocator* staging_vkallocator;
#endif // NCNN_VULKAN

// enable winograd convolution optimization
// improve convolution 3x3 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_winograd_convolution;

// enable sgemm convolution optimization
// improve convolution 1x1 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
bool use_sgemm_convolution;

// enable quantized int8 inference
// use low-precision int8 path for quantized model
// changes should be applied before loading network structure and weight
// enabled by default
bool use_int8_inference;

// enable vulkan compute
// disabled by default
bool use_vulkan_compute;

// enable options for gpu inference
// defaults: fp16 storage and int8 storage enabled,
// fp16 packed, fp16 arithmetic and int8 arithmetic disabled
bool use_fp16_packed;
bool use_fp16_storage;
bool use_fp16_arithmetic;
bool use_int8_storage;
bool use_int8_arithmetic;
};

} // namespace ncnn

#endif // NCNN_OPTION_H

+ 1
- 12
src/pipeline.cpp View File

@@ -47,18 +47,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const char*


// fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name); // fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name);


create_descriptorset_layout(binding_count);

create_pipeline_layout(push_constant_count);

create_pipeline(local_shader_module, entry_name, specializations);

if (vkdev->info.support_VK_KHR_descriptor_update_template)
{
create_descriptor_update_template(binding_count);
}

return 0;
return create(local_shader_module, entry_name, specializations, binding_count, push_constant_count);
} }


int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count) int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count)


Loading…
Cancel
Save