Browse Source

option api changes

tags/20190611
nihuini 7 years ago
parent
commit
838c5df839
27 changed files with 272 additions and 290 deletions
  1. +37
    -34
      benchmark/benchncnn.cpp
  2. +1
    -1
      examples/fasterrcnn.cpp
  3. +1
    -1
      examples/mobilenetssd.cpp
  4. +1
    -1
      examples/mobilenetv2ssdlite.cpp
  5. +4
    -3
      examples/peleenetssd_seg.cpp
  6. +1
    -1
      examples/rfcn.cpp
  7. +1
    -1
      examples/shufflenetv2.cpp
  8. +1
    -1
      examples/squeezenet.cpp
  9. +1
    -1
      examples/squeezenetssd.cpp
  10. +1
    -1
      examples/yolov2.cpp
  11. +1
    -1
      examples/yolov3.cpp
  12. +4
    -2
      src/CMakeLists.txt
  13. +6
    -6
      src/gpu.cpp
  14. +0
    -40
      src/layer.cpp
  15. +12
    -62
      src/layer.h
  16. +4
    -4
      src/layer/convolution.cpp
  17. +3
    -3
      src/layer/convolutiondepthwise.cpp
  18. +3
    -3
      src/layer/innerproduct.cpp
  19. +2
    -2
      src/layer/x86/convolution_x86.cpp
  20. +2
    -2
      src/layer/x86/convolutiondepthwise_x86.cpp
  21. +2
    -2
      src/layer/yolodetectionoutput.cpp
  22. +8
    -8
      src/mat.cpp
  23. +23
    -69
      src/net.cpp
  24. +12
    -29
      src/net.h
  25. +49
    -0
      src/option.cpp
  26. +91
    -0
      src/option.h
  27. +1
    -12
      src/pipeline.cpp

+ 37
- 34
benchmark/benchncnn.cpp View File

@@ -57,9 +57,6 @@ public:
// load file
int ret = 0;

Option opt;
opt.vulkan_compute = use_vulkan_compute;

ModelBinFromEmpty mb;
for (size_t i=0; i<layers.size(); i++)
{
@@ -83,7 +80,7 @@ public:
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{
upload_model();

@@ -100,12 +97,12 @@ public:
static int g_warmup_loop_count = 3;
static int g_loop_count = 4;

static ncnn::Option g_default_option;

static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
static ncnn::PoolAllocator g_workspace_pool_allocator;

#if NCNN_VULKAN
static bool g_use_vulkan_compute = false;

static ncnn::VulkanDevice* g_vkdev = 0;
static ncnn::VkAllocator* g_blob_vkallocator = 0;
static ncnn::VkAllocator* g_staging_vkallocator = 0;
@@ -115,11 +112,11 @@ void benchmark(const char* comment, const ncnn::Mat& in)
{
ncnn::BenchNet net;

net.opt = g_default_option;

#if NCNN_VULKAN
if (g_use_vulkan_compute)
if (net.opt.use_vulkan_compute)
{
net.use_vulkan_compute = g_use_vulkan_compute;

net.set_vulkan_device(g_vkdev);
}
#endif // NCNN_VULKAN
@@ -134,7 +131,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
g_workspace_pool_allocator.clear();

#if NCNN_VULKAN
if (g_use_vulkan_compute)
if (net.opt.use_vulkan_compute)
{
g_blob_vkallocator->clear();
g_staging_vkallocator->clear();
@@ -145,7 +142,7 @@ void benchmark(const char* comment, const ncnn::Mat& in)
#ifdef _WIN32
Sleep(10 * 1000);
#else
sleep(10);
// sleep(10);
#endif

ncnn::Mat out;
@@ -210,14 +207,15 @@ int main(int argc, char** argv)
gpu_device = atoi(argv[4]);
}

bool use_vulkan_compute = gpu_device != -1;

g_loop_count = loop_count;

g_blob_pool_allocator.set_size_compare_ratio(0.0f);
g_workspace_pool_allocator.set_size_compare_ratio(0.5f);

#if NCNN_VULKAN
g_use_vulkan_compute = gpu_device != -1;
if (g_use_vulkan_compute)
if (use_vulkan_compute)
{
g_warmup_loop_count = 10;

@@ -228,20 +226,25 @@ int main(int argc, char** argv)
}
#endif // NCNN_VULKAN

ncnn::Option opt;
opt.lightmode = true;
opt.num_threads = num_threads;
opt.blob_allocator = &g_blob_pool_allocator;
opt.workspace_allocator = &g_workspace_pool_allocator;

// default option
g_default_option.lightmode = true;
g_default_option.num_threads = num_threads;
g_default_option.blob_allocator = &g_blob_pool_allocator;
g_default_option.workspace_allocator = &g_workspace_pool_allocator;
#if NCNN_VULKAN
opt.vulkan_compute = g_use_vulkan_compute;
opt.blob_vkallocator = g_blob_vkallocator;
opt.workspace_vkallocator = g_blob_vkallocator;
opt.staging_vkallocator = g_staging_vkallocator;
g_default_option.blob_vkallocator = g_blob_vkallocator;
g_default_option.workspace_vkallocator = g_blob_vkallocator;
g_default_option.staging_vkallocator = g_staging_vkallocator;
#endif // NCNN_VULKAN

ncnn::set_default_option(opt);
g_default_option.use_winograd_convolution = true;
g_default_option.use_sgemm_convolution = true;
g_default_option.use_int8_inference = true;
g_default_option.use_vulkan_compute = use_vulkan_compute;
g_default_option.use_fp16_packed = true;
g_default_option.use_fp16_storage = true;
g_default_option.use_fp16_arithmetic = true;
g_default_option.use_int8_storage = true;
g_default_option.use_int8_arithmetic = true;

ncnn::set_cpu_powersave(powersave);

@@ -257,21 +260,21 @@ int main(int argc, char** argv)
benchmark("squeezenet", ncnn::Mat(227, 227, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3));

benchmark("mobilenet", ncnn::Mat(224, 224, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3));

benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3));

// #if NCNN_VULKAN
// if (!g_use_vulkan_compute)
// if (!use_vulkan_compute)
// #endif // NCNN_VULKAN
// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3));

@@ -284,14 +287,14 @@ int main(int argc, char** argv)
benchmark("googlenet", ncnn::Mat(224, 224, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3));

benchmark("resnet18", ncnn::Mat(224, 224, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3));

@@ -300,28 +303,28 @@ int main(int argc, char** argv)
benchmark("vgg16", ncnn::Mat(224, 224, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3));

benchmark("resnet50", ncnn::Mat(224, 224, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3));

benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3));

benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3));

#if NCNN_VULKAN
if (!g_use_vulkan_compute)
if (!use_vulkan_compute)
#endif // NCNN_VULKAN
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3));



+ 1
- 1
examples/fasterrcnn.cpp View File

@@ -107,7 +107,7 @@ static int detect_fasterrcnn(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net fasterrcnn;

#if NCNN_VULKAN
fasterrcnn.use_vulkan_compute = true;
fasterrcnn.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// original pretrained model from https://github.com/rbgirshick/py-faster-rcnn


+ 1
- 1
examples/mobilenetssd.cpp View File

@@ -36,7 +36,7 @@ static int detect_mobilenet(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net mobilenet;

#if NCNN_VULKAN
mobilenet.use_vulkan_compute = true;
mobilenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// model is converted from https://github.com/chuanqi305/MobileNet-SSD


+ 1
- 1
examples/mobilenetv2ssdlite.cpp View File

@@ -39,7 +39,7 @@ static int detect_mobilenetv2(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net mobilenetv2;

#if NCNN_VULKAN
mobilenetv2.use_vulkan_compute = true;
mobilenetv2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

mobilenetv2.register_custom_layer("Silence", Noop_layer_creator);


+ 4
- 3
examples/peleenetssd_seg.cpp View File

@@ -36,7 +36,7 @@ static int detect_peleenet(const cv::Mat& bgr, std::vector<Object>& objects,ncnn
ncnn::Net peleenet;

#if NCNN_VULKAN
peleenet.use_vulkan_compute = true;
peleenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// model is converted from https://github.com/eric612/MobileNet-YOLO
@@ -94,7 +94,8 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
"traffic light","traffic sign","train"};

cv::Mat image = bgr.clone();
std::vector<int> color = {128,255,128,244,35,232};
const int color[] = {128,255,128,244,35,232};
const int color_count = sizeof(color) / sizeof(int);
for (size_t i = 0; i < objects.size(); i++)
{
@@ -147,7 +148,7 @@ static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects,
}
if(index > -1) {
int color_index = (index)*3;
if(color_index<color.size()) {
if(color_index<color_count) {
int b = color[color_index];
int g = color[color_index+1];
int r = color[color_index+2];


+ 1
- 1
examples/rfcn.cpp View File

@@ -121,7 +121,7 @@ static int detect_rfcn(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net rfcn;

#if NCNN_VULKAN
rfcn.use_vulkan_compute = true;
rfcn.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// original pretrained model from https://github.com/YuwenXiong/py-R-FCN


+ 1
- 1
examples/shufflenetv2.cpp View File

@@ -29,7 +29,7 @@ static int detect_shufflenetv2(const cv::Mat& bgr, std::vector<float>& cls_score
ncnn::Net shufflenetv2;

#if NCNN_VULKAN
shufflenetv2.use_vulkan_compute = true;
shufflenetv2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// https://github.com/miaow1988/ShuffleNet_V2_pytorch_caffe


+ 1
- 1
examples/squeezenet.cpp View File

@@ -29,7 +29,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<float>& cls_scores)
ncnn::Net squeezenet;

#if NCNN_VULKAN
squeezenet.use_vulkan_compute = true;
squeezenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

squeezenet.load_param("squeezenet_v1.1.param");


+ 1
- 1
examples/squeezenetssd.cpp View File

@@ -36,7 +36,7 @@ static int detect_squeezenet(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net squeezenet;

#if NCNN_VULKAN
squeezenet.use_vulkan_compute = true;
squeezenet.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// original pretrained model from https://github.com/chuanqi305/SqueezeNet-SSD


+ 1
- 1
examples/yolov2.cpp View File

@@ -36,7 +36,7 @@ static int detect_yolov2(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net yolov2;

#if NCNN_VULKAN
yolov2.use_vulkan_compute = true;
yolov2.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// original pretrained model from https://github.com/eric612/MobileNet-YOLO


+ 1
- 1
examples/yolov3.cpp View File

@@ -36,7 +36,7 @@ static int detect_yolov3(const cv::Mat& bgr, std::vector<Object>& objects)
ncnn::Net yolov3;

#if NCNN_VULKAN
yolov3.use_vulkan_compute = true;
yolov3.opt.use_vulkan_compute = true;
#endif // NCNN_VULKAN

// original pretrained model from https://github.com/eric612/MobileNet-YOLO


+ 4
- 2
src/CMakeLists.txt View File

@@ -25,6 +25,7 @@ set(ncnn_SRCS
modelbin.cpp
net.cpp
opencv.cpp
option.cpp
paramdict.cpp
pipeline.cpp
benchmark.cpp
@@ -87,8 +88,8 @@ macro(ncnn_add_layer class)
if(WITH_LAYER_${name}_vulkan)
set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n")
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan")
set(create_pipeline_content "${create_pipeline_content} if (opt.vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
set(destroy_pipeline_content " if (opt.vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
set(create_pipeline_content "${create_pipeline_content} if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
set(destroy_pipeline_content " if (opt.use_vulkan_compute) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")

file(GLOB_RECURSE SHADER_SRCS "layer/vulkan/shader/${name}.comp")
file(GLOB_RECURSE SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp")
@@ -297,6 +298,7 @@ if(NCNN_INSTALL_SDK)
modelbin.h
net.h
opencv.h
option.h
paramdict.h
pipeline.h
benchmark.h


+ 6
- 6
src/gpu.cpp View File

@@ -645,7 +645,7 @@ int create_gpu_instance()
}

// check features
gpu_info.support_fp16_packed = false;// TODO
gpu_info.support_fp16_packed = true;
gpu_info.support_fp16_storage = false;
gpu_info.support_fp16_arithmetic = false;
gpu_info.support_int8_storage = false;
@@ -698,11 +698,11 @@ int create_gpu_instance()
{
gpu_info.support_fp16_storage = query16BitStorageFeatures.storageBuffer16BitAccess && query16BitStorageFeatures.uniformAndStorageBuffer16BitAccess;
}
// if (gpu_info.support_VK_KHR_shader_float16_int8)
// {
// gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
// gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
// }
if (gpu_info.support_VK_KHR_shader_float16_int8)
{
gpu_info.support_fp16_arithmetic = queryFloat16Int8Features.shaderFloat16;
gpu_info.support_int8_arithmetic = queryFloat16Int8Features.shaderInt8;
}
}
else
{


+ 0
- 40
src/layer.cpp View File

@@ -27,46 +27,6 @@

namespace ncnn {

// Build an Option holding the default settings.
Option::Option()
{
    // threading: use the count reported by get_cpu_count()
    num_threads = get_cpu_count();

    // light mode starts enabled
    lightmode = true;

    // no host allocators are attached
    workspace_allocator = 0;
    blob_allocator = 0;

#if NCNN_VULKAN
    // no vulkan allocators are attached
    staging_vkallocator = 0;
    workspace_vkallocator = 0;
    blob_vkallocator = 0;
#endif // NCNN_VULKAN

    // vulkan compute starts disabled
    vulkan_compute = false;

    // winograd / sgemm convolution and int8 inference start enabled
    use_int8_inference = 1;
    use_sgemm_convolution = 1;
    use_winograd_convolution = 1;
}

// File-local storage for the global default option (default-constructed).
static Option g_default_option;

// Read-only access to the global default option.
const Option& get_default_option()
{
    const Option& current_default = g_default_option;
    return current_default;
}

// Replace the global default option with a copy of opt.
// Returns 0 on success. When opt.num_threads is not positive, an error is
// written to stderr, the stored default is left unchanged and -1 is returned.
int set_default_option(const Option& opt)
{
    const bool thread_count_ok = opt.num_threads > 0;
    if (thread_count_ok)
    {
        g_default_option = opt;
        return 0;
    }

    fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
    return -1;
}

Layer::Layer()
{
one_blob_only = false;


+ 12
- 62
src/layer.h View File

@@ -19,10 +19,11 @@
#include <string>
#include <vector>
#include <math.h>
#include "platform.h"
#include "mat.h"
#include "modelbin.h"
#include "option.h"
#include "paramdict.h"
#include "platform.h"

#if NCNN_VULKAN
#include <vulkan/vulkan.h>
@@ -32,57 +33,6 @@

namespace ncnn {

#if NCNN_VULKAN
class VkAllocator;
#endif // NCNN_VULKAN

class Allocator;

// Bundle of runtime settings: threading, memory allocators and feature switches.
class Option
{
public:
    // construct an option holding the default settings
    Option();

    // light mode switch, enabled by default
    // when enabled, intermediate blobs are recycled
    bool lightmode;

    // number of threads
    // defaults to the value returned by get_cpu_count()
    int num_threads;

    // allocator used for blob memory
    Allocator* blob_allocator;

    // allocator used for workspace memory
    Allocator* workspace_allocator;

    // vulkan compute switch
    bool vulkan_compute;

#if NCNN_VULKAN
    // vulkan allocator used for blob memory
    VkAllocator* blob_vkallocator;

    // vulkan allocator used for workspace memory
    VkAllocator* workspace_vkallocator;

    // vulkan allocator used for staging memory
    VkAllocator* staging_vkallocator;
#endif // NCNN_VULKAN

    // feature switches, stored as int
    int use_winograd_convolution;
    int use_sgemm_convolution;
    int use_int8_inference;
};

// the global default option
const Option& get_default_option();
int set_default_option(const Option& opt);

class Layer
{
public:
@@ -100,10 +50,10 @@ public:
virtual int load_model(const ModelBin& mb);

//
virtual int create_pipeline(const Option& opt = get_default_option());
virtual int create_pipeline(const Option& opt = Option());

//
virtual int destroy_pipeline(const Option& opt = get_default_option());
virtual int destroy_pipeline(const Option& opt = Option());

public:
// one input and one output blob
@@ -118,13 +68,13 @@ public:
public:
// implement inference
// return 0 if success
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = get_default_option()) const;
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = get_default_option()) const;
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt = Option()) const;
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt = Option()) const;

// implement inplace inference
// return 0 if success
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = get_default_option()) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = get_default_option()) const;
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt = Option()) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt = Option()) const;

#if NCNN_VULKAN
public:
@@ -134,13 +84,13 @@ public:
public:
// implement inference
// return 0 if success
virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
virtual int forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt = Option()) const;

// implement inplace inference
// return 0 if success
virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = get_default_option()) const;
virtual int forward_inplace(std::vector<VkMat>& bottom_top_blobs, VkCompute& cmd, const Option& opt = Option()) const;
virtual int forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& opt = Option()) const;

public:
// assigned immediately after creating this layer


+ 4
- 4
src/layer/convolution.cpp View File

@@ -74,7 +74,7 @@ int Convolution::load_model(const ModelBin& mb)
int Convolution::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

use_int8_inference = opt.use_int8_inference;

@@ -111,7 +111,7 @@ int Convolution::create_pipeline(const Option& opt)

op->create_pipeline(opt_cpu);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator;

const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -173,7 +173,7 @@ int Convolution::create_pipeline(const Option& opt)
int Convolution::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (quantize)
{
@@ -286,7 +286,7 @@ int Convolution::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
op->load_model(ModelBinFromMatArray(weights));

Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
op->create_pipeline(opt_cpu);

// forward


+ 3
- 3
src/layer/convolutiondepthwise.cpp View File

@@ -98,7 +98,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
int ConvolutionDepthWise::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

use_int8_inference = opt.use_int8_inference;

@@ -134,7 +134,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)

op->create_pipeline(opt_cpu);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator;

const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
@@ -198,7 +198,7 @@ int ConvolutionDepthWise::create_pipeline(const Option& opt)
int ConvolutionDepthWise::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

for (int i=0; i<(int)quantize_ops.size(); i++)
{


+ 3
- 3
src/layer/innerproduct.cpp View File

@@ -65,7 +65,7 @@ int InnerProduct::load_model(const ModelBin& mb)
int InnerProduct::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

use_int8_inference = opt.use_int8_inference;

@@ -143,7 +143,7 @@ int InnerProduct::create_pipeline(const Option& opt)

op->create_pipeline(opt_cpu);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.blob_allocator = int8_weight_data.allocator;

const Mat weight_data_n = weight_data.range(weight_data_size_output * n, weight_data_size_output);
@@ -162,7 +162,7 @@ int InnerProduct::create_pipeline(const Option& opt)
int InnerProduct::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (quantize)
{


+ 2
- 2
src/layer/x86/convolution_x86.cpp View File

@@ -45,7 +45,7 @@ Convolution_x86::Convolution_x86()
int Convolution_x86::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation_type == 1)
{
@@ -120,7 +120,7 @@ int Convolution_x86::create_pipeline(const Option& opt)
int Convolution_x86::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation)
{


+ 2
- 2
src/layer/x86/convolutiondepthwise_x86.cpp View File

@@ -36,7 +36,7 @@ ConvolutionDepthWise_x86::ConvolutionDepthWise_x86()
int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation_type == 1)
{
@@ -167,7 +167,7 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
int ConvolutionDepthWise_x86::destroy_pipeline(const Option& opt)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;

if (activation)
{


+ 2
- 2
src/layer/yolodetectionoutput.cpp View File

@@ -49,7 +49,7 @@ int YoloDetectionOutput::create_pipeline(const Option& opt)
softmax->load_param(pd);

Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
softmax->create_pipeline(opt_cpu);
}

@@ -61,7 +61,7 @@ int YoloDetectionOutput::destroy_pipeline(const Option& opt)
if (softmax)
{
Option opt_cpu = opt;
opt_cpu.vulkan_compute = false;
opt_cpu.use_vulkan_compute = false;
softmax->destroy_pipeline(opt_cpu);
delete softmax;
softmax = 0;


+ 8
- 8
src/mat.cpp View File

@@ -95,7 +95,7 @@ void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_val
return;
}

op->forward_inplace(*this, ncnn::get_default_option());
op->forward_inplace(*this);

delete op;
}
@@ -234,7 +234,7 @@ void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, i

padding->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -257,7 +257,7 @@ void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, in

crop->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -277,7 +277,7 @@ void resize_bilinear(const Mat& src, Mat& dst, int w, int h, Allocator* allocato

interp->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -297,7 +297,7 @@ void resize_bicubic(const Mat& src, Mat& dst, int w, int h, Allocator* allocator

interp->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -315,7 +315,7 @@ void convert_packing(const Mat& src, Mat& dst, int _packing, Allocator* allocato

packing->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -334,7 +334,7 @@ void cast_float32_to_float16(const Mat& src, Mat& dst, Allocator* allocator, int

cast->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;

@@ -353,7 +353,7 @@ void cast_float16_to_float32(const Mat& src, Mat& dst, Allocator* allocator, int

cast->load_param(pd);

ncnn::Option opt = ncnn::get_default_option();
ncnn::Option opt;
opt.num_threads = num_threads;
opt.blob_allocator = allocator;



+ 23
- 69
src/net.cpp View File

@@ -40,11 +40,6 @@ namespace ncnn {

Net::Net()
{
use_winograd_convolution = 1;
use_sgemm_convolution = 1;
use_int8_inference = 1;
use_vulkan_compute = 0;

#if NCNN_VULKAN
vkdev = 0;
weight_vkallocator = 0;
@@ -155,11 +150,8 @@ int Net::load_param(FILE* fp)
blobs.resize((size_t)blob_count);

#if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN

ParamDict pd;
@@ -192,7 +184,7 @@ int Net::load_param(FILE* fp)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev;
#endif // NCNN_VULKAN

@@ -320,11 +312,8 @@ int Net::load_param_mem(const char* _mem)
blobs.resize(blob_count);

#if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN

ParamDict pd;
@@ -357,7 +346,7 @@ int Net::load_param_mem(const char* _mem)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev;
#endif // NCNN_VULKAN

@@ -489,11 +478,8 @@ int Net::load_param_bin(FILE* fp)
blobs.resize(blob_count);

#if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN

ParamDict pd;
@@ -526,7 +512,7 @@ int Net::load_param_bin(FILE* fp)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev;
#endif // NCNN_VULKAN

@@ -613,12 +599,6 @@ int Net::load_model(FILE* fp)
// load file
int ret = 0;

Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

ModelBinFromStdio mb(fp);
for (size_t i=0; i<layers.size(); i++)
{
@@ -649,7 +629,7 @@ int Net::load_model(FILE* fp)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{
create_pipeline();

@@ -709,11 +689,8 @@ int Net::load_param(const unsigned char* _mem)
blobs.resize(blob_count);

#if NCNN_VULKAN
if (use_vulkan_compute && !vkdev)
{
// use default vulkan device
if (opt.use_vulkan_compute && !vkdev)
vkdev = get_default_gpu_device();
}
#endif // NCNN_VULKAN

ParamDict pd;
@@ -743,7 +720,7 @@ int Net::load_param(const unsigned char* _mem)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
layer->vkdev = vkdev;
#endif // NCNN_VULKAN

@@ -816,12 +793,6 @@ int Net::load_model(const unsigned char* _mem)
return -1;
}

Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

const unsigned char* mem = _mem;
ModelBinFromMemory mb(mem);
for (size_t i=0; i<layers.size(); i++)
@@ -850,7 +821,7 @@ int Net::load_model(const unsigned char* _mem)
}

#if NCNN_VULKAN
if (use_vulkan_compute)
if (opt.use_vulkan_compute)
{
create_pipeline();

@@ -971,12 +942,6 @@ void Net::clear()
destroy_pipeline();
#endif // NCNN_VULKAN

Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

blobs.clear();
for (size_t i=0; i<layers.size(); i++)
{
@@ -1053,12 +1018,6 @@ int Net::upload_model()

int Net::create_pipeline()
{
Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

if (vkdev->info.support_fp16_packed || vkdev->info.support_fp16_storage)
{
{
@@ -1117,12 +1076,6 @@ int Net::create_pipeline()

int Net::destroy_pipeline()
{
Option opt;
opt.vulkan_compute = use_vulkan_compute;
opt.use_winograd_convolution = use_winograd_convolution;
opt.use_sgemm_convolution = use_sgemm_convolution;
opt.use_int8_inference = use_int8_inference;

if (cast_float32_to_float16)
cast_float32_to_float16->destroy_pipeline(opt);

@@ -1887,19 +1840,20 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector
Extractor::Extractor(const Net* _net, int blob_count) : net(_net)
{
blob_mats.resize(blob_count);
opt = get_default_option();
opt = net->opt;

#if NCNN_VULKAN
opt.vulkan_compute = net->use_vulkan_compute;

if (net->use_vulkan_compute)
if (net->opt.use_vulkan_compute)
{
blob_mats_gpu.resize(blob_count);

// set default vulkan blob/workspace/staging allocator
opt.blob_vkallocator = net->vkdev->allocator();
opt.workspace_vkallocator = net->vkdev->allocator();
opt.staging_vkallocator = net->vkdev->staging_allocator();
if (!opt.blob_vkallocator)
opt.blob_vkallocator = net->vkdev->allocator();
if (!opt.workspace_vkallocator)
opt.workspace_vkallocator = net->vkdev->allocator();
if (!opt.staging_vkallocator)
opt.staging_vkallocator = net->vkdev->staging_allocator();

blob_mats_gpu.resize(blob_count);
}
#endif // NCNN_VULKAN
}
@@ -1927,9 +1881,9 @@ void Extractor::set_workspace_allocator(Allocator* allocator)
#if NCNN_VULKAN
void Extractor::set_vulkan_compute(bool enable)
{
if (net->use_vulkan_compute)
if (net->opt.use_vulkan_compute)
{
opt.vulkan_compute = enable;
opt.use_vulkan_compute = enable;
}
else
{
@@ -1995,7 +1949,7 @@ int Extractor::extract(int blob_index, Mat& feat)
int layer_index = net->blobs[blob_index].producer;

#if NCNN_VULKAN
if (opt.vulkan_compute)
if (opt.use_vulkan_compute)
{
ncnn::VkCompute cmd(net->vkdev);
#if NCNN_BENCHMARK


+ 12
- 29
src/net.h View File

@@ -17,10 +17,11 @@

#include <stdio.h>
#include <vector>
#include "platform.h"
#include "blob.h"
#include "layer.h"
#include "mat.h"
#include "platform.h"
#include "option.h"

namespace ncnn {

@@ -36,6 +37,16 @@ public:
// clear and destroy
~Net();

public:
// option
Option opt;

#if NCNN_VULKAN

void set_vulkan_device(const VulkanDevice* vkdev);

#endif // NCNN_VULKAN

#if NCNN_STRING
// register custom layer by layer type name
// return 0 if success
@@ -82,34 +93,6 @@ public:
// construct an Extractor from network
Extractor create_extractor() const;

public:
// enable winograd convolution optimization
// improve convolution 3x3 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
int use_winograd_convolution;

// enable sgemm convolution optimization
// improve convolution 1x1 stride1 performance, may consume more memory
// changes should be applied before loading network structure and weight
// enabled by default
int use_sgemm_convolution;

// enable quantized int8 inference
// use low-precision int8 path for quantized model
// changes should be applied before loading network structure and weight
// enabled by default
int use_int8_inference;

// enable vulkan compute
int use_vulkan_compute;

#if NCNN_VULKAN

void set_vulkan_device(const VulkanDevice* vkdev);

#endif // NCNN_VULKAN

protected:
// parse the structure of network
// fuse int8 op dequantize and quantize by requantize


+ 49
- 0
src/option.cpp View File

@@ -0,0 +1,49 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "option.h"
#include "cpu.h"

namespace ncnn {

// Build the default option set.
Option::Option()
{
    lightmode = true;

    // start from the cpu count reported by get_cpu_count(),
    // clamped so that at least one thread is always used
    const int cpu_count = get_cpu_count();
    num_threads = cpu_count > 0 ? cpu_count : 1;

    // allocator pointers start out null
    blob_allocator = 0;
    workspace_allocator = 0;

#if NCNN_VULKAN
    // vulkan allocator pointers start out null as well
    blob_vkallocator = 0;
    workspace_vkallocator = 0;
    staging_vkallocator = 0;
#endif // NCNN_VULKAN

    // convolution and int8 paths are switched on
    use_winograd_convolution = true;
    use_sgemm_convolution = true;
    use_int8_inference = true;

    use_vulkan_compute = false; // TODO enable me

    // gpu inference flags
    use_fp16_packed = false; // TODO enable me
    use_fp16_storage = true;
    use_fp16_arithmetic = false;
    use_int8_storage = true;
    use_int8_arithmetic = false;
}

} // namespace ncnn

+ 91
- 0
src/option.h View File

@@ -0,0 +1,91 @@
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_OPTION_H
#define NCNN_OPTION_H

#include "platform.h"

namespace ncnn {

#if NCNN_VULKAN
class VkAllocator;
#endif // NCNN_VULKAN

class Allocator;

// Collection of option flags and allocator pointers.
// All fields are public; Option() produces the default option.
class Option
{
public:
    // construct the default option
    Option();

    // light mode
    // when enabled, intermediate blobs are recycled
    // enabled by default
    bool lightmode;

    // number of threads
    // defaults to the value returned by get_cpu_count()
    int num_threads;

    // allocator used for blob memory
    Allocator* blob_allocator;

    // allocator used for workspace memory
    Allocator* workspace_allocator;

#if NCNN_VULKAN
    // the vulkan allocators below exist only when NCNN_VULKAN is enabled

    // allocator used for blob memory
    VkAllocator* blob_vkallocator;

    // allocator used for workspace memory
    VkAllocator* workspace_vkallocator;

    // allocator used for staging memory
    VkAllocator* staging_vkallocator;
#endif // NCNN_VULKAN

    // winograd convolution optimization
    // improves convolution 3x3 stride1 performance, may consume more memory
    // must be set before loading network structure and weight
    // enabled by default
    bool use_winograd_convolution;

    // sgemm convolution optimization
    // improves convolution 1x1 stride1 performance, may consume more memory
    // must be set before loading network structure and weight
    // enabled by default
    bool use_sgemm_convolution;

    // quantized int8 inference
    // takes the low-precision int8 path for quantized models
    // must be set before loading network structure and weight
    // enabled by default
    bool use_int8_inference;

    // vulkan compute
    bool use_vulkan_compute;

    // options for gpu inference
    bool use_fp16_packed;
    bool use_fp16_storage;
    bool use_fp16_arithmetic;
    bool use_int8_storage;
    bool use_int8_arithmetic;
};

} // namespace ncnn

#endif // NCNN_OPTION_H

+ 1
- 12
src/pipeline.cpp View File

@@ -47,18 +47,7 @@ int Pipeline::create(const uint32_t* spv_data, size_t spv_data_size, const char*

// fprintf(stderr, "local_shader_module %p %s created\n", local_shader_module, entry_name);

create_descriptorset_layout(binding_count);

create_pipeline_layout(push_constant_count);

create_pipeline(local_shader_module, entry_name, specializations);

if (vkdev->info.support_VK_KHR_descriptor_update_template)
{
create_descriptor_update_template(binding_count);
}

return 0;
return create(local_shader_module, entry_name, specializations, binding_count, push_constant_count);
}

int Pipeline::create(VkShaderModule shader_module, const char* entry_name, const std::vector<vk_specialization_type>& specializations, int binding_count, int push_constant_count)


Loading…
Cancel
Save