From 6cefaad9578ef76ec9641dd932841920458e01d7 Mon Sep 17 00:00:00 2001 From: nihui Date: Thu, 30 Jan 2020 16:33:44 +0800 Subject: [PATCH] ncnnoptimize shape inference, load shape hint --- src/blob.h | 3 + src/layer.h | 3 + src/mat.h | 36 ++++++++++ src/net.cpp | 94 ++++++++++++++++++++++-- tools/ncnnoptimize.cpp | 158 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 290 insertions(+), 4 deletions(-) diff --git a/src/blob.h b/src/blob.h index 31f2c1d48..f168d13a5 100644 --- a/src/blob.h +++ b/src/blob.h @@ -18,6 +18,7 @@ #include #include #include "platform.h" +#include "mat.h" namespace ncnn { @@ -36,6 +37,8 @@ public: int producer; // layer index which need this blob as input std::vector consumers; + // shape hint + Shape shape; }; } // namespace ncnn diff --git a/src/layer.h b/src/layer.h index 9c20cac7c..c49227ea1 100644 --- a/src/layer.h +++ b/src/layer.h @@ -115,6 +115,9 @@ public: std::vector bottoms; // blob index which this layer produces as output std::vector tops; + // shape hint + std::vector bottom_shapes; + std::vector top_shapes; }; // layer factory function diff --git a/src/mat.h b/src/mat.h index df21f05ce..e4ac1cb7e 100644 --- a/src/mat.h +++ b/src/mat.h @@ -39,6 +39,24 @@ namespace ncnn { +// thin and pod shape structure +class Shape +{ +public: + Shape() : dims(0), w(0), h(0), c(0) {} + Shape(int _dims, int _w, int _h, int _c) : dims(_dims), w(_w), h(_h), c(_c) {} + +public: + // the dimension rank + // 0 = empty + int dims; + + // 0 = variable + int w; + int h; + int c; +}; + #if NCNN_VULKAN class VkMat; #endif // NCNN_VULKAN @@ -119,6 +137,7 @@ public: bool empty() const; size_t total() const; + Shape shape() const; // data reference Mat channel(int c); @@ -313,6 +332,7 @@ public: bool empty() const; size_t total() const; + Shape shape() const; // data reference VkMat channel(int c); @@ -397,6 +417,7 @@ public: bool empty() const; size_t total() const; + Shape shape() const; // low-level reference VkImage image() const; @@ -1106,6 +1127,11 @@ inline size_t Mat::total() const return cstep * c; } +inline Shape Mat::shape() const +{ + return Shape(dims, w, h, c); +} + inline Mat Mat::channel(int _c) { return Mat(w, h, (unsigned char*)data + cstep * _c * elemsize, elemsize, elempack, allocator); @@ -1652,6 +1678,11 @@ inline size_t VkMat::total() const return cstep * c; } +inline Shape VkMat::shape() const +{ + return Shape(dims, w, h, c); +} + inline VkMat VkMat::channel(int _c) { return VkMat(w, h, data, cstep * _c * elemsize, elemsize, elempack, allocator, staging_allocator); @@ -1818,6 +1849,11 @@ inline size_t VkImageMat::total() const return width * height; } +inline Shape VkImageMat::shape() const +{ + return Shape(2, width, height, 1); +} + inline VkImage VkImageMat::image() const { return data->image; diff --git a/src/net.cpp b/src/net.cpp index 99ec6658f..2c008524b 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -26,10 +26,6 @@ #include #include -#ifdef _OPENMP -#include -#endif // _OPENMP - #if NCNN_BENCHMARK #include "benchmark.h" #endif // NCNN_BENCHMARK @@ -260,6 +256,51 @@ int Net::load_param(const DataReader& dr) continue; } + // pull out top shape hints + Mat shape_hints = pd.get(30, Mat()); + if (!shape_hints.empty()) + { + const int* psh = shape_hints; + for (int j=0; jtops[j]]; + + int dims = psh[0]; + blob.shape.dims = dims; + + if (dims == 1) + { + blob.shape.w = psh[1]; + } + if (dims == 2) + { + blob.shape.w = psh[1]; + blob.shape.h = psh[2]; + } + if (dims == 3) + { + blob.shape.w = psh[1]; + blob.shape.h = psh[2]; + blob.shape.c = psh[3]; + } + + psh += dims; + } + } + + // set bottom and top shape hints + layer->bottom_shapes.resize(bottom_count); + for (int j=0; jbottom_shapes[j] = blobs[layer->bottoms[j]].shape; + } + + layer->top_shapes.resize(top_count); + for (int j=0; jtop_shapes[j] = blobs[layer->tops[j]].shape; + } + int lr = layer->load_param(pd); if (lr != 0) { @@ -392,6 +433,51 @@ int Net::load_param_bin(const DataReader& dr) continue; } + // pull out top blob shape hints + Mat shape_hints = pd.get(30, Mat()); + if (!shape_hints.empty()) + { + const int* psh = shape_hints; + for (int j=0; jtops[j]]; + + int dims = psh[0]; + blob.shape.dims = dims; + + if (dims == 1) + { + blob.shape.w = psh[1]; + } + if (dims == 2) + { + blob.shape.w = psh[1]; + blob.shape.h = psh[2]; + } + if (dims == 3) + { + blob.shape.w = psh[1]; + blob.shape.h = psh[2]; + blob.shape.c = psh[3]; + } + + psh += dims; + } + } + + // set bottom and top shape hints + layer->bottom_shapes.resize(bottom_count); + for (int j=0; jbottom_shapes[j] = blobs[layer->bottoms[j]].shape; + } + + layer->top_shapes.resize(top_count); + for (int j=0; jtop_shapes[j] = blobs[layer->tops[j]].shape; + } + int lr = layer->load_param(pd); if (lr != 0) { diff --git a/tools/ncnnoptimize.cpp b/tools/ncnnoptimize.cpp index daafd0e9f..2d45ebfbf 100644 --- a/tools/ncnnoptimize.cpp +++ b/tools/ncnnoptimize.cpp @@ -129,6 +129,8 @@ public: int replace_convolution_with_innerproduct_after_global_pooling(); int replace_convolution_with_innerproduct_after_innerproduct(); + int shape_inference(); + public: int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp); int fprintf_param_float_array(int id, const ncnn::Mat& m, FILE* pp); @@ -1731,6 +1733,119 @@ int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct() return 0; } +int NetOptimize::shape_inference() +{ + const size_t layer_count = layers.size(); + const size_t blob_count = blobs.size(); + + ncnn::Extractor ex = create_extractor(); + + // prepare Input blobs + for (size_t i=0; itype == "ncnnfused") + continue; + + if (layer->type != "Input") + continue; + + ncnn::Input* input = (ncnn::Input*)layer; + + int w = input->w; + int h = input->h; + int c = input->c; + + int dims = 0; + if (w == 0 && h == 0 && c == 0) dims = 0; + if (w != 0 && h == 0 && c == 0) dims = 1; + if (w != 0 && h != 0 && c == 0) dims = 2; + if (w != 0 && h != 0 && c != 0) dims = 3; + + if (dims == 0) + { + fprintf(stderr, "Input layer %s without shape info, shape_inference aborted\n", layer->name.c_str()); + return -1; + } + + ncnn::Mat m; + if (dims == 1) m.create(w); + if (dims == 2) m.create(w, h); + if (dims == 3) m.create(w, h, c); + + ex.input(layer->tops[0], m); + } + + // prepare blobs with predefined shape + for (size_t i=0; itype == "ncnnfused") + continue; + + for (size_t j=0; jtops.size(); j++) + { + int top_blob_index = layer->tops[j]; + + ncnn::Mat m; + ex.extract(top_blob_index, m); + + blobs[top_blob_index].shape = m.shape(); + } + } + + // assign all layer blob shape + for (size_t i=0; itype == "ncnnfused") + continue; + + layer->bottom_shapes.resize(layer->bottoms.size()); + for (size_t j=0; jbottoms.size(); j++) + { + int bottom_blob_index = layer->bottoms[j]; + + layer->bottom_shapes[j] = blobs[bottom_blob_index].shape; + } + + layer->top_shapes.resize(layer->tops.size()); + for (size_t j=0; jtops.size(); j++) + { + int top_blob_index = layer->tops[j]; + + layer->top_shapes[j] = blobs[top_blob_index].shape; + +// fprintf(stderr, "%d %4d %4d %4d | %2d %s\n", blobs[top_blob_index].shape.dims, blobs[top_blob_index].shape.w, blobs[top_blob_index].shape.h, blobs[top_blob_index].shape.c, top_blob_index, blobs[top_blob_index].name.c_str()); + } + } + + return 0; +} + int NetOptimize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp) { const int count = m.w; @@ -1868,6 +1983,47 @@ int NetOptimize::save(const char* parampath, const char* binpath) fprintf(pp, " %s", blobs[top_blob_index].name.c_str()); } + // write shape hints + int shape_hint_array_size = 0; + for (int j=0; jtops[j]; + int dims = blobs[top_blob_index].shape.dims; + if (dims == 0) + { + shape_hint_array_size = 0; + break; + } + + shape_hint_array_size += dims + 1; + } + if (shape_hint_array_size) + { + fprintf(pp, " -23330=%d", shape_hint_array_size); + for (int j=0; jtops[j]; + int dims = blobs[top_blob_index].shape.dims; + int w = blobs[top_blob_index].shape.w; + int h = blobs[top_blob_index].shape.h; + int c = blobs[top_blob_index].shape.c; + fprintf(pp, ",%d", dims); + + if (dims == 1) + { + fprintf(pp, ",%d", w); + } + if (dims == 2) + { + fprintf(pp, ",%d,%d", w, h); + } + if (dims == 3) + { + fprintf(pp, ",%d,%d,%d", w, h, c); + } + } + } + ncnn::Layer* layer_default = ncnn::create_layer(layer->typeindex); ncnn::ParamDict pd; @@ -2566,6 +2722,8 @@ int main(int argc, char** argv) optimizer.eliminate_flatten_after_innerproduct(); optimizer.eliminate_orphaned_memorydata(); + optimizer.shape_inference(); + optimizer.save(outparam, outbin); return 0;