| @@ -17,6 +17,7 @@ | |||
| #endif | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <set> | |||
| #include <vector> | |||
| @@ -98,6 +99,41 @@ public: | |||
| } | |||
| }; | |||
| class MemoryFootprintAllocator : public ncnn::Allocator | |||
| { | |||
| public: | |||
| MemoryFootprintAllocator() | |||
| { | |||
| current_memory_usage = 0; | |||
| memory_footprint = 0; | |||
| } | |||
| virtual void* fastMalloc(size_t size) | |||
| { | |||
| ncnn::MutexLockGuard g(lock); | |||
| void* ptr = ncnn::fastMalloc(size); | |||
| bookkeeper[ptr] = size; | |||
| current_memory_usage += size; | |||
| memory_footprint = std::max(memory_footprint, current_memory_usage); | |||
| return ptr; | |||
| } | |||
| virtual void fastFree(void* ptr) | |||
| { | |||
| ncnn::MutexLockGuard g(lock); | |||
| size_t size = bookkeeper[ptr]; | |||
| current_memory_usage -= size; | |||
| bookkeeper.erase(bookkeeper.find(ptr)); | |||
| ncnn::fastFree(ptr); | |||
| } | |||
| public: | |||
| int current_memory_usage; | |||
| int memory_footprint; | |||
| ncnn::Mutex lock; | |||
| std::map<void*, size_t> bookkeeper; | |||
| }; | |||
| class NetOptimize : public ncnn::Net | |||
| { | |||
| public: | |||
| @@ -140,6 +176,7 @@ public: | |||
| int replace_convolution_with_innerproduct_after_innerproduct(); | |||
| int shape_inference(); | |||
| int estimate_memory_footprint(); | |||
| public: | |||
| int fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp); | |||
| @@ -2545,7 +2582,7 @@ int NetOptimize::shape_inference() | |||
| // prepare blobs with predefined shape | |||
| for (size_t i = 0; i < blob_count; i++) | |||
| { | |||
| const ncnn::Blob blob = blobs[i]; | |||
| const ncnn::Blob& blob = blobs[i]; | |||
| int dims = blob.shape.dims; | |||
| int w = blob.shape.w; | |||
| @@ -2612,6 +2649,78 @@ int NetOptimize::shape_inference() | |||
| return 0; | |||
| } | |||
| int NetOptimize::estimate_memory_footprint() | |||
| { | |||
| const size_t layer_count = layers.size(); | |||
| const size_t blob_count = blobs.size(); | |||
| MemoryFootprintAllocator allocator; | |||
| ncnn::Extractor ex = create_extractor(); | |||
| ex.set_blob_allocator(&allocator); | |||
| ex.set_workspace_allocator(&allocator); | |||
| // prepare Input blobs | |||
| for (size_t i = 0; i < layer_count; i++) | |||
| { | |||
| const ncnn::Layer* layer = layers[i]; | |||
| if (layer->type == "ncnnfused") | |||
| continue; | |||
| if (layer->type != "Input") | |||
| continue; | |||
| ncnn::Input* input = (ncnn::Input*)layer; | |||
| int w = input->w; | |||
| int h = input->h; | |||
| int c = input->c; | |||
| int dims = 0; | |||
| if (w == 0 && h == 0 && c == 0) dims = 0; | |||
| if (w != 0 && h == 0 && c == 0) dims = 1; | |||
| if (w != 0 && h != 0 && c == 0) dims = 2; | |||
| if (w != 0 && h != 0 && c != 0) dims = 3; | |||
| if (dims == 0) | |||
| { | |||
| fprintf(stderr, "Input layer %s without shape info, estimate_memory_footprint aborted\n", layer->name.c_str()); | |||
| return -1; | |||
| } | |||
| ncnn::Mat m; | |||
| if (dims == 1) m.create(w, 4u, &allocator); | |||
| if (dims == 2) m.create(w, h, 4u, &allocator); | |||
| if (dims == 3) m.create(w, h, c, 4u, &allocator); | |||
| ex.input(layer->tops[0], m); | |||
| fprintf(stderr, "input = %s\n", blobs[layer->tops[0]].name.c_str()); | |||
| } | |||
| // find output blobs and do inference | |||
| std::vector<ncnn::Mat> outputs; | |||
| for (size_t i = 0; i < blob_count; i++) | |||
| { | |||
| const ncnn::Blob& blob = blobs[i]; | |||
| if (!blob.consumers.empty()) | |||
| continue; | |||
| // treat blob without any consumers as output | |||
| ncnn::Mat m; | |||
| ex.extract(int(i), m); | |||
| outputs.push_back(m); | |||
| fprintf(stderr, "extract = %s\n", blob.name.c_str()); | |||
| } | |||
| fprintf(stderr, "estimated memory footprint = %.2f KB = %.2f MB\n", allocator.memory_footprint / 1024.f, allocator.memory_footprint / 1024.f / 1024.f); | |||
| return 0; | |||
| } | |||
| int NetOptimize::fprintf_param_int_array(int id, const ncnn::Mat& m, FILE* pp) | |||
| { | |||
| const int count = m.w; | |||
| @@ -3687,6 +3796,8 @@ int main(int argc, char** argv) | |||
| optimizer.shape_inference(); | |||
| optimizer.estimate_memory_footprint(); | |||
| optimizer.save(outparam, outbin); | |||
| return 0; | |||