|
- // Tencent is pleased to support the open source community by making ncnn available.
- //
- // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
- //
- // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
- // in compliance with the License. You may obtain a copy of the License at
- //
- // https://opensource.org/licenses/BSD-3-Clause
- //
- // Unless required by applicable law or agreed to in writing, software distributed
- // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- // CONDITIONS OF ANY KIND, either express or implied. See the License for the
- // specific language governing permissions and limitations under the License.
-
- #include "net.h"
-
- #include "convolution.h"
- #include "convolutiondepthwise.h"
- #include "cpu.h"
- #include "datareader.h"
- #include "layer_type.h"
- #include "modelbin.h"
- #include "paramdict.h"
- #include "relu.h"
-
- #include <stdarg.h>
- #include <stdint.h>
- #include <string.h>
-
- #if NCNN_BENCHMARK
- #include "benchmark.h"
- #endif // NCNN_BENCHMARK
-
- #if NCNN_VULKAN
- #include "command.h"
- #include "pipelinecache.h"
- #endif // NCNN_VULKAN
-
- namespace ncnn {
-
- Net::Net()
- {
- #if NCNN_VULKAN
- vkdev = 0;
- weight_vkallocator = 0;
- weight_staging_vkallocator = 0;
- pipeline_cache = 0;
- #endif // NCNN_VULKAN
- }
-
- Net::~Net()
- {
- clear();
-
- #if NCNN_VULKAN
- #endif // NCNN_VULKAN
- }
-
#if NCNN_STRING
// Register the creator function for a custom layer identified by type name.
//
// type     name of the custom layer type; the registry stores this pointer
//          as-is (no copy is made here)
// creator  factory used by create_custom_layer() to instantiate the layer
//
// Returns 0 on success and -1 if `type` is the name of a built-in layer.
// Registering a name twice replaces the previous entry and logs a message.
int Net::register_custom_layer(const char* type, layer_creator_func creator)
{
    // built-in layer types cannot be overridden
    if (layer_to_index(type) != -1)
    {
        NCNN_LOGE("can not register build-in layer type %s", type);
        return -1;
    }

    const int slot = custom_layer_to_index(type);
    if (slot != -1)
    {
        // already known: replace the existing entry in place
        NCNN_LOGE("overwrite existing custom layer type %s", type);
        custom_layer_registry[slot].name = type;
        custom_layer_registry[slot].creator = creator;
        return 0;
    }

    // first registration of this name: append a new entry
    struct layer_registry_entry fresh = {type, creator};
    custom_layer_registry.push_back(fresh);
    return 0;
}
#endif // NCNN_STRING
-
- int Net::register_custom_layer(int index, layer_creator_func creator)
- {
- int custom_index = index & ~LayerType::CustomBit;
- if (index == custom_index)
- {
- NCNN_LOGE("can not register build-in layer index %d", custom_index);
- return -1;
- }
-
- if ((int)custom_layer_registry.size() <= custom_index)
- {
- #if NCNN_STRING
- struct layer_registry_entry dummy = {"", 0};
- #else
- struct layer_registry_entry dummy = {0};
- #endif // NCNN_STRING
- custom_layer_registry.resize(custom_index + 1, dummy);
- }
-
- if (custom_layer_registry[custom_index].creator)
- {
- NCNN_LOGE("overwrite existing custom layer index %d", custom_index);
- }
-
- custom_layer_registry[custom_index].creator = creator;
- return 0;
- }
-
#if NCNN_STRING
// Build the network graph from a text-format param description.
//
// The stream starts with the magic number 7767517, followed by the layer
// count and the blob count. Each layer record then holds: type name, layer
// name, bottom count, top count, the bottom blob names, the top blob names
// and the layer specific parameters (read through ParamDict).
//
// Returns 0 when the stream was consumed, -1 on a malformed header, a parse
// failure, an unknown layer type or inconsistent blob bookkeeping.
// A layer whose parameters fail to load is logged and skipped: its slot in
// layers[] stays null, which load_model() later reports as an error.
//
// Side effect: `opt` is adjusted (Vulkan capability flags, int8 restrictions).
int Net::load_param(const DataReader& dr)
{
    // Layer currently being built. It belongs to this function until it is
    // stored in layers[], so every early exit below has to delete it.
    Layer* layer = 0;

#define SCAN_VALUE(fmt, v)                \
    if (dr.scan(fmt, &v) != 1)            \
    {                                     \
        NCNN_LOGE("parse " #v " failed"); \
        delete layer;                     \
        return -1;                        \
    }

    int magic = 0;
    SCAN_VALUE("%d", magic)
    if (magic != 7767517)
    {
        NCNN_LOGE("param is too old, please regenerate");
        return -1;
    }

    // parse
    int layer_count = 0;
    int blob_count = 0;
    SCAN_VALUE("%d", layer_count)
    SCAN_VALUE("%d", blob_count)
    if (layer_count <= 0 || blob_count <= 0)
    {
        NCNN_LOGE("invalid layer_count or blob_count");
        return -1;
    }

    layers.resize((size_t)layer_count);
    blobs.resize((size_t)blob_count);

#if NCNN_VULKAN
    // TODO enable gpu when bf16 conversion implemented
    if (opt.use_bf16_storage)
        opt.use_vulkan_compute = false;

    if (opt.use_vulkan_compute)
    {
        if (!vkdev) vkdev = get_gpu_device();
        if (!vkdev) opt.use_vulkan_compute = false; // no vulkan device, fallback to cpu
    }
    if (opt.use_vulkan_compute)
    {
        // sanitize use options
        if (!vkdev->info.support_fp16_packed) opt.use_fp16_packed = false;
        if (!vkdev->info.support_fp16_storage) opt.use_fp16_storage = false;
        if (!vkdev->info.support_fp16_arithmetic) opt.use_fp16_arithmetic = false;
        if (!vkdev->info.support_int8_storage) opt.use_int8_storage = false;
        if (!vkdev->info.support_int8_arithmetic) opt.use_int8_arithmetic = false;

        // TODO give user a choice
        if (vkdev->info.bug_storage_buffer_no_l1) opt.use_image_storage = true;

        if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false;
    }
#endif // NCNN_VULKAN

    ParamDict pd;

    // next unused slot in blobs[]
    int blob_index = 0;
    for (int i = 0; i < layer_count; i++)
    {
        // nothing is owned yet for this iteration
        layer = 0;

        char layer_type[256];
        char layer_name[256];
        int bottom_count = 0;
        int top_count = 0;
        SCAN_VALUE("%255s", layer_type)
        SCAN_VALUE("%255s", layer_name)
        SCAN_VALUE("%d", bottom_count)
        SCAN_VALUE("%d", top_count)

        // negative counts would turn into huge sizes in the resize() calls below
        if (bottom_count < 0 || top_count < 0)
        {
            NCNN_LOGE("invalid bottom_count or top_count");
            return -1;
        }

        layer = create_layer(layer_type);
        if (!layer)
        {
            layer = create_custom_layer(layer_type);
        }
        if (!layer)
        {
            NCNN_LOGE("layer %s not exists or registered", layer_type);
            clear();
            return -1;
        }

        if (layer->use_int8_inference)
        {
            // no int8 gpu or packing layout support yet
            opt.use_vulkan_compute = false;
            opt.use_packing_layout = false;
            opt.use_fp16_storage = false;
            opt.use_bf16_storage = false;
        }

#if NCNN_VULKAN
        if (opt.use_vulkan_compute)
            layer->vkdev = vkdev;
#endif // NCNN_VULKAN

        layer->type = std::string(layer_type);
        layer->name = std::string(layer_name);
        // NCNN_LOGE("new layer %d %s", i, layer_name);

        layer->bottoms.resize(bottom_count);

        for (int j = 0; j < bottom_count; j++)
        {
            char bottom_name[256];
            SCAN_VALUE("%255s", bottom_name)

            int bottom_blob_index = find_blob_index_by_name(bottom_name);
            if (bottom_blob_index == -1)
            {
                // first time this name is seen: claim the next free blob slot
                if (blob_index >= blob_count)
                {
                    NCNN_LOGE("blob %s exceeds blob_count %d", bottom_name, blob_count);
                    delete layer;
                    return -1;
                }

                Blob& blob = blobs[blob_index];

                bottom_blob_index = blob_index;

                blob.name = std::string(bottom_name);
                // NCNN_LOGE("new blob %s", bottom_name);

                blob_index++;
            }

            Blob& blob = blobs[bottom_blob_index];

            blob.consumers.push_back(i);

            layer->bottoms[j] = bottom_blob_index;
        }

        layer->tops.resize(top_count);
        for (int j = 0; j < top_count; j++)
        {
            char blob_name[256];
            SCAN_VALUE("%255s", blob_name)

            // every top blob takes a new slot
            if (blob_index >= blob_count)
            {
                NCNN_LOGE("blob %s exceeds blob_count %d", blob_name, blob_count);
                delete layer;
                return -1;
            }

            Blob& blob = blobs[blob_index];

            blob.name = std::string(blob_name);
            // NCNN_LOGE("new blob %s", blob_name);

            blob.producer = i;

            layer->tops[j] = blob_index;

            blob_index++;
        }

        // layer specific params
        int pdlr = pd.load_param(dr);
        if (pdlr != 0)
        {
            NCNN_LOGE("ParamDict load_param failed");
            // the layer is skipped (layers[i] stays null) but must not leak
            delete layer;
            layer = 0;
            continue;
        }

        // pull out top shape hints
        // NOTE(review): the hint array is read as 4 ints per top blob without
        // checking its length against top_count - confirm the writer guarantees it
        Mat shape_hints = pd.get(30, Mat());
        if (!shape_hints.empty())
        {
            const int* psh = shape_hints;
            for (int j = 0; j < top_count; j++)
            {
                Blob& blob = blobs[layer->tops[j]];

                int dims = psh[0];
                if (dims == 1)
                {
                    blob.shape = Mat(psh[1], (void*)0, 4u, 1);
                }
                if (dims == 2)
                {
                    blob.shape = Mat(psh[1], psh[2], (void*)0, 4u, 1);
                }
                if (dims == 3)
                {
                    blob.shape = Mat(psh[1], psh[2], psh[3], (void*)0, 4u, 1);
                }

                psh += 4;
            }
        }

        // set bottom and top shape hints
        layer->bottom_shapes.resize(bottom_count);
        for (int j = 0; j < bottom_count; j++)
        {
            layer->bottom_shapes[j] = blobs[layer->bottoms[j]].shape;
        }

        layer->top_shapes.resize(top_count);
        for (int j = 0; j < top_count; j++)
        {
            layer->top_shapes[j] = blobs[layer->tops[j]].shape;
        }

        int lr = layer->load_param(pd);
        if (lr != 0)
        {
            NCNN_LOGE("layer load_param failed");
            // the layer is skipped (layers[i] stays null) but must not leak
            delete layer;
            layer = 0;
            continue;
        }

        // ownership moves to the network
        layers[i] = layer;
        layer = 0;
    }

#undef SCAN_VALUE
    return 0;
}
#endif // NCNN_STRING
-
- int Net::load_param_bin(const DataReader& dr)
- {
- #define READ_VALUE(buf) \
- if (dr.read(&buf, sizeof(buf)) != sizeof(buf)) \
- { \
- NCNN_LOGE("read " #buf " failed"); \
- return -1; \
- }
-
- int magic = 0;
- READ_VALUE(magic)
- if (magic != 7767517)
- {
- NCNN_LOGE("param is too old, please regenerate");
- return -1;
- }
-
- int layer_count = 0;
- int blob_count = 0;
- READ_VALUE(layer_count)
- READ_VALUE(blob_count)
- if (layer_count <= 0 || blob_count <= 0)
- {
- NCNN_LOGE("invalid layer_count or blob_count");
- return -1;
- }
-
- layers.resize(layer_count);
- blobs.resize(blob_count);
-
- #if NCNN_VULKAN
- // TODO enable gpu when bf16 conversion implemented
- if (opt.use_bf16_storage)
- opt.use_vulkan_compute = false;
-
- if (opt.use_vulkan_compute)
- {
- if (!vkdev) vkdev = get_gpu_device();
- if (!vkdev) opt.use_vulkan_compute = false; // no vulkan device, fallback to cpu
- }
- if (opt.use_vulkan_compute)
- {
- // sanitize use options
- if (!vkdev->info.support_fp16_packed) opt.use_fp16_packed = false;
- if (!vkdev->info.support_fp16_storage) opt.use_fp16_storage = false;
- if (!vkdev->info.support_fp16_arithmetic) opt.use_fp16_arithmetic = false;
- if (!vkdev->info.support_int8_storage) opt.use_int8_storage = false;
- if (!vkdev->info.support_int8_arithmetic) opt.use_int8_arithmetic = false;
-
- // TODO give user a choice
- if (vkdev->info.bug_storage_buffer_no_l1) opt.use_image_storage = true;
-
- if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false;
- }
- #endif // NCNN_VULKAN
-
- ParamDict pd;
-
- for (int i = 0; i < layer_count; i++)
- {
- int typeindex;
- int bottom_count;
- int top_count;
- READ_VALUE(typeindex)
- READ_VALUE(bottom_count)
- READ_VALUE(top_count)
-
- Layer* layer = create_layer(typeindex);
- if (!layer)
- {
- int custom_index = typeindex & ~LayerType::CustomBit;
- layer = create_custom_layer(custom_index);
- }
- if (!layer)
- {
- NCNN_LOGE("layer %d not exists or registered", typeindex);
- clear();
- return -1;
- }
-
- if (layer->use_int8_inference)
- {
- // no int8 gpu or packing layout support yet
- opt.use_vulkan_compute = false;
- opt.use_packing_layout = false;
- opt.use_fp16_storage = false;
- opt.use_bf16_storage = false;
- }
-
- #if NCNN_VULKAN
- if (opt.use_vulkan_compute)
- layer->vkdev = vkdev;
- #endif // NCNN_VULKAN
-
- // layer->type = std::string(layer_type);
- // layer->name = std::string(layer_name);
- // NCNN_LOGE("new layer %d", typeindex);
-
- layer->bottoms.resize(bottom_count);
- for (int j = 0; j < bottom_count; j++)
- {
- int bottom_blob_index;
- READ_VALUE(bottom_blob_index)
-
- Blob& blob = blobs[bottom_blob_index];
-
- blob.consumers.push_back(i);
-
- layer->bottoms[j] = bottom_blob_index;
- }
-
- layer->tops.resize(top_count);
- for (int j = 0; j < top_count; j++)
- {
- int top_blob_index;
- READ_VALUE(top_blob_index)
-
- Blob& blob = blobs[top_blob_index];
-
- // blob.name = std::string(blob_name);
- // NCNN_LOGE("new blob %s", blob_name);
-
- blob.producer = i;
-
- layer->tops[j] = top_blob_index;
- }
-
- // layer specific params
- int pdlr = pd.load_param_bin(dr);
- if (pdlr != 0)
- {
- NCNN_LOGE("ParamDict load_param failed");
- continue;
- }
-
- // pull out top blob shape hints
- Mat shape_hints = pd.get(30, Mat());
- if (!shape_hints.empty())
- {
- const int* psh = shape_hints;
- for (int j = 0; j < top_count; j++)
- {
- Blob& blob = blobs[layer->tops[j]];
-
- int dims = psh[0];
- if (dims == 1)
- {
- blob.shape = Mat(psh[1], (void*)0, 4u, 1);
- }
- if (dims == 2)
- {
- blob.shape = Mat(psh[1], psh[2], (void*)0, 4u, 1);
- }
- if (dims == 3)
- {
- blob.shape = Mat(psh[1], psh[2], psh[3], (void*)0, 4u, 1);
- }
-
- psh += 4;
- }
- }
-
- // set bottom and top shape hints
- layer->bottom_shapes.resize(bottom_count);
- for (int j = 0; j < bottom_count; j++)
- {
- layer->bottom_shapes[j] = blobs[layer->bottoms[j]].shape;
- }
-
- layer->top_shapes.resize(top_count);
- for (int j = 0; j < top_count; j++)
- {
- layer->top_shapes[j] = blobs[layer->tops[j]].shape;
- }
-
- int lr = layer->load_param(pd);
- if (lr != 0)
- {
- NCNN_LOGE("layer load_param failed");
- continue;
- }
-
- layers[i] = layer;
- }
-
- #undef READ_VALUE
- return 0;
- }
-
- int Net::load_model(const DataReader& dr)
- {
- if (layers.empty())
- {
- NCNN_LOGE("network graph not ready");
- return -1;
- }
-
- // load file
- int ret = 0;
-
- ModelBinFromDataReader mb(dr);
- for (size_t i = 0; i < layers.size(); i++)
- {
- Layer* layer = layers[i];
-
- //Here we found inconsistent content in the parameter file.
- if (!layer)
- {
- NCNN_LOGE("load_model error at layer %d, parameter file has inconsistent content.", (int)i);
- ret = -1;
- break;
- }
-
- int lret = layer->load_model(mb);
- if (lret != 0)
- {
- NCNN_LOGE("layer load_model %d failed", (int)i);
- ret = -1;
- break;
- }
-
- if (layer->use_int8_inference)
- {
- // no int8 gpu or packing layout support yet
- opt.use_vulkan_compute = false;
- opt.use_packing_layout = false;
- opt.use_fp16_storage = false;
- opt.use_bf16_storage = false;
- }
- }
-
- fuse_network();
-
- #if NCNN_VULKAN
- if (opt.use_vulkan_compute)
- {
- if (!opt.pipeline_cache)
- {
- if (!pipeline_cache)
- pipeline_cache = new PipelineCache(vkdev);
- opt.pipeline_cache = pipeline_cache;
- }
-
- if (vkdev->info.bug_layout_binding_id_alias) opt.use_image_storage = false;
- }
- #endif // NCNN_VULKAN
-
- for (size_t i = 0; i < layers.size(); i++)
- {
- Layer* layer = layers[i];
-
- //Here we found inconsistent content in the parameter file.
- if (!layer)
- {
- NCNN_LOGE("load_model error at layer %d, parameter file has inconsistent content.", (int)i);
- ret = -1;
- break;
- }
-
- Option opt1 = opt;
- #if NCNN_VULKAN
- if (opt.use_vulkan_compute)
- {
- if (!layer->support_image_storage) opt1.use_image_storage = false;
- }
- #endif // NCNN_VULKAN
-
- int cret = layer->create_pipeline(opt1);
- if (cret != 0)
- {
- NCNN_LOGE("layer create_pipeline %d failed", (int)i);
- ret = -1;
- break;
- }
- }
-
- #if NCNN_VULKAN
- if (opt.use_vulkan_compute)
- {
- create_pipeline();
-
- upload_model();
- }
- #endif // NCNN_VULKAN
-
- return ret;
- }
-
- #if NCNN_STDIO
- #if NCNN_STRING
- int Net::load_param(FILE* fp)
- {
- DataReaderFromStdio dr(fp);
- return load_param(dr);
- }
-
- int Net::load_param_mem(const char* _mem)
- {
- const unsigned char* mem = (const unsigned char*)_mem;
- DataReaderFromMemory dr(mem);
- return load_param(dr);
- }
-
- int Net::load_param(const char* protopath)
- {
- FILE* fp = fopen(protopath, "rb");
- if (!fp)
- {
- NCNN_LOGE("fopen %s failed", protopath);
- return -1;
- }
-
- int ret = load_param(fp);
- fclose(fp);
- return ret;
- }
- #endif // NCNN_STRING
-
- int Net::load_param_bin(FILE* fp)
- {
- DataReaderFromStdio dr(fp);
- return load_param_bin(dr);
- }
-
- int Net::load_param_bin(const char* protopath)
- {
- FILE* fp = fopen(protopath, "rb");
- if (!fp)
- {
- NCNN_LOGE("fopen %s failed", protopath);
- return -1;
- }
-
- int ret = load_param_bin(fp);
- fclose(fp);
- return ret;
- }
-
- int Net::load_model(FILE* fp)
- {
- DataReaderFromStdio dr(fp);
- return load_model(dr);
- }
-
- int Net::load_model(const char* modelpath)
- {
- FILE* fp = fopen(modelpath, "rb");
- if (!fp)
- {
- NCNN_LOGE("fopen %s failed", modelpath);
- return -1;
- }
-
- int ret = load_model(fp);
- fclose(fp);
- return ret;
- }
- #endif // NCNN_STDIO
-
- int Net::load_param(const unsigned char* _mem)
- {
- const unsigned char* mem = _mem;
- DataReaderFromMemory dr(mem);
- load_param_bin(dr);
- return static_cast<int>(mem - _mem);
- }
-
- int Net::load_model(const unsigned char* _mem)
- {
- const unsigned char* mem = _mem;
- DataReaderFromMemory dr(mem);
- load_model(dr);
- return static_cast<int>(mem - _mem);
- }
-
- #if __ANDROID_API__ >= 9
- #if NCNN_STRING
- int Net::load_param(AAsset* asset)
- {
- DataReaderFromAndroidAsset dr(asset);
- return load_param(dr);
- }
-
- int Net::load_param(AAssetManager* mgr, const char* assetpath)
- {
- AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_BUFFER);
- if (!asset)
- {
- NCNN_LOGE("AAssetManager_open %s failed", assetpath);
- return -1;
- }
-
- int ret = load_param(asset);
- AAsset_close(asset);
- return ret;
- }
- #endif // NCNN_STRING
-
- int Net::load_param_bin(AAsset* asset)
- {
- DataReaderFromAndroidAsset dr(asset);
- return load_param_bin(dr);
- }
-
- int Net::load_param_bin(AAssetManager* mgr, const char* assetpath)
- {
- AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_BUFFER);
- if (!asset)
- {
- NCNN_LOGE("AAssetManager_open %s failed", assetpath);
- return -1;
- }
-
- int ret = load_param_bin(asset);
- AAsset_close(asset);
- return ret;
- }
-
- int Net::load_model(AAsset* asset)
- {
- DataReaderFromAndroidAsset dr(asset);
- return load_model(dr);
- }
-
- int Net::load_model(AAssetManager* mgr, const char* assetpath)
- {
- AAsset* asset = AAssetManager_open(mgr, assetpath, AASSET_MODE_STREAMING);
- if (!asset)
- {
- NCNN_LOGE("AAssetManager_open %s failed", assetpath);
- return -1;
- }
-
- int ret = load_model(asset);
- AAsset_close(asset);
- return ret;
- }
- #endif // __ANDROID_API__ >= 9
-
- int Net::fuse_network()
- {
- // set the int8 op fusion:requantize
- #if NCNN_STRING && NCNN_REQUANT
- // NCNN_LOGE("Test op fusion to int8 implement:");
- // parse the network whether is a quantization model
- bool net_quantized = false;
- for (size_t i = 0; i < layers.size(); i++)
- {
- Layer* layer = layers[i];
- if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise")
- {
- if (layer->type == "Convolution" && (((Convolution*)layer)->weight_data.elemsize != 1u))
- continue;
- if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->weight_data.elemsize != 1u))
- continue;
- net_quantized = true;
- }
- }
-
- if (net_quantized == false)
- return 0;
-
- for (size_t i = 0; i < layers.size(); i++)
- {
- Layer* layer = layers[i];
-
- if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise")
- {
- if (layer->type == "Convolution" && (((Convolution*)layer)->weight_data.elemsize != 1u))
- continue;
- if (layer->type == "ConvolutionDepthWise" && (((ConvolutionDepthWise*)layer)->weight_data.elemsize != 1u))
- continue;
-
- for (size_t n = 0; n < blobs[layer->tops[0]].consumers.size(); n++)
- {
- int layer_next_index = blobs[layer->tops[0]].consumers[n];
- Layer* layer_next = layers[layer_next_index];
-
- if (layer_next->type == "Convolution" || layer_next->type == "ConvolutionDepthWise")
- {
- if (layer_next->type == "Convolution" && ((Convolution*)layer_next)->weight_data.elemsize != 1u)
- continue;
- if (layer_next->type == "ConvolutionDepthWise" && ((ConvolutionDepthWise*)layer_next)->weight_data.elemsize != 1u)
- continue;
-
- // NCNN_LOGE("%s, %s", layer->name.c_str(), layer_next->name.c_str());
- if (layer->type == "Convolution" && layer_next->type == "Convolution")
- {
- ((Convolution*)layer)->use_int8_requantize = true;
- ((Convolution*)layer)->top_blob_int8_scale = ((Convolution*)layer_next)->bottom_blob_int8_scale;
- }
- else if (layer->type == "ConvolutionDepthWise" && layer_next->type == "Convolution")
- {
- ((ConvolutionDepthWise*)layer)->use_int8_requantize = true;
- ((ConvolutionDepthWise*)layer)->top_blob_int8_scale = ((Convolution*)layer_next)->bottom_blob_int8_scale;
- }
- else if (layer->type == "Convolution" && layer_next->type == "ConvolutionDepthWise")
- {
- ((Convolution*)layer)->use_int8_requantize = true;
- ((Convolution*)layer)->top_blob_int8_scale = ((ConvolutionDepthWise*)layer_next)->bottom_blob_int8_scales[0];
- }
- else
- {
- ((ConvolutionDepthWise*)layer)->use_int8_requantize = true;
- ((ConvolutionDepthWise*)layer)->top_blob_int8_scale = ((ConvolutionDepthWise*)layer_next)->bottom_blob_int8_scales[0];
- }
- }
- else if (layer_next->type == "ReLU")
- {
- int layer_next_2_index = blobs[layer_next->tops[0]].consumers[0];
- Layer* layer_next_2 = layers[layer_next_2_index];
-
- if (layer_next_2->type == "Convolution" || layer_next_2->type == "ConvolutionDepthWise")
- {
- if (layer_next_2->type == "Convolution" && ((Convolution*)layer_next_2)->weight_data.elemsize != 1u)
- continue;
- if (layer_next_2->type == "ConvolutionDepthWise" && ((ConvolutionDepthWise*)layer_next_2)->weight_data.elemsize != 1u)
- continue;
-
- // NCNN_LOGE("%s, %s, %s", layer->name.c_str(), layer_next->name.c_str(), layer_next_2->name.c_str());
- if (layer->type == "Convolution" && layer_next_2->type == "Convolution")
- {
- ((Convolution*)layer)->use_int8_requantize = true;
- ((Convolution*)layer)->top_blob_int8_scale = ((Convolution*)layer_next_2)->bottom_blob_int8_scale;
- }
- else if (layer->type == "ConvolutionDepthWise" && layer_next_2->type == "Convolution")
- {
- ((ConvolutionDepthWise*)layer)->use_int8_requantize = true;
- ((ConvolutionDepthWise*)layer)->top_blob_int8_scale = ((Convolution*)layer_next_2)->bottom_blob_int8_scale;
- }
- else if (layer->type == "Convolution" && layer_next_2->type == "ConvolutionDepthWise")
- {
- ((Convolution*)layer)->use_int8_requantize = true;
- ((Convolution*)layer)->top_blob_int8_scale = ((ConvolutionDepthWise*)layer_next_2)->bottom_blob_int8_scales[0];
- }
- else
- {
- ((ConvolutionDepthWise*)layer)->use_int8_requantize = true;
- ((ConvolutionDepthWise*)layer)->top_blob_int8_scale = ((ConvolutionDepthWise*)layer_next_2)->bottom_blob_int8_scales[0];
- }
- }
- else if (layer_next_2->type == "Split")
- {
- bool all_conv = true;
- for (size_t i = 0; i < layer_next_2->tops.size(); i++)
- {
- int layer_next_3_index = blobs[layer_next_2->tops[i]].consumers[0];
- if (layers[layer_next_3_index]->type != "Convolution" && layers[layer_next_3_index]->type != "ConvolutionDepthWise" && layers[layer_next_3_index]->type != "PriorBox")
- {
- // NCNN_LOGE("%s, %s, %s, %s", layer->name.c_str(), layer_next->name.c_str(), layer_next_2->name.c_str(), layers[layer_next_3_index]->name.c_str());
- all_conv = false;
- }
- }
-
- if (all_conv == true && layer_next_2->tops.size() >= size_t(2))
- {
- // NCNN_LOGE("%s, %s, %s, ", layer->name.c_str(), layer_next->name.c_str(), layer_next_2->name.c_str());
- for (size_t i = 0; i < layer_next_2->tops.size(); i++)
- {
- int layer_next_3_index = blobs[layer_next_2->tops[i]].consumers[0];
- Layer* layer_next_3 = layers[layer_next_3_index];
-
- // NCNN_LOGE("%s, ", layer_next_3->name.c_str());
- if (layer_next_3->type == "Convolution")
- {
- ((Convolution*)layer)->top_blob_int8_scale = ((Convolution*)layer_next_3)->bottom_blob_int8_scale;
- }
- }
-
- ((Convolution*)layer)->use_int8_requantize = true;
- // NCNN_LOGE("");
- }
- }
- else
- {
- // NCNN_LOGE("%s, %s", layer->name.c_str(), layer_next->name.c_str());
- }
- }
- else if (layer_next->type == "Pooling")
- {
- // ToDo
- }
- else
- {
- // NCNN_LOGE("%s", layer->name.c_str());
- }
- }
- }
- }
- #endif
- return 0;
- }
-
- void Net::clear()
- {
- #if NCNN_VULKAN
- destroy_pipeline();
- #endif // NCNN_VULKAN
-
- blobs.clear();
- for (size_t i = 0; i < layers.size(); i++)
- {
- Layer* layer = layers[i];
-
- Option opt1 = opt;
- if (!layer->support_image_storage)
- {
- opt1.use_image_storage = false;
- }
-
- int dret = layer->destroy_pipeline(opt1);
- if (dret != 0)
- {
- NCNN_LOGE("layer destroy_pipeline failed");
- // ignore anyway
- }
-
- delete layer;
- }
- layers.clear();
-
- #if NCNN_VULKAN
- if (weight_vkallocator)
- {
- delete weight_vkallocator;
- weight_vkallocator = 0;
- }
- if (weight_staging_vkallocator)
- {
- delete weight_staging_vkallocator;
- weight_staging_vkallocator = 0;
- }
- if (pipeline_cache)
- {
- delete pipeline_cache;
- pipeline_cache = 0;
- opt.pipeline_cache = 0;
- }
- #endif // NCNN_VULKAN
- }
-
- Extractor Net::create_extractor() const
- {
- return Extractor(this, blobs.size());
- }
-
- #if NCNN_VULKAN
- void Net::set_vulkan_device(int device_index)
- {
- vkdev = get_gpu_device(device_index);
- }
-
- void Net::set_vulkan_device(const VulkanDevice* _vkdev)
- {
- vkdev = _vkdev;
- }
-
- const VulkanDevice* Net::vulkan_device() const
- {
- return vkdev;
- }
-
- int Net::upload_model()
- {
- ncnn::VkTransfer cmd(vkdev);
-
- // create gpu device allocator if null
- if (!weight_vkallocator)
- {
- weight_vkallocator = new VkWeightAllocator(vkdev);
- }
- if (!weight_staging_vkallocator)
- {
- weight_staging_vkallocator = new VkWeightStagingAllocator(vkdev);
- }
-
- Option opt_upload = opt;
- opt_upload.blob_vkallocator = weight_vkallocator;
- opt_upload.workspace_vkallocator = weight_vkallocator;
- opt_upload.staging_vkallocator = weight_staging_vkallocator;
-
- for (size_t i = 0; i < layers.size(); i++)
- {
- if (layers[i]->support_vulkan)
- {
- int uret = layers[i]->upload_model(cmd, opt_upload);
- if (uret != 0)
- {
- NCNN_LOGE("layer upload_model %d failed", (int)i);
- return -1;
- }
- }
- }
-
- cmd.submit_and_wait();
-
- return 0;
- }
-
- int Net::create_pipeline()
- {
- return 0;
- }
-
- int Net::destroy_pipeline()
- {
- return 0;
- }
- #endif // NCNN_VULKAN
-
- #if NCNN_STRING
- int Net::find_blob_index_by_name(const char* name) const
- {
- for (size_t i = 0; i < blobs.size(); i++)
- {
- const Blob& blob = blobs[i];
- if (blob.name == name)
- {
- return static_cast<int>(i);
- }
- }
-
- NCNN_LOGE("find_blob_index_by_name %s failed", name);
- return -1;
- }
-
- int Net::find_layer_index_by_name(const char* name) const
- {
- for (size_t i = 0; i < layers.size(); i++)
- {
- const Layer* layer = layers[i];
- if (layer->name == name)
- {
- return static_cast<int>(i);
- }
- }
-
- NCNN_LOGE("find_layer_index_by_name %s failed", name);
- return -1;
- }
-
- int Net::custom_layer_to_index(const char* type)
- {
- const size_t custom_layer_registry_entry_count = custom_layer_registry.size();
- for (size_t i = 0; i < custom_layer_registry_entry_count; i++)
- {
- if (strcmp(type, custom_layer_registry[i].name) == 0)
- return static_cast<int>(i);
- }
-
- return -1;
- }
-
- Layer* Net::create_custom_layer(const char* type)
- {
- int index = custom_layer_to_index(type);
- if (index == -1)
- return 0;
-
- return create_custom_layer(index);
- }
- #endif // NCNN_STRING
-
- Layer* Net::create_custom_layer(int index)
- {
- const size_t custom_layer_registry_entry_count = custom_layer_registry.size();
- if (index < 0 || static_cast<unsigned int>(index) >= custom_layer_registry_entry_count)
- return 0;
-
- layer_creator_func layer_creator = custom_layer_registry[index].creator;
- if (!layer_creator)
- return 0;
-
- return layer_creator();
- }
-
- int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, const Option& opt) const
- {
- const Layer* layer = layers[layer_index];
-
- // NCNN_LOGE("forward_layer %d %s", layer_index, layer->name.c_str());
-
- if (layer->one_blob_only)
- {
- // load bottom blob
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, opt);
- if (ret != 0)
- return ret;
- }
-
- Mat bottom_blob = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- bottom_blob = bottom_blob.clone();
- }
- }
-
- // clang-format off
- // *INDENT-OFF*
- #if NCNN_ARM82
- if (opt.use_fp16_storage && cpu_support_arm_asimdhp())
- {
- if (bottom_blob.elemsize / bottom_blob.elempack == 4u && layer->support_fp16_storage)
- {
- Mat bottom_blob_fp16;
- cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
- bottom_blob = bottom_blob_fp16;
- }
- if (bottom_blob.elemsize / bottom_blob.elempack == 2u && !layer->support_fp16_storage)
- {
- Mat bottom_blob_fp32;
- cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
- bottom_blob = bottom_blob_fp32;
- }
- }
- else
- #endif // NCNN_ARM82
- if (opt.use_bf16_storage)
- {
- if (bottom_blob.elemsize / bottom_blob.elempack == 4u && layer->support_bf16_storage)
- {
- Mat bottom_blob_bf16;
- cast_float32_to_bfloat16(bottom_blob, bottom_blob_bf16, opt);
- bottom_blob = bottom_blob_bf16;
- }
- if (bottom_blob.elemsize / bottom_blob.elempack == 2u && !layer->support_bf16_storage)
- {
- Mat bottom_blob_fp32;
- cast_bfloat16_to_float32(bottom_blob, bottom_blob_fp32, opt);
- bottom_blob = bottom_blob_fp32;
- }
- }
- // *INDENT-ON*
- // clang-format on
-
- if (opt.use_packing_layout)
- {
- // resolve dst_elempack
- int dims = bottom_blob.dims;
- int elemcount = 0;
- if (dims == 1) elemcount = bottom_blob.elempack * bottom_blob.w;
- if (dims == 2) elemcount = bottom_blob.elempack * bottom_blob.h;
- if (dims == 3) elemcount = bottom_blob.elempack * bottom_blob.c;
-
- int dst_elempack = 1;
- if (layer->support_packing)
- {
- #if NCNN_AVX2
- if (elemcount % 8 == 0)
- dst_elempack = 8;
- #elif NCNN_ARM82
- if (elemcount % 8 == 0 && opt.use_fp16_arithmetic && layer->support_fp16_storage)
- dst_elempack = 8;
- else if (elemcount % 4 == 0)
- dst_elempack = 4;
- #else
- if (elemcount % 4 == 0)
- dst_elempack = 4;
- #endif
- }
-
- Mat bottom_blob_packed;
- convert_packing(bottom_blob, bottom_blob_packed, dst_elempack, opt);
- bottom_blob = bottom_blob_packed;
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- Mat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_top_blob, bottom_top_blob, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = bottom_top_blob;
- }
- else
- {
- Mat top_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blob, top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_blob, top_blob, start, end);
- #else
- int ret = layer->forward(bottom_blob, top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs
- std::vector<Mat> bottom_blobs(layer->bottoms.size());
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, opt);
- if (ret != 0)
- return ret;
- }
-
- bottom_blobs[i] = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- bottom_blobs[i] = bottom_blobs[i].clone();
- }
- }
-
- // clang-format off
- // *INDENT-OFF*
- #if NCNN_ARM82
- if (opt.use_fp16_storage && cpu_support_arm_asimdhp())
- {
- if (bottom_blobs[i].elemsize / bottom_blobs[i].elempack == 4u && layer->support_fp16_storage)
- {
- Mat bottom_blob_fp16;
- cast_float32_to_float16(bottom_blobs[i], bottom_blob_fp16, opt);
- bottom_blobs[i] = bottom_blob_fp16;
- }
- if (bottom_blobs[i].elemsize / bottom_blobs[i].elempack == 2u && !layer->support_fp16_storage)
- {
- Mat bottom_blob_fp32;
- cast_float16_to_float32(bottom_blobs[i], bottom_blob_fp32, opt);
- bottom_blobs[i] = bottom_blob_fp32;
- }
- }
- else
- #endif // NCNN_ARM82
- if (opt.use_bf16_storage)
- {
- if (bottom_blobs[i].elemsize / bottom_blobs[i].elempack == 4u && layer->support_bf16_storage)
- {
- Mat bottom_blob_bf16;
- cast_float32_to_bfloat16(bottom_blobs[i], bottom_blob_bf16, opt);
- bottom_blobs[i] = bottom_blob_bf16;
- }
- if (bottom_blobs[i].elemsize / bottom_blobs[i].elempack == 2u && !layer->support_bf16_storage)
- {
- Mat bottom_blob_fp32;
- cast_bfloat16_to_float32(bottom_blobs[i], bottom_blob_fp32, opt);
- bottom_blobs[i] = bottom_blob_fp32;
- }
- }
- // *INDENT-ON*
- // clang-format on
-
- if (opt.use_packing_layout)
- {
- // resolve dst_elempack
- int dims = bottom_blobs[i].dims;
- int elemcount = 0;
- if (dims == 1) elemcount = bottom_blobs[i].elempack * bottom_blobs[i].w;
- if (dims == 2) elemcount = bottom_blobs[i].elempack * bottom_blobs[i].h;
- if (dims == 3) elemcount = bottom_blobs[i].elempack * bottom_blobs[i].c;
-
- int dst_elempack = 1;
- if (layer->support_packing)
- {
- #if NCNN_AVX2
- if (elemcount % 8 == 0)
- dst_elempack = 8;
- #elif NCNN_ARM82
- if (elemcount % 8 == 0 && opt.use_fp16_arithmetic && layer->support_fp16_storage)
- dst_elempack = 8;
- else if (elemcount % 4 == 0)
- dst_elempack = 4;
- #else
- if (elemcount % 4 == 0)
- dst_elempack = 4;
- #endif
- }
-
- Mat bottom_blob_packed;
- convert_packing(bottom_blobs[i], bottom_blob_packed, dst_elempack, opt);
- bottom_blobs[i] = bottom_blob_packed;
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<Mat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<Mat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = top_blobs[i];
- }
- }
- }
-
- // NCNN_LOGE("forward_layer %d %s done", layer_index, layer->name.c_str());
- // const Mat& blob = blob_mats[layer->tops[0]];
- // NCNN_LOGE("[%-2d %-16s %-16s] %d blobs count = %-3d size = %-3d x %-3d", layer_index, layer->type.c_str(), layer->name.c_str(), layer->tops[0], blob.c, blob.h, blob.w);
-
- return 0;
- }
-
- #if NCNN_VULKAN
- int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, VkCompute& cmd, const Option& opt) const // Vulkan buffer variant: produces the top blob(s) of layers[layer_index], recursively running the producer of any bottom blob that is not available yet
- { // blob_mats holds host blobs and blob_mats_gpu device buffers, both indexed by blob index; GPU work is recorded into cmd
- const Layer* layer = layers[layer_index];
-
- // NCNN_LOGE("forward_layer %d %d %s", layer->support_vulkan, layer_index, layer->name.c_str());
-
- if (layer->support_vulkan) // GPU path: bottoms are taken from blob_mats_gpu and tops are stored there
- {
- if (layer->one_blob_only)
- {
- // load bottom blob (only bottoms[0] and tops[0] are used in this branch)
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0) // absent on device and on host: run the producing layer first
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0) // re-check: upload when only a host copy exists
- {
- // host to buffer
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
- }
- }
-
- VkMat bottom_blob = blob_mats_gpu[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode (the local bottom_blob handle keeps the data referenced)
- blob_mats_gpu[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- VkMat bottom_blob_copy;
- cmd.record_clone(bottom_blob, bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blob.buffer(), bottom_blob.buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blob = bottom_blob_copy;
- }
- }
-
- // forward (in place only when light mode is on and the layer supports it)
- if (opt.lightmode && layer->support_inplace)
- {
- VkMat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2); // start timestamp slot for this layer
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1); // end timestamp slot for this layer
- #else
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu[top_blob_index] = bottom_top_blob;
- }
- else
- {
- VkMat top_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs (same steps as the single-blob branch, repeated for every entry of layer->bottoms)
- std::vector<VkMat> bottom_blobs(layer->bottoms.size());
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- // host to buffer
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
- }
- }
-
- bottom_blobs[i] = blob_mats_gpu[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- VkMat bottom_blob_copy;
- cmd.record_clone(bottom_blobs[i], bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blobs[i].buffer(), bottom_blobs[i].buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blobs[i] = bottom_blob_copy;
- }
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<VkMat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs (in place: top i is taken from bottom_top_blobs[i])
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<VkMat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu[top_blob_index] = top_blobs[i];
- }
- }
- }
- }
- else // host path: the layer has no Vulkan support, so bottoms are taken from blob_mats and tops are stored there
- {
- if (layer->one_blob_only)
- {
- // load bottom blob
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0) // absent on host and on device: run the producing layer first
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats[bottom_blob_index].dims == 0) // re-check: download when only a device copy exists
- {
- Option opt_download = opt;
- opt_download.use_packing_layout = layer->support_packing; // request a packed host blob only if this layer supports packing
-
- // buffer to host
- cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt_download);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
-
- cmd.submit_and_wait(); // submit the recorded commands and wait before the host forward below
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(layer_index * 2); // two timestamp slots (start, end) for every layer index below layer_index
- cmd.get_query_pool_results(0, layer_index * 2, results);
- for (int i = 0; i < layer_index; i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0) // skip layers whose start or end timestamp is zero
- continue;
-
- double duration_us = (end - start) * vkdev->info.timestamp_period / 1000; // tick delta scaled by timestamp_period, logged with a "us" suffix
- NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
-
- cmd.reset();
- }
- }
-
- Mat bottom_blob = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- bottom_blob = bottom_blob.clone();
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- Mat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_top_blob, bottom_top_blob, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = bottom_top_blob;
- }
- else
- {
- Mat top_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blob, top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_blob, top_blob, start, end);
- #else
- int ret = layer->forward(bottom_blob, top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs (first pass: record a download for every bottom that has no host copy)
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- Option opt_download = opt;
- opt_download.use_packing_layout = layer->support_packing;
-
- // buffer to host
- cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt_download);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- }
- }
-
- { // one submit for all bottoms; note this runs unconditionally, even when the loop above recorded no download
- cmd.submit_and_wait();
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(layer_index * 2);
- cmd.get_query_pool_results(0, layer_index * 2, results);
- for (int i = 0; i < layer_index; i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0)
- continue;
-
- double duration_us = (end - start) * vkdev->info.timestamp_period / 1000;
- NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
-
- cmd.reset();
- }
-
- std::vector<Mat> bottom_blobs(layer->bottoms.size()); // second pass: collect the host blobs now that the submit has completed
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- bottom_blobs[i] = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- bottom_blobs[i] = bottom_blobs[i].clone();
- }
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<Mat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<Mat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = top_blobs[i];
- }
- }
- }
- }
-
- // NCNN_LOGE("forward_layer %d %d %s done", layer->support_vulkan, layer_index, layer->name.c_str());
-
- return 0; // success; every failing recursive call or layer forward above returned its non-zero code early
- }
-
- int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<VkImageMat>& blob_mats_gpu_image, VkCompute& cmd, const Option& opt) const // Vulkan image variant: produces the top blob(s) of layers[layer_index], recursively running the producer of any bottom blob that is not available yet
- { // a blob may live in blob_mats (host), blob_mats_gpu (device buffer) or blob_mats_gpu_image (device image), all indexed by blob index; GPU work is recorded into cmd
- const Layer* layer = layers[layer_index];
-
- // NCNN_LOGE("forward_layer %d %d %s", layer->support_vulkan, layer_index, layer->name.c_str());
-
- if (layer->support_vulkan)
- {
- if (layer->support_image_storage) // image path: bottoms are taken from blob_mats_gpu_image and tops are stored there
- {
- if (layer->one_blob_only)
- {
- // load bottom blob (only bottoms[0] and tops[0] are used in this branch)
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0) // absent in all three stores: run the producing layer first
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0) // re-check: convert from whichever store holds the blob now
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- // host to image
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
- }
- else
- {
- // buffer to image
- cmd.record_buffer_to_image(blob_mats_gpu[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // buffer to image (a device buffer already existed on entry)
- cmd.record_buffer_to_image(blob_mats_gpu[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- }
-
- VkImageMat bottom_blob = blob_mats_gpu_image[bottom_blob_index];
-
- if (bottom_blob.empty()) // still no image after the upload/conversion: fall back to the host path at the label below
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
-
- if (opt.lightmode)
- {
- // delete after taken in light mode (the local bottom_blob handle keeps the data referenced)
- blob_mats_gpu_image[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- VkImageMat bottom_blob_copy;
- cmd.record_clone(bottom_blob, bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blob.buffer(), bottom_blob.buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blob = bottom_blob_copy;
- }
- }
-
- // forward (in place only when light mode is on and the layer supports it)
- if (opt.lightmode && layer->support_inplace)
- {
- VkImageMat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2); // start timestamp slot for this layer
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1); // end timestamp slot for this layer
- #else
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret == -100) // -100 is not returned as an error; it is routed to the same host fallback as an empty image
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu_image[top_blob_index] = bottom_top_blob;
- }
- else
- {
- VkImageMat top_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret == -100)
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu_image[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs (same steps as the single-blob branch, repeated for every entry of layer->bottoms)
- std::vector<VkImageMat> bottom_blobs(layer->bottoms.size());
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- // host to image
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
- }
- else
- {
- // buffer to image
- cmd.record_buffer_to_image(blob_mats_gpu[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // buffer to image
- cmd.record_buffer_to_image(blob_mats_gpu[bottom_blob_index], blob_mats_gpu_image[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- }
-
- bottom_blobs[i] = blob_mats_gpu_image[bottom_blob_index];
-
- if (bottom_blobs[i].empty()) // any bottom without an image sends the whole layer to the host fallback
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- VkImageMat bottom_blob_copy;
- cmd.record_clone(bottom_blobs[i], bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blobs[i].buffer(), bottom_blobs[i].buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blobs[i] = bottom_blob_copy;
- }
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<VkImageMat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret == -100)
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
- if (ret != 0)
- return ret;
-
- // store top blobs (in place: top i is taken from bottom_top_blobs[i])
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu_image[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<VkImageMat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret == -100)
- {
- goto IMAGE_ALLOCATION_FAILED;
- }
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu_image[top_blob_index] = top_blobs[i];
- }
- }
- }
- }
- else // buffer path: Vulkan layer without image storage; bottoms are taken from blob_mats_gpu and tops are stored there
- {
- if (layer->one_blob_only)
- {
- // load bottom blob
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0) // absent in all three stores: run the producing layer first
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0) // re-check: convert from whichever store holds the blob now
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- // host to buffer
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
- }
- else
- {
- // image to buffer
- cmd.record_image_to_buffer(blob_mats_gpu_image[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // image to buffer (a device image already existed on entry)
- cmd.record_image_to_buffer(blob_mats_gpu_image[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
-
- VkMat bottom_blob = blob_mats_gpu[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- VkMat bottom_blob_copy;
- cmd.record_clone(bottom_blob, bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blob.buffer(), bottom_blob.buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blob = bottom_blob_copy;
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- VkMat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu[top_blob_index] = bottom_top_blob;
- }
- else
- {
- VkMat top_blob;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats_gpu[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs
- std::vector<VkMat> bottom_blobs(layer->bottoms.size());
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- // host to buffer
- cmd.record_upload(blob_mats[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
- }
- else
- {
- // image to buffer
- cmd.record_image_to_buffer(blob_mats_gpu_image[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // image to buffer
- cmd.record_image_to_buffer(blob_mats_gpu_image[bottom_blob_index], blob_mats_gpu[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
-
- bottom_blobs[i] = blob_mats_gpu[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- VkMat bottom_blob_copy;
- cmd.record_clone(bottom_blobs[i], bottom_blob_copy, opt);
- // NCNN_LOGE("clone %p[+%lu] %p[+%lu]", bottom_blobs[i].buffer(), bottom_blobs[i].buffer_offset(), bottom_blob_copy.buffer(), bottom_blob_copy.buffer_offset());
- bottom_blobs[i] = bottom_blob_copy;
- }
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<VkMat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<VkMat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- cmd.record_write_timestamp(layer_index * 2);
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- cmd.record_write_timestamp(layer_index * 2 + 1);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats_gpu[top_blob_index] = top_blobs[i];
- }
- }
- }
- }
- }
- else // host path: taken by layers without Vulkan support and, through the goto label below, by the image path when it falls back
- {
- IMAGE_ALLOCATION_FAILED: // jump target of the image path above (empty bottom image or a -100 return from the layer)
-
- if (layer->one_blob_only)
- {
- // load bottom blob
- int bottom_blob_index = layer->bottoms[0];
- int top_blob_index = layer->tops[0];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0) // absent in all three stores: run the producing layer first
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats[bottom_blob_index].dims == 0) // re-check: download from whichever device store holds the blob now
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- // buffer to host. NOTE(review): opt is passed unchanged here and in the other downloads of this host path, while the buffer-only overload sets use_packing_layout = layer->support_packing before downloading - confirm the difference is intended
- cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- else
- {
- // image to host
- cmd.record_download(blob_mats_gpu_image[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // image to host (a device image already existed on entry)
- cmd.record_download(blob_mats_gpu_image[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
-
- cmd.submit_and_wait(); // runs whenever the host blob was empty on entry; submits the recorded commands and waits before the host forward below
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(layer_index * 2); // two timestamp slots (start, end) for every layer index below layer_index
- cmd.get_query_pool_results(0, layer_index * 2, results);
- for (int i = 0; i < layer_index; i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0) // skip layers whose start or end timestamp is zero
- continue;
-
- double duration_us = (end - start) * vkdev->info.timestamp_period / 1000; // tick delta scaled by timestamp_period, logged with a "us" suffix
- NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
-
- cmd.reset();
- }
-
- Mat bottom_blob = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blob.refcount != 1)
- {
- bottom_blob = bottom_blob.clone();
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- Mat& bottom_top_blob = bottom_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_top_blob, bottom_top_blob, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = bottom_top_blob;
- }
- else
- {
- Mat top_blob;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blob, top_blob, opt);
- double end = get_current_time();
- benchmark(layer, bottom_blob, top_blob, start, end);
- #else
- int ret = layer->forward(bottom_blob, top_blob, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blob
- blob_mats[top_blob_index] = top_blob;
- }
- }
- else
- {
- // load bottom blobs (first pass: record a download for every bottom that has no host copy)
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu[bottom_blob_index].dims == 0)
- {
- int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- if (ret != 0)
- return ret;
- }
-
- if (blob_mats[bottom_blob_index].dims == 0)
- {
- if (blob_mats_gpu_image[bottom_blob_index].dims == 0)
- {
- // buffer to host
- cmd.record_download(blob_mats_gpu[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu[bottom_blob_index].release();
- }
- }
- else
- {
- // image to host
- cmd.record_download(blob_mats_gpu_image[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
- }
- else
- {
- // image to host
- cmd.record_download(blob_mats_gpu_image[bottom_blob_index], blob_mats[bottom_blob_index], opt);
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats_gpu_image[bottom_blob_index].release();
- }
- }
- }
- }
-
- { // one submit for all bottoms; note this runs unconditionally, even when the loop above recorded no download
- cmd.submit_and_wait();
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(layer_index * 2);
- cmd.get_query_pool_results(0, layer_index * 2, results);
- for (int i = 0; i < layer_index; i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0)
- continue;
-
- double duration_us = (end - start) * vkdev->info.timestamp_period / 1000;
- NCNN_LOGE("%-24s %-30s %8.2lfus |", layers[i]->type.c_str(), layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
-
- cmd.reset();
- }
-
- std::vector<Mat> bottom_blobs(layer->bottoms.size()); // second pass: collect the host blobs now that the submit has completed
- for (size_t i = 0; i < layer->bottoms.size(); i++)
- {
- int bottom_blob_index = layer->bottoms[i];
-
- bottom_blobs[i] = blob_mats[bottom_blob_index];
-
- if (opt.lightmode)
- {
- // delete after taken in light mode
- blob_mats[bottom_blob_index].release();
- // deep copy for inplace forward if data is shared
- if (layer->support_inplace && *bottom_blobs[i].refcount != 1)
- {
- bottom_blobs[i] = bottom_blobs[i].clone();
- }
- }
- }
-
- // forward
- if (opt.lightmode && layer->support_inplace)
- {
- std::vector<Mat>& bottom_top_blobs = bottom_blobs;
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward_inplace(bottom_top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = bottom_top_blobs[i];
- }
- }
- else
- {
- std::vector<Mat> top_blobs(layer->tops.size());
- #if NCNN_BENCHMARK
- double start = get_current_time();
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- double end = get_current_time();
- benchmark(layer, start, end);
- #else
- int ret = layer->forward(bottom_blobs, top_blobs, opt);
- #endif // NCNN_BENCHMARK
- if (ret != 0)
- return ret;
-
- // store top blobs
- for (size_t i = 0; i < layer->tops.size(); i++)
- {
- int top_blob_index = layer->tops[i];
-
- blob_mats[top_blob_index] = top_blobs[i];
- }
- }
- }
- }
-
- // NCNN_LOGE("forward_layer %d %d %s done", layer->support_vulkan, layer_index, layer->name.c_str());
-
- return 0; // success; every failing recursive call or layer forward above returned its non-zero code early
- }
- #endif // NCNN_VULKAN
-
- // Builds an extractor bound to _net with one (initially empty) host blob slot per
- // network blob. The extractor starts from a copy of the network options, which the
- // set_* methods then adjust for this extractor only.
- Extractor::Extractor(const Net* _net, size_t blob_count)
- : net(_net)
- {
- blob_mats.resize(blob_count);
- opt = net->opt;
-
- #if NCNN_VULKAN
- // Null the extractor-local allocators unconditionally. The destructor re-reads
- // net->opt.use_vulkan_compute at destruction time, so leaving them uninitialised
- // when the flag is off now would let it inspect indeterminate pointers if the
- // flag is switched on later.
- local_blob_vkallocator = 0;
- local_staging_vkallocator = 0;
-
- if (net->opt.use_vulkan_compute)
- {
- // device buffer and device image slots, again one per network blob
- blob_mats_gpu.resize(blob_count);
- blob_mats_gpu_image.resize(blob_count);
- }
- #endif // NCNN_VULKAN
- }
-
- // Drops every cached blob and, when the network uses Vulkan, hands any
- // extractor-local allocators back to the device.
- Extractor::~Extractor()
- {
- blob_mats.clear();
-
- #if NCNN_VULKAN
- if (!net->opt.use_vulkan_compute)
- return;
-
- blob_mats_gpu.clear();
- blob_mats_gpu_image.clear();
-
- if (local_blob_vkallocator != 0)
- net->vkdev->reclaim_blob_allocator(local_blob_vkallocator);
-
- if (local_staging_vkallocator != 0)
- net->vkdev->reclaim_staging_allocator(local_staging_vkallocator);
- #endif // NCNN_VULKAN
- }
-
- void Extractor::set_light_mode(bool enable)
- {
- opt.lightmode = enable;
- }
-
- void Extractor::set_num_threads(int num_threads)
- {
- opt.num_threads = num_threads;
- }
-
- void Extractor::set_blob_allocator(Allocator* allocator)
- {
- opt.blob_allocator = allocator;
- }
-
- void Extractor::set_workspace_allocator(Allocator* allocator)
- {
- opt.workspace_allocator = allocator;
- }
-
- #if NCNN_VULKAN
- void Extractor::set_vulkan_compute(bool enable)
- {
- if (net->opt.use_vulkan_compute)
- {
- opt.use_vulkan_compute = enable;
- }
- else
- {
- NCNN_LOGE("set_vulkan_compute failed, network use_vulkan_compute disabled");
- }
- }
-
- void Extractor::set_blob_vkallocator(VkAllocator* allocator)
- {
- opt.blob_vkallocator = allocator;
- }
-
- void Extractor::set_workspace_vkallocator(VkAllocator* allocator)
- {
- opt.workspace_vkallocator = allocator;
- }
-
- void Extractor::set_staging_vkallocator(VkAllocator* allocator)
- {
- opt.staging_vkallocator = allocator;
- }
- #endif // NCNN_VULKAN
-
- #if NCNN_STRING
- int Extractor::input(const char* blob_name, const Mat& in)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return input(blob_index, in);
- }
-
- int Extractor::extract(const char* blob_name, Mat& feat)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return extract(blob_index, feat);
- }
- #endif // NCNN_STRING
-
- int Extractor::input(int blob_index, const Mat& in)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- blob_mats[blob_index] = in;
-
- return 0;
- }
-
- int Extractor::extract(int blob_index, Mat& feat)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- int old_blocktime = get_kmp_blocktime();
- set_kmp_blocktime(opt.openmp_blocktime);
-
- int ret = 0;
-
- if (blob_mats[blob_index].dims == 0)
- {
- int layer_index = net->blobs[blob_index].producer;
-
- #if NCNN_VULKAN
- if (opt.use_vulkan_compute)
- {
- // use local allocator
- if (!opt.blob_vkallocator)
- {
- local_blob_vkallocator = net->vkdev->acquire_blob_allocator();
- opt.blob_vkallocator = local_blob_vkallocator;
- }
- if (!opt.workspace_vkallocator)
- {
- opt.workspace_vkallocator = opt.blob_vkallocator;
- }
- if (!opt.staging_vkallocator)
- {
- local_staging_vkallocator = net->vkdev->acquire_staging_allocator();
- opt.staging_vkallocator = local_staging_vkallocator;
- }
-
- ncnn::VkCompute cmd(net->vkdev);
- #if NCNN_BENCHMARK
- cmd.create_query_pool(net->layers.size() * 2);
- #endif // NCNN_BENCHMARK
-
- // TODO vkimagemat for adreno
- if (opt.use_image_storage)
- {
- VkImageMat feat_gpu;
- ret = extract(blob_index, feat_gpu, cmd);
-
- if (blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
- {
- cmd.record_download(feat_gpu, blob_mats[blob_index], opt);
-
- cmd.submit_and_wait();
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(net->layers.size() * 2);
- cmd.get_query_pool_results(0, net->layers.size() * 2, results);
- for (size_t i = 0; i < net->layers.size(); i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0)
- continue;
-
- double duration_us = (end - start) * net->vkdev->info.timestamp_period / 1000;
- NCNN_LOGE("%-24s %-30s %8.2lfus |", net->layers[i]->type.c_str(), net->layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
- }
- }
- else
- {
- VkMat feat_gpu;
- ret = extract(blob_index, feat_gpu, cmd);
-
- if (blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
- {
- cmd.record_download(feat_gpu, blob_mats[blob_index], opt);
-
- cmd.submit_and_wait();
-
- #if NCNN_BENCHMARK
- std::vector<uint64_t> results(net->layers.size() * 2);
- cmd.get_query_pool_results(0, net->layers.size() * 2, results);
- for (size_t i = 0; i < net->layers.size(); i++)
- {
- uint64_t start = results[i * 2];
- uint64_t end = results[i * 2 + 1];
- if (start == 0 || end == 0)
- continue;
-
- double duration_us = (end - start) * net->vkdev->info.timestamp_period / 1000;
- NCNN_LOGE("%-24s %-30s %8.2lfus |", net->layers[i]->type.c_str(), net->layers[i]->name.c_str(), duration_us);
- }
- #endif // NCNN_BENCHMARK
- }
- }
- }
- else
- {
- ret = net->forward_layer(layer_index, blob_mats, opt);
- }
- #else
- ret = net->forward_layer(layer_index, blob_mats, opt);
- #endif // NCNN_VULKAN
- }
-
- feat = blob_mats[blob_index];
-
- if (opt.use_packing_layout)
- {
- Mat bottom_blob_unpacked;
- convert_packing(feat, bottom_blob_unpacked, 1, opt);
- feat = bottom_blob_unpacked;
- }
-
- // clang-format off
- // *INDENT-OFF*
- #if NCNN_ARM82
- if (opt.use_fp16_storage && cpu_support_arm_asimdhp())
- {
- if (feat.elemsize / feat.elempack == 2u)
- {
- Mat feat_fp32;
- cast_float16_to_float32(feat, feat_fp32, opt);
- feat = feat_fp32;
- }
- }
- else
- #endif // NCNN_ARM82
- if (opt.use_bf16_storage)
- {
- if (feat.elemsize / feat.elempack == 2u)
- {
- Mat feat_fp32;
- cast_bfloat16_to_float32(feat, feat_fp32, opt);
- feat = feat_fp32;
- }
- }
- // *INDENT-ON*
- // clang-format on
-
- set_kmp_blocktime(old_blocktime);
-
- return ret;
- }
-
- #if NCNN_VULKAN
- #if NCNN_STRING
- int Extractor::input(const char* blob_name, const VkMat& in)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return input(blob_index, in);
- }
-
- int Extractor::extract(const char* blob_name, VkMat& feat, VkCompute& cmd)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return extract(blob_index, feat, cmd);
- }
-
- int Extractor::input(const char* blob_name, const VkImageMat& in)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return input(blob_index, in);
- }
-
- int Extractor::extract(const char* blob_name, VkImageMat& feat, VkCompute& cmd)
- {
- int blob_index = net->find_blob_index_by_name(blob_name);
- if (blob_index == -1)
- return -1;
-
- return extract(blob_index, feat, cmd);
- }
- #endif // NCNN_STRING
-
- int Extractor::input(int blob_index, const VkMat& in)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- blob_mats_gpu[blob_index] = in;
-
- return 0;
- }
-
- int Extractor::extract(int blob_index, VkMat& feat, VkCompute& cmd)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- int ret = 0;
-
- if (blob_mats_gpu[blob_index].dims == 0)
- {
- int layer_index = net->blobs[blob_index].producer;
- ret = net->forward_layer(layer_index, blob_mats, blob_mats_gpu, cmd, opt);
- }
-
- if (blob_mats_gpu[blob_index].dims == 0 && blob_mats_gpu_image[blob_index].dims != 0)
- {
- // image to buffer
- cmd.record_image_to_buffer(blob_mats_gpu_image[blob_index], blob_mats_gpu[blob_index], opt);
- }
-
- feat = blob_mats_gpu[blob_index];
-
- return ret;
- }
-
- int Extractor::input(int blob_index, const VkImageMat& in)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- blob_mats_gpu_image[blob_index] = in;
-
- return 0;
- }
-
- int Extractor::extract(int blob_index, VkImageMat& feat, VkCompute& cmd)
- {
- if (blob_index < 0 || blob_index >= (int)blob_mats.size())
- return -1;
-
- int old_blocktime = get_kmp_blocktime();
- set_kmp_blocktime(opt.openmp_blocktime);
-
- int ret = 0;
-
- if (blob_mats_gpu_image[blob_index].dims == 0)
- {
- int layer_index = net->blobs[blob_index].producer;
- ret = net->forward_layer(layer_index, blob_mats, blob_mats_gpu, blob_mats_gpu_image, cmd, opt);
- }
-
- if (blob_mats_gpu_image[blob_index].dims == 0 && blob_mats_gpu[blob_index].dims != 0)
- {
- // buffer to image
- cmd.record_buffer_to_image(blob_mats_gpu[blob_index], blob_mats_gpu_image[blob_index], opt);
- }
-
- feat = blob_mats_gpu_image[blob_index];
-
- set_kmp_blocktime(old_blocktime);
-
- return ret;
- }
- #endif // NCNN_VULKAN
-
- } // namespace ncnn
|