|
- // Tencent is pleased to support the open source community by making ncnn available.
- //
- // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
- //
- // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
- // in compliance with the License. You may obtain a copy of the License at
- //
- // https://opensource.org/licenses/BSD-3-Clause
- //
- // Unless required by applicable law or agreed to in writing, software distributed
- // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- // CONDITIONS OF ANY KIND, either express or implied. See the License for the
- // specific language governing permissions and limitations under the License.
-
- #ifndef NCNN_OPTION_H
- #define NCNN_OPTION_H
-
- #include "platform.h"
-
- namespace ncnn {
-
- #if NCNN_VULKAN
- class VkAllocator; // forward declaration: Option only stores VkAllocator pointers
- class PipelineCache; // forward declaration: Option only stores a PipelineCache pointer
- #endif // NCNN_VULKAN
-
- class Allocator; // forward declaration: Option only stores Allocator pointers
- class NCNN_EXPORT Option // bundle of public configuration fields; every member is a bool, an int or a raw pointer
- {
- public:
- // default option; only declared here, so the defaults mentioned on the fields below are presumably assigned in its out-of-line definition
- Option();
-
- public:
- // light mode
- // intermediate blobs are recycled when enabled
- // enabled by default
- bool lightmode;
-
- // thread count
- // default value is the one returned by get_cpu_count()
- int num_threads;
-
- // blob memory allocator (raw pointer; ownership is not expressed in this header)
- Allocator* blob_allocator;
-
- // workspace memory allocator (raw pointer; ownership is not expressed in this header)
- Allocator* workspace_allocator;
-
- #if NCNN_VULKAN
- // blob memory allocator for the vulkan build (raw pointer; ownership is not expressed in this header)
- VkAllocator* blob_vkallocator;
-
- // workspace memory allocator for the vulkan build
- VkAllocator* workspace_vkallocator;
-
- // staging memory allocator for the vulkan build
- VkAllocator* staging_vkallocator;
-
- // pipeline cache for the vulkan build
- PipelineCache* pipeline_cache;
- #endif // NCNN_VULKAN (the four members above, and therefore the class layout, exist only when this macro is nonzero)
-
- // the time openmp threads busy-wait for more work before going to sleep
- // default value is 20ms, which keeps the cores enabled
- // without too much extra power consumption afterwards
- int openmp_blocktime;
-
- // enable winograd convolution optimization
- // improves convolution 3x3 stride1 performance, may consume more memory
- // changes should be applied before loading network structure and weight
- // enabled by default
- bool use_winograd_convolution;
-
- // enable sgemm convolution optimization
- // improves convolution 1x1 stride1 performance, may consume more memory
- // changes should be applied before loading network structure and weight
- // enabled by default
- bool use_sgemm_convolution;
-
- // enable quantized int8 inference
- // uses the low-precision int8 path for quantized models
- // changes should be applied before loading network structure and weight
- // enabled by default
- bool use_int8_inference;
-
- // enable vulkan compute (declared regardless of NCNN_VULKAN, unlike the vulkan allocator members)
- bool use_vulkan_compute;
-
- // enable bf16 data type for storage
- // improves most operator performance on all arm devices, may consume more memory
- bool use_bf16_storage;
-
- // enable options for gpu inference (fp16 / int8, each split into packed, storage and arithmetic flags)
- bool use_fp16_packed;
- bool use_fp16_storage;
- bool use_fp16_arithmetic;
- bool use_int8_packed;
- bool use_int8_storage;
- bool use_int8_arithmetic;
-
- // enable simd-friendly packed memory layout
- // improves all operator performance on all arm devices, will consume more memory
- // changes should be applied before loading network structure and weight
- // enabled by default
- bool use_packing_layout;
-
- bool use_shader_pack8; // NOTE(review): undocumented here; presumably toggles pack8 shaders for gpu inference - confirm
-
- // subgroup options, one flag per subgroup feature class
- bool use_subgroup_basic;
- bool use_subgroup_vote;
- bool use_subgroup_ballot;
- bool use_subgroup_shuffle;
-
- // turn on for adreno
- bool use_image_storage;
- bool use_tensor_storage;
-
- bool use_reserved_0; // NOTE(review): no documented meaning; presumably a placeholder kept so the member layout stays stable - confirm before reusing
-
- // enable DAZ(Denormals-Are-Zero) and FTZ(Flush-To-Zero); the table below reads as a bitmask (bit 0 = DAZ, bit 1 = FTZ)
- // default value is 3
- // 0 = DAZ OFF, FTZ OFF
- // 1 = DAZ ON,  FTZ OFF
- // 2 = DAZ OFF, FTZ ON
- // 3 = DAZ ON,  FTZ ON
- int flush_denormals;
-
- bool use_local_pool_allocator; // NOTE(review): undocumented here; presumably enables a locally created pool allocator - confirm
-
- // enable local memory optimization for gpu inference
- bool use_shader_local_memory;
-
- // enable cooperative matrix optimization for gpu inference
- bool use_cooperative_matrix;
-
- // more fine-grained control of winograd convolution, one flag per winograd variant (23 / 43 / 63)
- bool use_winograd23_convolution;
- bool use_winograd43_convolution;
- bool use_winograd63_convolution;
-
- // this option is turned on for A53/A55 automatically
- // but you can force it on or off if you wish
- bool use_a53_a55_optimized_kernel;
-
- // enable options for shared variables in gpu shader
- bool use_fp16_uniform;
- bool use_int8_uniform;
-
- bool use_reserved_9; // NOTE(review): no documented meaning; presumably a reserved placeholder - confirm before reusing
- bool use_reserved_10; // NOTE(review): no documented meaning; presumably a reserved placeholder - confirm before reusing
- bool use_reserved_11; // NOTE(review): no documented meaning; presumably a reserved placeholder - confirm before reusing
- };
-
- } // namespace ncnn
-
- #endif // NCNN_OPTION_H
|