 new int8 implement,better accuracy (#749)
* add the armv7a conv3x3s1 implement without overflow,remove old codes
* fix the bug of conv3x3s2 packed int8
* new int8 implement,weight quant by per-channel,better accuracy~
* fix the bug of conv3x3s1 packed int8 neon
* add the naive c fp32 and int8 winograd F(2,3)
* add the neon intrinsic int8 winograd F(2,3)
* optimize the armv7a int8 winograd F(2,3) with neon assembly
* optimize the armv7a int8 winograd F(2,3) input transform with assembly.
* add the requantize layer and int8 relu implement.
* add graph optimize conv1x1s2 -> conv1x1s1,begin optimize int8 aarch64.
* fix int8 bugs
* add the c naive im2col with sgemm
* add aarch64 int8 winograd f23, conv3x3s2 naive implement
* add the int8 sgemm conv7x7s2 on x86/armv7a platform
* optimize the int8 sgemm by neon intrinsic and packed kernel
* optimize the int8 sgemm with packed data
* optimize the int8 sgemm with armv7a neon assembly
* add the int8 sgemm on arm64-v8a platform
* prepare to merge latest code from master
* add the int8 param files
* In the Class Net,add the fuse_network method
7 years ago  new int8 implement,better accuracy (#749)
* add the armv7a conv3x3s1 implement without overflow,remove old codes
* fix the bug of conv3x3s2 packed int8
* new int8 implement,weight quant by per-channel,better accuracy~
* fix the bug of conv3x3s1 packed int8 neon
* add the naive c fp32 and int8 winograd F(2,3)
* add the neon intrinsic int8 winograd F(2,3)
* optimize the armv7a int8 winograd F(2,3) with neon assembly
* optimize the armv7a int8 winograd F(2,3) input transform with assembly.
* add the requantize layer and int8 relu implement.
* add graph optimize conv1x1s2 -> conv1x1s1,begin optimize int8 aarch64.
* fix int8 bugs
* add the c naive im2col with sgemm
* add aarch64 int8 winograd f23, conv3x3s2 naive implement
* add the int8 sgemm conv7x7s2 on x86/armv7a platform
* optimize the int8 sgemm by neon intrinsic and packed kernel
* optimize the int8 sgemm with packed data
* optimize the int8 sgemm with armv7a neon assembly
* add the int8 sgemm on arm64-v8a platform
* prepare to merge latest code from master
* add the int8 param files
* In the Class Net,add the fuse_network method
7 years ago |
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- // Tencent is pleased to support the open source community by making ncnn available.
- //
- // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
- //
- // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
- // in compliance with the License. You may obtain a copy of the License at
- //
- // https://opensource.org/licenses/BSD-3-Clause
- //
- // Unless required by applicable law or agreed to in writing, software distributed
- // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- // CONDITIONS OF ANY KIND, either express or implied. See the License for the
- // specific language governing permissions and limitations under the License.
-
- #ifndef LAYER_CONVOLUTION_H
- #define LAYER_CONVOLUTION_H
-
- #include "layer.h"
-
- namespace ncnn {
-
- class Convolution : public Layer // Convolution layer: declares the kernel/stride/dilation/padding params and the weight/bias storage below.
- {
- public:
- Convolution();
- // Loads this layer's parameters from pd (presumably fills the "param" fields below — confirm in the .cpp).
- virtual int load_param(const ParamDict& pd);
- // Loads model data from mb (presumably fills the "model" Mats below — confirm in the .cpp).
- virtual int load_model(const ModelBin& mb);
- // Setup step taking the runtime Option; what it prepares is not visible from this header.
- virtual int create_pipeline(const Option& opt);
- // Runs the layer: bottom_blob is the input, top_blob is the output. Declared const, so it cannot modify the members below.
- virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
-
- protected:
- void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; // output goes to bottom_blob_bordered; presumably applies pad_* / pad_value — TODO confirm
- // The int8 forward variant is only declared when the build defines NCNN_INT8 as non-zero.
- #if NCNN_INT8
- int forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
- #endif
-
- public:
- // param (meanings below are inferred from the field names unless an original note says otherwise — confirm against load_param)
- int num_output; // presumably the number of output channels
- int kernel_w; // kernel width
- int kernel_h; // kernel height
- int dilation_w; // dilation along width
- int dilation_h; // dilation along height
- int stride_w; // stride along width
- int stride_h; // stride along height
- int pad_left; // -233=SAME_UPPER -234=SAME_LOWER
- int pad_right;
- int pad_top;
- int pad_bottom;
- float pad_value; // presumably the fill value used for padded elements — TODO confirm
- int bias_term; // presumably non-zero when bias_data is used — TODO confirm
-
- int weight_data_size; // presumably the element count of weight_data — TODO confirm
-
- int int8_scale_term; // NOTE(review): looks like a flag/selector for the int8 scale Mats below — exact values not visible here
-
- // 0=none 1=relu 2=leakyrelu 3=clip 4=sigmoid
- int activation_type;
- Mat activation_params; // parameters for the selected activation; layout not visible in this header
-
- // model
- Mat weight_data;
- Mat bias_data;
- // Quantization scale storage; these members exist only when NCNN_INT8 is enabled, so the class layout depends on that macro.
- #if NCNN_INT8
- Mat weight_data_int8_scales;
- Mat bottom_blob_int8_scales;
- Mat top_blob_int8_scales;
- #endif
-
- // implementation type, 0 means do not use auto pack model
- int impl_type;
- };
-
- } // namespace ncnn
-
- #endif // LAYER_CONVOLUTION_H
|