You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

requantize.cpp 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. // BUG1989 is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2019 BUG1989. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "requantize.h"
  15. #include <math.h>
  16. namespace ncnn {
  17. DEFINE_LAYER_CREATOR(Requantize)
  18. Requantize::Requantize()
  19. {
  20. one_blob_only = true;
  21. support_inplace = false;
  22. fusion_relu = false;
  23. }
  // Converts a float to a signed 8-bit value, rounding to the nearest integer
  // (halfway cases away from zero) and saturating to the symmetric range
  // [-127, 127].
  //
  // Saturation is performed on the rounded float *before* the conversion to
  // int: converting a float that does not fit in an int (or Inf/NaN) is
  // undefined behaviour, and on common targets a huge positive input would
  // otherwise come out as INT_MIN and be clamped to -127 instead of +127.
  //
  // NaN has no meaningful quantized value; it is mapped to 0.
  static inline signed char float2int8(float v)
  {
      // NaN is the only value that compares unequal to itself.
      if (v != v) return 0;

      const float rounded = static_cast<float>(round(v));
      if (rounded > 127.f) return 127;
      if (rounded < -127.f) return -127;

      // rounded is now an integer value in [-127, 127], so both casts are exact.
      return static_cast<signed char>(static_cast<int>(rounded));
  }
  31. int Requantize::load_param(const ParamDict& pd)
  32. {
  33. scale_in = pd.get(0, 1.f); // bottom_blob_scale * weight_scale
  34. scale_out = pd.get(1, 1.f); // top_blob_scale
  35. bias_term = pd.get(2, 0);
  36. bias_data_size = pd.get(3, 0);
  37. fusion_relu = pd.get(4, 0);
  38. return 0;
  39. }
  40. int Requantize::load_model(const ModelBin& mb)
  41. {
  42. if (bias_term)
  43. {
  44. bias_data = mb.load(bias_data_size, 1);
  45. if (bias_data.empty())
  46. return -100;
  47. }
  48. return 0;
  49. }
  50. int Requantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
  51. {
  52. int dims = bottom_blob.dims;
  53. if (dims == 1)
  54. {
  55. int w = bottom_blob.w;
  56. const int* intptr = bottom_blob;
  57. signed char * ptr = top_blob;
  58. if (bias_term)
  59. {
  60. if (bias_data_size > 1)
  61. {
  62. #pragma omp parallel for num_threads(opt.num_threads)
  63. for (int i=0; i<w; i++)
  64. {
  65. ptr[i] = float2int8(((intptr[i] * scale_in) + bias_data[i]) * scale_out);
  66. if (fusion_relu && ptr[i] < 0)
  67. ptr[i] = 0;
  68. }
  69. }
  70. else
  71. {
  72. float bias = bias_data[0];
  73. #pragma omp parallel for num_threads(opt.num_threads)
  74. for (int i=0; i<w; i++)
  75. {
  76. ptr[i] = float2int8(((intptr[i] * scale_in) + bias) * scale_out);
  77. if (fusion_relu && ptr[i] < 0)
  78. ptr[i] = 0;
  79. }
  80. }
  81. }
  82. else
  83. {
  84. #pragma omp parallel for num_threads(opt.num_threads)
  85. for (int i=0; i<w; i++)
  86. {
  87. ptr[i] = float2int8(intptr[i] * scale_in * scale_out);
  88. if (fusion_relu && ptr[i] < 0)
  89. ptr[i] = 0;
  90. }
  91. }
  92. }
  93. if (dims == 2)
  94. {
  95. int w = bottom_blob.w;
  96. int h = bottom_blob.h;
  97. if (bias_term)
  98. {
  99. #pragma omp parallel for num_threads(opt.num_threads)
  100. for (int i=0; i<h; i++)
  101. {
  102. const int* intptr = bottom_blob.row<const int>(i);
  103. signed char* ptr = top_blob.row<signed char>(i);
  104. float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];
  105. for (int j=0; j<w; j++)
  106. {
  107. ptr[j] = float2int8(((intptr[j] * scale_in) + bias) * scale_out);
  108. if (fusion_relu && ptr[j] < 0)
  109. ptr[j] = 0;
  110. }
  111. }
  112. }
  113. else
  114. {
  115. #pragma omp parallel for num_threads(opt.num_threads)
  116. for (int i=0; i<h; i++)
  117. {
  118. const int* intptr = bottom_blob.row<const int>(i);
  119. signed char* ptr = top_blob.row<signed char>(i);
  120. for (int j=0; j<w; j++)
  121. {
  122. ptr[j] = float2int8(intptr[j] * scale_in * scale_out);
  123. if (fusion_relu && ptr[j] < 0)
  124. ptr[j] = 0;
  125. }
  126. }
  127. }
  128. }
  129. if (dims == 3)
  130. {
  131. int w = bottom_blob.w;
  132. int h = bottom_blob.h;
  133. int channels = bottom_blob.c;
  134. int size = w * h;
  135. if (bias_term)
  136. {
  137. #pragma omp parallel for num_threads(opt.num_threads)
  138. for (int q=0; q<channels; q++)
  139. {
  140. const int* intptr = bottom_blob.channel(q);
  141. signed char* ptr = top_blob.channel(q);
  142. float bias = bias_data_size > 1 ? bias_data[q] : bias_data[0];
  143. for (int i=0; i<size; i++)
  144. {
  145. ptr[i] = float2int8(((intptr[i] * scale_in) + bias) * scale_out);
  146. if (fusion_relu && ptr[i] < 0)
  147. ptr[i] = 0;
  148. }
  149. }
  150. }
  151. else
  152. {
  153. #pragma omp parallel for num_threads(opt.num_threads)
  154. for (int q=0; q<channels; q++)
  155. {
  156. const int* intptr = bottom_blob.channel(q);
  157. signed char* ptr = top_blob.channel(q);
  158. for (int i=0; i<size; i++)
  159. {
  160. ptr[i] = float2int8(intptr[i] * scale_in * scale_out);
  161. if (fusion_relu && ptr[i] < 0)
  162. ptr[i] = 0;
  163. }
  164. }
  165. }
  166. }
  167. return 0;
  168. }
  169. } // namespace ncnn