You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

normalize.cpp 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "normalize.h"
  15. #include <math.h>
  16. namespace ncnn {
  17. DEFINE_LAYER_CREATOR(Normalize)
  18. Normalize::Normalize()
  19. {
  20. one_blob_only = true;
  21. support_inplace = false;
  22. }
  23. int Normalize::load_param(const ParamDict& pd)
  24. {
  25. across_spatial = pd.get(0, 0);
  26. channel_shared = pd.get(1, 0);
  27. eps = pd.get(2, 0.0001f);
  28. scale_data_size = pd.get(3, 0);
  29. return 0;
  30. }
  31. int Normalize::load_model(const ModelBin& mb)
  32. {
  33. scale_data = mb.load(scale_data_size, 1);
  34. if (scale_data.empty())
  35. return -100;
  36. return 0;
  37. }
  38. int Normalize::forward(const Mat& bottom_blob, Mat& top_blob) const
  39. {
  40. int w = bottom_blob.w;
  41. int h = bottom_blob.h;
  42. int channels = bottom_blob.c;
  43. int size = w * h;
  44. top_blob.create(w, h, channels);
  45. if (top_blob.empty())
  46. return -100;
  47. if (across_spatial)
  48. {
  49. // square
  50. Mat square_sum_blob;
  51. square_sum_blob.create(channels);
  52. if (square_sum_blob.empty())
  53. return -100;
  54. #pragma omp parallel for
  55. for (int q=0; q<channels; q++)
  56. {
  57. const float* ptr = bottom_blob.channel(q);
  58. float ssum = 0.f;
  59. for (int i=0; i<size; i++)
  60. {
  61. ssum += ptr[i] * ptr[i];
  62. }
  63. square_sum_blob[q] = ssum;
  64. }
  65. // sum + eps
  66. float ssum = eps;
  67. for (int q=0; q<channels; q++)
  68. {
  69. ssum += square_sum_blob[q];
  70. }
  71. // 1 / sqrt(ssum)
  72. float a = 1.f / sqrt(ssum);
  73. if (channel_shared)
  74. {
  75. float scale = a * scale_data[0];
  76. #pragma omp parallel for
  77. for (int q=0; q<channels; q++)
  78. {
  79. const float* ptr = bottom_blob.channel(q);
  80. float* outptr = top_blob.channel(q);
  81. for (int i=0; i<size; i++)
  82. {
  83. outptr[i] = ptr[i] * scale;
  84. }
  85. }
  86. }
  87. else
  88. {
  89. #pragma omp parallel for
  90. for (int q=0; q<channels; q++)
  91. {
  92. const float* ptr = bottom_blob.channel(q);
  93. float* outptr = top_blob.channel(q);
  94. float scale = a * scale_data[q];
  95. for (int i=0; i<size; i++)
  96. {
  97. outptr[i] = ptr[i] * scale;
  98. }
  99. }
  100. }
  101. }
  102. else
  103. {
  104. // square sum, 1 / sqrt(ssum)
  105. Mat square_sum_blob;
  106. square_sum_blob.create(size);
  107. if (square_sum_blob.empty())
  108. return -100;
  109. if (channel_shared)
  110. {
  111. float scale = scale_data[0];
  112. #pragma omp parallel for
  113. for (int i=0; i<size; i++)
  114. {
  115. float ssum = eps;
  116. for (int q=0; q<channels; q++)
  117. {
  118. const float* ptr = bottom_blob.channel(q);
  119. ssum += ptr[i] * ptr[i];
  120. }
  121. square_sum_blob[i] = 1.f / sqrt(ssum) * scale;
  122. }
  123. #pragma omp parallel for
  124. for (int q=0; q<channels; q++)
  125. {
  126. const float* ptr = bottom_blob.channel(q);
  127. float* outptr = top_blob.channel(q);
  128. for (int i=0; i<size; i++)
  129. {
  130. outptr[i] = ptr[i] * square_sum_blob[i];
  131. }
  132. }
  133. }
  134. else
  135. {
  136. #pragma omp parallel for
  137. for (int i=0; i<size; i++)
  138. {
  139. float ssum = eps;
  140. for (int q=0; q<channels; q++)
  141. {
  142. const float* ptr = bottom_blob.channel(q);
  143. ssum += ptr[i] * ptr[i];
  144. }
  145. square_sum_blob[i] = 1.f / sqrt(ssum);
  146. }
  147. #pragma omp parallel for
  148. for (int q=0; q<channels; q++)
  149. {
  150. const float* ptr = bottom_blob.channel(q);
  151. float* outptr = top_blob.channel(q);
  152. float scale = scale_data[q];
  153. for (int i=0; i<size; i++)
  154. {
  155. outptr[i] = ptr[i] * square_sum_blob[i] * scale;
  156. }
  157. }
  158. }
  159. }
  160. return 0;
  161. }
  162. } // namespace ncnn