You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

test_lstm.cpp 6.8 kB

LSTM arm/x86 + fp16 innerproduct arm (#1881) * added fp16 weight storage version * Small changes * Fixed fp16 weight storage layers * fix innerproduct * fix loop error * Fix windows build. Disable fp 16 conversion when detecting int8 weights. Implement requested changes. * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * Update option.cpp Set fp16 storage based on vulkan being used or not. * added ability for storing state in lstm layer * added avx lstm * added arm lstm * fix innerproduct activation location and add 4 parallel channel version * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * revert arm file * commit before switch * implement requested changes * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * More x86 optimized implementations of common layers. Added LSTM layers for arm and x86 + a ctest to verify the layer accuracy Added fp16 innerproduct for arm * fix non avx build * Add fp16 arm compiler and cpu checks. Remove statefullness from LSTM implementation. * Fix build check for fp16 arm * Bypass lstm_fp16 if not supported * Build order was incorrect * fix std::min missing in windows build * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * attempting to fix gnu build by enabling: -mfp16-format=ieee to fix the missing __fp16 type * remove double "fix" * Specify ieee fp16 format * implement requested changes * fix arm non-fp16 build * fix arm lstm * Restyled/pull 1881 (#15) * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle Co-authored-by: Restyled.io <commits@restyled.io> * Check blob size on arm lstm * fix styling Co-authored-by: Restyled.io <commits@restyled.io>
5 years ago
LSTM arm/x86 + fp16 innerproduct arm (#1881) * added fp16 weight storage version * Small changes * Fixed fp16 weight storage layers * fix innerproduct * fix loop error * Fix windows build. Disable fp 16 conversion when detecting int8 weights. Implement requested changes. * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * Update option.cpp Set fp16 storage based on vulkan being used or not. * added ability for storing state in lstm layer * added avx lstm * added arm lstm * fix innerproduct activation location and add 4 parallel channel version * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * revert arm file * commit before switch * implement requested changes * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * More x86 optimized implementations of common layers. Added LSTM layers for arm and x86 + a ctest to verify the layer accuracy Added fp16 innerproduct for arm * fix non avx build * Add fp16 arm compiler and cpu checks. Remove statefullness from LSTM implementation. * Fix build check for fp16 arm * Bypass lstm_fp16 if not supported * Build order was incorrect * fix std::min missing in windows build * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle * attempting to fix gnu build by enabling: -mfp16-format=ieee to fix the missing __fp16 type * remove double "fix" * Specify ieee fp16 format * implement requested changes * fix arm non-fp16 build * fix arm lstm * Restyled/pull 1881 (#15) * Restyled by clang-format * Restyled by astyle * Restyled by clang-format * Restyled by astyle Co-authored-by: Restyled.io <commits@restyled.io> * Check blob size on arm lstm * fix styling Co-authored-by: Restyled.io <commits@restyled.io>
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "layer/lstm.h"
  15. #include "testutil.h"
  16. static int test_lstm(const ncnn::Mat& a, int outch, int direction)
  17. {
  18. int input_size = a.w * a.h * a.c;
  19. int num_directions = direction == 2 ? 2 : 1;
  20. ncnn::ParamDict pd;
  21. pd.set(0, outch); // num_output
  22. pd.set(1, outch * input_size * 4 * num_directions);
  23. pd.set(2, direction); // bias_term
  24. std::vector<ncnn::Mat> weights(3);
  25. weights[0] = RandomMat(outch * input_size * 4 * num_directions);
  26. weights[1] = RandomMat(outch * 4 * num_directions);
  27. weights[2] = RandomMat(outch * outch * 4 * num_directions);
  28. int ret = test_layer<ncnn::LSTM>("LSTM", pd, weights, a);
  29. if (ret != 0)
  30. {
  31. fprintf(stderr, "test_lstm failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction);
  32. }
  33. return ret;
  34. }
  35. int test_lstm_layer(const ncnn::Mat& a, int outch, int direction, float epsilon = 0.01)
  36. {
  37. int input_size = a.w * a.h * a.c;
  38. ncnn::ParamDict pd;
  39. pd.set(0, outch); // num_output
  40. pd.set(1, outch * input_size * 4);
  41. pd.set(2, direction); // bias_term
  42. int num_directions = direction == 2 ? 2 : 1;
  43. std::vector<ncnn::Mat> weights(3);
  44. weights[0] = RandomMat(outch * input_size * 4 * num_directions);
  45. weights[1] = RandomMat(outch * 4 * num_directions);
  46. weights[2] = RandomMat(outch * outch * 4 * num_directions);
  47. ncnn::Option opt;
  48. opt.num_threads = 1;
  49. opt.use_int8_inference = false;
  50. ncnn::LSTM* op = (ncnn::LSTM*)ncnn::create_layer(ncnn::layer_to_index("LSTM"));
  51. if (!op->support_vulkan) opt.use_vulkan_compute = false;
  52. if (!op->support_packing) opt.use_packing_layout = false;
  53. if (!op->support_bf16_storage) opt.use_bf16_storage = false;
  54. if (!op->support_image_storage) opt.use_image_storage = false;
  55. op->load_param(pd);
  56. ncnn::ModelBinFromMatArray mb(weights.data());
  57. op->load_model(mb);
  58. op->create_pipeline(opt);
  59. ncnn::Mat b;
  60. op->LSTM::forward(a, b, opt);
  61. std::vector<ncnn::Mat> _c1(3);
  62. std::vector<ncnn::Mat> _c2(3);
  63. std::vector<ncnn::Mat> a1(3);
  64. std::vector<ncnn::Mat> a2(3);
  65. if (direction == 0)
  66. {
  67. a1[0] = a.row_range(0, a.h / 2).clone();
  68. a2[0] = a.row_range(a.h / 2, a.h - a.h / 2).clone();
  69. }
  70. else
  71. {
  72. a2[0] = a.row_range(0, a.h / 2).clone();
  73. a1[0] = a.row_range(a.h / 2, a.h - a.h / 2).clone();
  74. }
  75. // initial hidden state
  76. ncnn::Mat hidden(outch);
  77. if (hidden.empty())
  78. return -100;
  79. hidden.fill(0.f);
  80. ncnn::Mat cell(outch);
  81. if (cell.empty())
  82. return -100;
  83. cell.fill(0.f);
  84. a1[1] = hidden;
  85. a1[2] = cell;
  86. op->forward(a1, _c1, opt);
  87. a2[1] = _c1[1];
  88. a2[2] = _c1[2];
  89. op->forward(a2, _c2, opt);
  90. ncnn::Mat c1 = _c1[0];
  91. ncnn::Mat c2 = _c2[0];
  92. if (direction == 1)
  93. {
  94. c2 = _c1[0];
  95. c1 = _c2[0];
  96. }
  97. // total height
  98. ncnn::Mat c;
  99. c.create(b.w, b.h, b.elemsize, opt.blob_allocator);
  100. if (c.empty())
  101. return -100;
  102. unsigned char* outptr = c;
  103. int c1_size = c1.w * c1.h;
  104. const unsigned char* c1ptr = c1;
  105. memcpy(outptr, c1ptr, c1_size * c1.elemsize);
  106. outptr += c1_size * c1.elemsize;
  107. int c2_size = c2.w * c2.h;
  108. const unsigned char* c2ptr = c2;
  109. memcpy(outptr, c2ptr, c2_size * c2.elemsize);
  110. op->destroy_pipeline(opt);
  111. delete op;
  112. if (CompareMat(b, c, epsilon) != 0)
  113. {
  114. fprintf(stderr, "test_lstm two step failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction);
  115. return -1;
  116. }
  117. return 0;
  118. }
  119. static int test_lstm_0()
  120. {
  121. return 0
  122. || test_lstm(RandomMat(4, 1), 2, 2)
  123. || test_lstm(RandomMat(8, 2), 2, 2)
  124. || test_lstm(RandomMat(16, 8), 7, 2)
  125. || test_lstm(RandomMat(17, 8), 8, 2)
  126. || test_lstm(RandomMat(19, 15), 8, 2)
  127. || test_lstm(RandomMat(5, 16), 16, 2)
  128. || test_lstm(RandomMat(3, 16), 8, 2)
  129. || test_lstm(RandomMat(8, 16), 16, 2)
  130. || test_lstm(RandomMat(2, 5), 17, 2);
  131. }
  132. static int test_lstm_1()
  133. {
  134. return 0
  135. || test_lstm_layer(RandomMat(4, 4), 1, 1)
  136. || test_lstm_layer(RandomMat(8, 2), 2, 1)
  137. || test_lstm_layer(RandomMat(16, 8), 7, 1)
  138. || test_lstm_layer(RandomMat(17, 8), 8, 1)
  139. || test_lstm_layer(RandomMat(19, 15), 8, 1)
  140. || test_lstm_layer(RandomMat(5, 16), 16, 1)
  141. || test_lstm_layer(RandomMat(3, 16), 8, 1)
  142. || test_lstm_layer(RandomMat(2, 5), 99, 1)
  143. || test_lstm_layer(RandomMat(4, 2), 1, 0)
  144. || test_lstm_layer(RandomMat(8, 2), 2, 0)
  145. || test_lstm_layer(RandomMat(16, 8), 7, 0)
  146. || test_lstm_layer(RandomMat(17, 8), 8, 0)
  147. || test_lstm_layer(RandomMat(19, 15), 8, 0)
  148. || test_lstm_layer(RandomMat(5, 16), 16, 0)
  149. || test_lstm_layer(RandomMat(3, 16), 8, 0)
  150. || test_lstm_layer(RandomMat(2, 5), 17, 0);
  151. }
  152. static int test_lstm_2()
  153. {
  154. return 0
  155. || test_lstm(RandomMat(4, 1), 1, 0)
  156. || test_lstm(RandomMat(8, 2), 2, 0)
  157. || test_lstm(RandomMat(16, 8), 7, 0)
  158. || test_lstm(RandomMat(17, 8), 8, 0)
  159. || test_lstm(RandomMat(19, 15), 8, 0)
  160. || test_lstm(RandomMat(5, 16), 16, 0)
  161. || test_lstm(RandomMat(3, 16), 8, 0)
  162. || test_lstm(RandomMat(8, 16), 16, 0)
  163. || test_lstm(RandomMat(2, 5), 17, 0);
  164. }
  165. static int test_lstm_3()
  166. {
  167. return 0
  168. || test_lstm(RandomMat(4, 1), 1, 1)
  169. || test_lstm(RandomMat(8, 2), 2, 1)
  170. || test_lstm(RandomMat(16, 8), 7, 1)
  171. || test_lstm(RandomMat(17, 8), 8, 1)
  172. || test_lstm(RandomMat(19, 15), 8, 1)
  173. || test_lstm(RandomMat(5, 16), 16, 1)
  174. || test_lstm(RandomMat(3, 16), 8, 1)
  175. || test_lstm(RandomMat(8, 16), 16, 1)
  176. || test_lstm(RandomMat(2, 5), 17, 1);
  177. }
  178. int main()
  179. {
  180. SRAND(7767517);
  181. return 0 || test_lstm_0() || test_lstm_1() || test_lstm_2() || test_lstm_3();
  182. }