You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_multiheadattention_1.cpp 7.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #include "testutil.h"
  15. #if NCNN_INT8
  16. static int test_multiheadattention_int8(const ncnn::Mat& q, const ncnn::Mat& k, const ncnn::Mat& v, int embed_dim, int num_heads, int attn_mask)
  17. {
  18. const int qdim = q.w;
  19. const int kdim = k.w;
  20. const int vdim = v.w;
  21. ncnn::ParamDict pd;
  22. pd.set(0, embed_dim);
  23. pd.set(1, num_heads);
  24. pd.set(2, embed_dim * qdim);
  25. pd.set(3, kdim);
  26. pd.set(4, vdim);
  27. pd.set(5, attn_mask);
  28. pd.set(6, 1.f / sqrtf(embed_dim / num_heads));
  29. pd.set(18, 2); // int8_scale_term
  30. std::vector<ncnn::Mat> weights(12);
  31. weights[0] = RandomS8Mat(embed_dim * qdim);
  32. weights[1] = RandomMat(embed_dim);
  33. weights[2] = RandomS8Mat(embed_dim * kdim);
  34. weights[3] = RandomMat(embed_dim);
  35. weights[4] = RandomS8Mat(embed_dim * vdim);
  36. weights[5] = RandomMat(embed_dim);
  37. weights[6] = RandomS8Mat(qdim * embed_dim);
  38. weights[7] = RandomMat(qdim);
  39. weights[8] = RandomMat(embed_dim, 160.f, 200.f);
  40. weights[9] = RandomMat(embed_dim, 160.f, 200.f);
  41. weights[10] = RandomMat(embed_dim, 160.f, 200.f);
  42. weights[11] = RandomMat(1, 160.f, 200.f);
  43. std::vector<ncnn::Mat> as(3);
  44. as[0] = q;
  45. as[1] = k;
  46. as[2] = v;
  47. if (attn_mask)
  48. {
  49. as.push_back(RandomMat(k.h, q.h));
  50. }
  51. float epsilon = 0.1;
  52. int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
  53. if (ret != 0)
  54. {
  55. fprintf(stderr, "test_multiheadattention_int8 failed q=(%d %d) k=(%d %d) v=(%d %d) embed_dim=%d num_heads=%d kdim=%d vdim=%d attn_mask=%d\n", q.w, q.h, k.w, k.h, v.w, v.h, embed_dim, num_heads, kdim, vdim, attn_mask);
  56. }
  57. return ret;
  58. }
  59. static int test_multiheadattention_int8_samekv(const ncnn::Mat& q, const ncnn::Mat& kv, int embed_dim, int num_heads)
  60. {
  61. const int qdim = q.w;
  62. const int kvdim = kv.w;
  63. ncnn::ParamDict pd;
  64. pd.set(0, embed_dim);
  65. pd.set(1, num_heads);
  66. pd.set(2, embed_dim * qdim);
  67. pd.set(3, kvdim);
  68. pd.set(4, kvdim);
  69. pd.set(6, 1.f / sqrtf(embed_dim / num_heads));
  70. pd.set(18, 2); // int8_scale_term
  71. std::vector<ncnn::Mat> weights(12);
  72. weights[0] = RandomS8Mat(embed_dim * qdim);
  73. weights[1] = RandomMat(embed_dim);
  74. weights[2] = RandomS8Mat(embed_dim * kvdim);
  75. weights[3] = RandomMat(embed_dim);
  76. weights[4] = RandomS8Mat(embed_dim * kvdim);
  77. weights[5] = RandomMat(embed_dim);
  78. weights[6] = RandomS8Mat(qdim * embed_dim);
  79. weights[7] = RandomMat(qdim);
  80. weights[8] = RandomMat(embed_dim, 160.f, 200.f);
  81. weights[9] = RandomMat(embed_dim, 160.f, 200.f);
  82. weights[10] = RandomMat(embed_dim, 160.f, 200.f);
  83. weights[11] = RandomMat(1, 160.f, 200.f);
  84. std::vector<ncnn::Mat> as(2);
  85. as[0] = q;
  86. as[1] = kv;
  87. float epsilon = 0.1;
  88. int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
  89. if (ret != 0)
  90. {
  91. fprintf(stderr, "test_multiheadattention_int8_samekv failed q=(%d %d) kv=(%d %d) embed_dim=%d num_heads=%d kvdim=%d\n", q.w, q.h, kv.w, kv.h, embed_dim, num_heads, kvdim);
  92. }
  93. return ret;
  94. }
  95. static int test_multiheadattention_int8_sameqkv(const ncnn::Mat& a, int embed_dim, int num_heads)
  96. {
  97. const int qdim = a.w;
  98. ncnn::ParamDict pd;
  99. pd.set(0, embed_dim);
  100. pd.set(1, num_heads);
  101. pd.set(2, embed_dim * qdim);
  102. pd.set(3, qdim);
  103. pd.set(4, qdim);
  104. pd.set(6, 1.f / sqrtf(embed_dim / num_heads));
  105. pd.set(18, 2); // int8_scale_term
  106. std::vector<ncnn::Mat> weights(12);
  107. weights[0] = RandomS8Mat(embed_dim * qdim);
  108. weights[1] = RandomMat(embed_dim);
  109. weights[2] = RandomS8Mat(embed_dim * qdim);
  110. weights[3] = RandomMat(embed_dim);
  111. weights[4] = RandomS8Mat(embed_dim * qdim);
  112. weights[5] = RandomMat(embed_dim);
  113. weights[6] = RandomS8Mat(qdim * embed_dim);
  114. weights[7] = RandomMat(qdim);
  115. weights[8] = RandomMat(embed_dim, 160.f, 200.f);
  116. weights[9] = RandomMat(embed_dim, 160.f, 200.f);
  117. weights[10] = RandomMat(embed_dim, 160.f, 200.f);
  118. weights[11] = RandomMat(1, 160.f, 200.f);
  119. std::vector<ncnn::Mat> as(1);
  120. as[0] = a;
  121. float epsilon = 0.1;
  122. int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
  123. if (ret != 0)
  124. {
  125. fprintf(stderr, "test_multiheadattention_int8_sameqkv failed a=(%d %d) embed_dim=%d num_heads=%d\n", a.w, a.h, embed_dim, num_heads);
  126. }
  127. return ret;
  128. }
  129. static int test_multiheadattention_0()
  130. {
  131. return 0
  132. || test_multiheadattention_int8(RandomMat(62, 66), RandomMat(32, 66), RandomMat(20, 66), 62, 2, 0)
  133. || test_multiheadattention_int8(RandomMat(26, 64), RandomMat(32, 64), RandomMat(18, 64), 26, 2, 1)
  134. || test_multiheadattention_int8(RandomMat(64, 128), RandomMat(64, 128), RandomMat(64, 128), 64, 4, 0)
  135. || test_multiheadattention_int8(RandomMat(48, 127), RandomMat(64, 127), RandomMat(64, 127), 64, 16, 1)
  136. || test_multiheadattention_int8(RandomMat(16, 128), RandomMat(44, 128), RandomMat(55, 128), 16, 2, 0)
  137. || test_multiheadattention_int8(RandomMat(12, 128), RandomMat(44, 127), RandomMat(55, 127), 16, 4, 1)
  138. || test_multiheadattention_int8(RandomMat(12, 17), RandomMat(28, 127), RandomMat(32, 127), 12, 3, 0)
  139. || test_multiheadattention_int8(RandomMat(12, 17), RandomMat(28, 32), RandomMat(11, 32), 12, 3, 1);
  140. }
  141. static int test_multiheadattention_1()
  142. {
  143. return 0
  144. || test_multiheadattention_int8_samekv(RandomMat(64, 128), RandomMat(64, 128), 64, 4)
  145. || test_multiheadattention_int8_samekv(RandomMat(48, 127), RandomMat(64, 127), 64, 16)
  146. || test_multiheadattention_int8_samekv(RandomMat(16, 128), RandomMat(44, 128), 16, 2)
  147. || test_multiheadattention_int8_samekv(RandomMat(12, 128), RandomMat(22, 127), 16, 4)
  148. || test_multiheadattention_int8_samekv(RandomMat(12, 17), RandomMat(28, 127), 12, 3)
  149. || test_multiheadattention_int8_samekv(RandomMat(12, 17), RandomMat(11, 32), 12, 3);
  150. }
  151. static int test_multiheadattention_2()
  152. {
  153. return 0
  154. || test_multiheadattention_int8_sameqkv(RandomMat(64, 128), 64, 4)
  155. || test_multiheadattention_int8_sameqkv(RandomMat(48, 127), 64, 8);
  156. }
  157. #endif
  158. int main()
  159. {
  160. SRAND(7767517);
  161. #if NCNN_INT8
  162. return 0
  163. || test_multiheadattention_0()
  164. || test_multiheadattention_1()
  165. || test_multiheadattention_2();
  166. #else
  167. // test nothing
  168. return 0;
  169. #endif
  170. }