You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

projectq.patch001 53 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel1.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel1.hpp
  2. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel1.hpp 2020-06-05 21:07:57.000000000 +0800
  3. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel1.hpp 2021-04-19 18:04:05.541802882 +0800
  4. @@ -17,18 +17,18 @@
  5. {
  6. __m256d v[2];
  7. - v[0] = load2(&psi[I]);
  8. - v[1] = load2(&psi[I + d0]);
  9. + v[0] = load2(psi + 2 * I);
  10. + v[1] = load2(psi + 2 * (I + d0));
  11. - _mm256_storeu2_m128d((double*)&psi[I + d0], (double*)&psi[I], add(mul(v[0], m[0], mt[0]), mul(v[1], m[1], mt[1])));
  12. + _mm256_storeu2_m128d(psi + 2 * (I + d0), psi + 2 * I, add(mul(v[0], m[0], mt[0]), mul(v[1], m[1], mt[1])));
  13. }
  14. // bit indices id[.] are given from high to low (e.g. control first for CNOT)
  15. template <class V, class M>
  16. -void kernel(V &psi, unsigned id0, M const& m, std::size_t ctrlmask)
  17. +void kernel(V &psi, unsigned id0, M const& m, std::size_t ctrlmask, unsigned len)
  18. {
  19. - std::size_t n = psi.size();
  20. + std::size_t n = len;
  21. std::size_t d0 = 1UL << id0;
  22. __m256d mm[] = {load(&m[0][0], &m[1][0]), load(&m[0][1], &m[1][1])};
  23. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel2.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel2.hpp
  24. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel2.hpp 2020-06-05 21:07:57.000000000 +0800
  25. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel2.hpp 2021-04-19 18:04:05.541802882 +0800
  26. @@ -17,21 +17,21 @@
  27. {
  28. __m256d v[4];
  29. - v[0] = load2(&psi[I]);
  30. - v[1] = load2(&psi[I + d0]);
  31. - v[2] = load2(&psi[I + d1]);
  32. - v[3] = load2(&psi[I + d0 + d1]);
  33. + v[0] = load2(psi + 2 * I);
  34. + v[1] = load2(psi + 2 * (I + d0));
  35. + v[2] = load2(psi + 2 * (I + d1));
  36. + v[3] = load2(psi + 2 * (I + d0 + d1));
  37. - _mm256_storeu2_m128d((double*)&psi[I + d0], (double*)&psi[I], add(mul(v[0], m[0], mt[0]), add(mul(v[1], m[1], mt[1]), add(mul(v[2], m[2], mt[2]), mul(v[3], m[3], mt[3])))));
  38. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1], (double*)&psi[I + d1], add(mul(v[0], m[4], mt[4]), add(mul(v[1], m[5], mt[5]), add(mul(v[2], m[6], mt[6]), mul(v[3], m[7], mt[7])))));
  39. + _mm256_storeu2_m128d(psi + 2 * (I + d0), psi + 2 * I, add(mul(v[0], m[0], mt[0]), add(mul(v[1], m[1], mt[1]), add(mul(v[2], m[2], mt[2]), mul(v[3], m[3], mt[3])))));
  40. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1), psi + 2 * (I + d1), add(mul(v[0], m[4], mt[4]), add(mul(v[1], m[5], mt[5]), add(mul(v[2], m[6], mt[6]), mul(v[3], m[7], mt[7])))));
  41. }
  42. // bit indices id[.] are given from high to low (e.g. control first for CNOT)
  43. template <class V, class M>
  44. -void kernel(V &psi, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask)
  45. +void kernel(V &psi, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask, unsigned len)
  46. {
  47. - std::size_t n = psi.size();
  48. + std::size_t n = len;
  49. std::size_t d0 = 1UL << id0;
  50. std::size_t d1 = 1UL << id1;
  51. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel3.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel3.hpp
  52. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel3.hpp 2020-06-05 21:07:57.000000000 +0800
  53. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel3.hpp 2021-04-19 18:04:05.541802882 +0800
  54. @@ -17,10 +17,10 @@
  55. {
  56. __m256d v[4];
  57. - v[0] = load2(&psi[I]);
  58. - v[1] = load2(&psi[I + d0]);
  59. - v[2] = load2(&psi[I + d1]);
  60. - v[3] = load2(&psi[I + d0 + d1]);
  61. + v[0] = load2(psi + 2 * I);
  62. + v[1] = load2(psi + 2 * (I + d0));
  63. + v[2] = load2(psi + 2 * (I + d1));
  64. + v[3] = load2(psi + 2 * (I + d0 + d1));
  65. __m256d tmp[4];
  66. @@ -29,23 +29,23 @@
  67. tmp[2] = add(mul(v[0], m[8], mt[8]), add(mul(v[1], m[9], mt[9]), add(mul(v[2], m[10], mt[10]), mul(v[3], m[11], mt[11]))));
  68. tmp[3] = add(mul(v[0], m[12], mt[12]), add(mul(v[1], m[13], mt[13]), add(mul(v[2], m[14], mt[14]), mul(v[3], m[15], mt[15]))));
  69. - v[0] = load2(&psi[I + d2]);
  70. - v[1] = load2(&psi[I + d0 + d2]);
  71. - v[2] = load2(&psi[I + d1 + d2]);
  72. - v[3] = load2(&psi[I + d0 + d1 + d2]);
  73. -
  74. - _mm256_storeu2_m128d((double*)&psi[I + d0], (double*)&psi[I], add(tmp[0], add(mul(v[0], m[16], mt[16]), add(mul(v[1], m[17], mt[17]), add(mul(v[2], m[18], mt[18]), mul(v[3], m[19], mt[19]))))));
  75. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1], (double*)&psi[I + d1], add(tmp[1], add(mul(v[0], m[20], mt[20]), add(mul(v[1], m[21], mt[21]), add(mul(v[2], m[22], mt[22]), mul(v[3], m[23], mt[23]))))));
  76. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2], (double*)&psi[I + d2], add(tmp[2], add(mul(v[0], m[24], mt[24]), add(mul(v[1], m[25], mt[25]), add(mul(v[2], m[26], mt[26]), mul(v[3], m[27], mt[27]))))));
  77. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2], (double*)&psi[I + d1 + d2], add(tmp[3], add(mul(v[0], m[28], mt[28]), add(mul(v[1], m[29], mt[29]), add(mul(v[2], m[30], mt[30]), mul(v[3], m[31], mt[31]))))));
  78. + v[0] = load2(psi + 2 * (I + d2));
  79. + v[1] = load2(psi + 2 * (I + d0 + d2));
  80. + v[2] = load2(psi + 2 * (I + d1 + d2));
  81. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2));
  82. +
  83. + _mm256_storeu2_m128d(psi + 2 * (I + d0), psi + 2 * I, add(tmp[0], add(mul(v[0], m[16], mt[16]), add(mul(v[1], m[17], mt[17]), add(mul(v[2], m[18], mt[18]), mul(v[3], m[19], mt[19]))))));
  84. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1), psi + 2 * (I + d1), add(tmp[1], add(mul(v[0], m[20], mt[20]), add(mul(v[1], m[21], mt[21]), add(mul(v[2], m[22], mt[22]), mul(v[3], m[23], mt[23]))))));
  85. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2), psi + 2 * (I + d2), add(tmp[2], add(mul(v[0], m[24], mt[24]), add(mul(v[1], m[25], mt[25]), add(mul(v[2], m[26], mt[26]), mul(v[3], m[27], mt[27]))))));
  86. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2), psi + 2 * (I + d1 + d2), add(tmp[3], add(mul(v[0], m[28], mt[28]), add(mul(v[1], m[29], mt[29]), add(mul(v[2], m[30], mt[30]), mul(v[3], m[31], mt[31]))))));
  87. }
  88. // bit indices id[.] are given from high to low (e.g. control first for CNOT)
  89. template <class V, class M>
  90. -void kernel(V &psi, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask)
  91. +void kernel(V &psi, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask, unsigned len)
  92. {
  93. - std::size_t n = psi.size();
  94. + std::size_t n = len;
  95. std::size_t d0 = 1UL << id0;
  96. std::size_t d1 = 1UL << id1;
  97. std::size_t d2 = 1UL << id2;
  98. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel4.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel4.hpp
  99. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel4.hpp 2020-06-05 21:07:57.000000000 +0800
  100. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel4.hpp 2021-04-19 18:04:05.541802882 +0800
  101. @@ -17,10 +17,10 @@
  102. {
  103. __m256d v[4];
  104. - v[0] = load2(&psi[I]);
  105. - v[1] = load2(&psi[I + d0]);
  106. - v[2] = load2(&psi[I + d1]);
  107. - v[3] = load2(&psi[I + d0 + d1]);
  108. + v[0] = load2(psi + 2 * I);
  109. + v[1] = load2(psi + 2 * (I + d0));
  110. + v[2] = load2(psi + 2 * (I + d1));
  111. + v[3] = load2(psi + 2 * (I + d0 + d1));
  112. __m256d tmp[8];
  113. @@ -33,10 +33,10 @@
  114. tmp[6] = add(mul(v[0], m[24], mt[24]), add(mul(v[1], m[25], mt[25]), add(mul(v[2], m[26], mt[26]), mul(v[3], m[27], mt[27]))));
  115. tmp[7] = add(mul(v[0], m[28], mt[28]), add(mul(v[1], m[29], mt[29]), add(mul(v[2], m[30], mt[30]), mul(v[3], m[31], mt[31]))));
  116. - v[0] = load2(&psi[I + d2]);
  117. - v[1] = load2(&psi[I + d0 + d2]);
  118. - v[2] = load2(&psi[I + d1 + d2]);
  119. - v[3] = load2(&psi[I + d0 + d1 + d2]);
  120. + v[0] = load2(psi + 2 * (I + d2));
  121. + v[1] = load2(psi + 2 * (I + d0 + d2));
  122. + v[2] = load2(psi + 2 * (I + d1 + d2));
  123. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2));
  124. tmp[0] = add(tmp[0], add(mul(v[0], m[32], mt[32]), add(mul(v[1], m[33], mt[33]), add(mul(v[2], m[34], mt[34]), mul(v[3], m[35], mt[35])))));
  125. tmp[1] = add(tmp[1], add(mul(v[0], m[36], mt[36]), add(mul(v[1], m[37], mt[37]), add(mul(v[2], m[38], mt[38]), mul(v[3], m[39], mt[39])))));
  126. @@ -47,10 +47,10 @@
  127. tmp[6] = add(tmp[6], add(mul(v[0], m[56], mt[56]), add(mul(v[1], m[57], mt[57]), add(mul(v[2], m[58], mt[58]), mul(v[3], m[59], mt[59])))));
  128. tmp[7] = add(tmp[7], add(mul(v[0], m[60], mt[60]), add(mul(v[1], m[61], mt[61]), add(mul(v[2], m[62], mt[62]), mul(v[3], m[63], mt[63])))));
  129. - v[0] = load2(&psi[I + d3]);
  130. - v[1] = load2(&psi[I + d0 + d3]);
  131. - v[2] = load2(&psi[I + d1 + d3]);
  132. - v[3] = load2(&psi[I + d0 + d1 + d3]);
  133. + v[0] = load2(psi + 2 * (I + d3));
  134. + v[1] = load2(psi + 2 * (I + d0 + d3));
  135. + v[2] = load2(psi + 2 * (I + d1 + d3));
  136. + v[3] = load2(psi + 2 * (I + d0 + d1 + d3));
  137. tmp[0] = add(tmp[0], add(mul(v[0], m[64], mt[64]), add(mul(v[1], m[65], mt[65]), add(mul(v[2], m[66], mt[66]), mul(v[3], m[67], mt[67])))));
  138. tmp[1] = add(tmp[1], add(mul(v[0], m[68], mt[68]), add(mul(v[1], m[69], mt[69]), add(mul(v[2], m[70], mt[70]), mul(v[3], m[71], mt[71])))));
  139. @@ -61,27 +61,27 @@
  140. tmp[6] = add(tmp[6], add(mul(v[0], m[88], mt[88]), add(mul(v[1], m[89], mt[89]), add(mul(v[2], m[90], mt[90]), mul(v[3], m[91], mt[91])))));
  141. tmp[7] = add(tmp[7], add(mul(v[0], m[92], mt[92]), add(mul(v[1], m[93], mt[93]), add(mul(v[2], m[94], mt[94]), mul(v[3], m[95], mt[95])))));
  142. - v[0] = load2(&psi[I + d2 + d3]);
  143. - v[1] = load2(&psi[I + d0 + d2 + d3]);
  144. - v[2] = load2(&psi[I + d1 + d2 + d3]);
  145. - v[3] = load2(&psi[I + d0 + d1 + d2 + d3]);
  146. -
  147. - _mm256_storeu2_m128d((double*)&psi[I + d0], (double*)&psi[I], add(tmp[0], add(mul(v[0], m[96], mt[96]), add(mul(v[1], m[97], mt[97]), add(mul(v[2], m[98], mt[98]), mul(v[3], m[99], mt[99]))))));
  148. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1], (double*)&psi[I + d1], add(tmp[1], add(mul(v[0], m[100], mt[100]), add(mul(v[1], m[101], mt[101]), add(mul(v[2], m[102], mt[102]), mul(v[3], m[103], mt[103]))))));
  149. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2], (double*)&psi[I + d2], add(tmp[2], add(mul(v[0], m[104], mt[104]), add(mul(v[1], m[105], mt[105]), add(mul(v[2], m[106], mt[106]), mul(v[3], m[107], mt[107]))))));
  150. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2], (double*)&psi[I + d1 + d2], add(tmp[3], add(mul(v[0], m[108], mt[108]), add(mul(v[1], m[109], mt[109]), add(mul(v[2], m[110], mt[110]), mul(v[3], m[111], mt[111]))))));
  151. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d3], (double*)&psi[I + d3], add(tmp[4], add(mul(v[0], m[112], mt[112]), add(mul(v[1], m[113], mt[113]), add(mul(v[2], m[114], mt[114]), mul(v[3], m[115], mt[115]))))));
  152. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d3], (double*)&psi[I + d1 + d3], add(tmp[5], add(mul(v[0], m[116], mt[116]), add(mul(v[1], m[117], mt[117]), add(mul(v[2], m[118], mt[118]), mul(v[3], m[119], mt[119]))))));
  153. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2 + d3], (double*)&psi[I + d2 + d3], add(tmp[6], add(mul(v[0], m[120], mt[120]), add(mul(v[1], m[121], mt[121]), add(mul(v[2], m[122], mt[122]), mul(v[3], m[123], mt[123]))))));
  154. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2 + d3], (double*)&psi[I + d1 + d2 + d3], add(tmp[7], add(mul(v[0], m[124], mt[124]), add(mul(v[1], m[125], mt[125]), add(mul(v[2], m[126], mt[126]), mul(v[3], m[127], mt[127]))))));
  155. + v[0] = load2(psi + 2 * (I + d2 + d3));
  156. + v[1] = load2(psi + 2 * (I + d0 + d2 + d3));
  157. + v[2] = load2(psi + 2 * (I + d1 + d2 + d3));
  158. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2 + d3));
  159. +
  160. + _mm256_storeu2_m128d(psi + 2 * (I + d0), psi + 2 * I, add(tmp[0], add(mul(v[0], m[96], mt[96]), add(mul(v[1], m[97], mt[97]), add(mul(v[2], m[98], mt[98]), mul(v[3], m[99], mt[99]))))));
  161. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1), psi + 2 * (I + d1), add(tmp[1], add(mul(v[0], m[100], mt[100]), add(mul(v[1], m[101], mt[101]), add(mul(v[2], m[102], mt[102]), mul(v[3], m[103], mt[103]))))));
  162. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2), psi + 2 * (I + d2), add(tmp[2], add(mul(v[0], m[104], mt[104]), add(mul(v[1], m[105], mt[105]), add(mul(v[2], m[106], mt[106]), mul(v[3], m[107], mt[107]))))));
  163. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2), psi + 2 * (I + d1 + d2), add(tmp[3], add(mul(v[0], m[108], mt[108]), add(mul(v[1], m[109], mt[109]), add(mul(v[2], m[110], mt[110]), mul(v[3], m[111], mt[111]))))));
  164. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d3), psi + 2 * (I + d3), add(tmp[4], add(mul(v[0], m[112], mt[112]), add(mul(v[1], m[113], mt[113]), add(mul(v[2], m[114], mt[114]), mul(v[3], m[115], mt[115]))))));
  165. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d3), psi + 2 * (I + d1 + d3), add(tmp[5], add(mul(v[0], m[116], mt[116]), add(mul(v[1], m[117], mt[117]), add(mul(v[2], m[118], mt[118]), mul(v[3], m[119], mt[119]))))));
  166. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2 + d3), psi + 2 * (I + d2 + d3), add(tmp[6], add(mul(v[0], m[120], mt[120]), add(mul(v[1], m[121], mt[121]), add(mul(v[2], m[122], mt[122]), mul(v[3], m[123], mt[123]))))));
  167. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2 + d3), psi + 2 * (I + d1 + d2 + d3), add(tmp[7], add(mul(v[0], m[124], mt[124]), add(mul(v[1], m[125], mt[125]), add(mul(v[2], m[126], mt[126]), mul(v[3], m[127], mt[127]))))));
  168. }
  169. // bit indices id[.] are given from high to low (e.g. control first for CNOT)
  170. template <class V, class M>
  171. -void kernel(V &psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask)
  172. +void kernel(V &psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask, unsigned len)
  173. {
  174. - std::size_t n = psi.size();
  175. + std::size_t n = len;
  176. std::size_t d0 = 1UL << id0;
  177. std::size_t d1 = 1UL << id1;
  178. std::size_t d2 = 1UL << id2;
  179. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel5.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel5.hpp
  180. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/intrin/kernel5.hpp 2020-06-05 21:07:57.000000000 +0800
  181. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/intrin/kernel5.hpp 2021-04-19 18:04:05.541802882 +0800
  182. @@ -17,10 +17,10 @@
  183. {
  184. __m256d v[4];
  185. - v[0] = load2(&psi[I]);
  186. - v[1] = load2(&psi[I + d0]);
  187. - v[2] = load2(&psi[I + d1]);
  188. - v[3] = load2(&psi[I + d0 + d1]);
  189. + v[0] = load2(psi + 2 * I);
  190. + v[1] = load2(psi + 2 * (I + d0));
  191. + v[2] = load2(psi + 2 * (I + d1));
  192. + v[3] = load2(psi + 2 * (I + d0 + d1));
  193. __m256d tmp[16];
  194. @@ -41,10 +41,10 @@
  195. tmp[14] = add(mul(v[0], m[56], mt[56]), add(mul(v[1], m[57], mt[57]), add(mul(v[2], m[58], mt[58]), mul(v[3], m[59], mt[59]))));
  196. tmp[15] = add(mul(v[0], m[60], mt[60]), add(mul(v[1], m[61], mt[61]), add(mul(v[2], m[62], mt[62]), mul(v[3], m[63], mt[63]))));
  197. - v[0] = load2(&psi[I + d2]);
  198. - v[1] = load2(&psi[I + d0 + d2]);
  199. - v[2] = load2(&psi[I + d1 + d2]);
  200. - v[3] = load2(&psi[I + d0 + d1 + d2]);
  201. + v[0] = load2(psi + 2 * (I + d2));
  202. + v[1] = load2(psi + 2 * (I + d0 + d2));
  203. + v[2] = load2(psi + 2 * (I + d1 + d2));
  204. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2));
  205. tmp[0] = add(tmp[0], add(mul(v[0], m[64], mt[64]), add(mul(v[1], m[65], mt[65]), add(mul(v[2], m[66], mt[66]), mul(v[3], m[67], mt[67])))));
  206. tmp[1] = add(tmp[1], add(mul(v[0], m[68], mt[68]), add(mul(v[1], m[69], mt[69]), add(mul(v[2], m[70], mt[70]), mul(v[3], m[71], mt[71])))));
  207. @@ -63,10 +63,10 @@
  208. tmp[14] = add(tmp[14], add(mul(v[0], m[120], mt[120]), add(mul(v[1], m[121], mt[121]), add(mul(v[2], m[122], mt[122]), mul(v[3], m[123], mt[123])))));
  209. tmp[15] = add(tmp[15], add(mul(v[0], m[124], mt[124]), add(mul(v[1], m[125], mt[125]), add(mul(v[2], m[126], mt[126]), mul(v[3], m[127], mt[127])))));
  210. - v[0] = load2(&psi[I + d3]);
  211. - v[1] = load2(&psi[I + d0 + d3]);
  212. - v[2] = load2(&psi[I + d1 + d3]);
  213. - v[3] = load2(&psi[I + d0 + d1 + d3]);
  214. + v[0] = load2(psi + 2 * (I + d3));
  215. + v[1] = load2(psi + 2 * (I + d0 + d3));
  216. + v[2] = load2(psi + 2 * (I + d1 + d3));
  217. + v[3] = load2(psi + 2 * (I + d0 + d1 + d3));
  218. tmp[0] = add(tmp[0], add(mul(v[0], m[128], mt[128]), add(mul(v[1], m[129], mt[129]), add(mul(v[2], m[130], mt[130]), mul(v[3], m[131], mt[131])))));
  219. tmp[1] = add(tmp[1], add(mul(v[0], m[132], mt[132]), add(mul(v[1], m[133], mt[133]), add(mul(v[2], m[134], mt[134]), mul(v[3], m[135], mt[135])))));
  220. @@ -85,10 +85,10 @@
  221. tmp[14] = add(tmp[14], add(mul(v[0], m[184], mt[184]), add(mul(v[1], m[185], mt[185]), add(mul(v[2], m[186], mt[186]), mul(v[3], m[187], mt[187])))));
  222. tmp[15] = add(tmp[15], add(mul(v[0], m[188], mt[188]), add(mul(v[1], m[189], mt[189]), add(mul(v[2], m[190], mt[190]), mul(v[3], m[191], mt[191])))));
  223. - v[0] = load2(&psi[I + d2 + d3]);
  224. - v[1] = load2(&psi[I + d0 + d2 + d3]);
  225. - v[2] = load2(&psi[I + d1 + d2 + d3]);
  226. - v[3] = load2(&psi[I + d0 + d1 + d2 + d3]);
  227. + v[0] = load2(psi + 2 * (I + d2 + d3));
  228. + v[1] = load2(psi + 2 * (I + d0 + d2 + d3));
  229. + v[2] = load2(psi + 2 * (I + d1 + d2 + d3));
  230. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2 + d3));
  231. tmp[0] = add(tmp[0], add(mul(v[0], m[192], mt[192]), add(mul(v[1], m[193], mt[193]), add(mul(v[2], m[194], mt[194]), mul(v[3], m[195], mt[195])))));
  232. tmp[1] = add(tmp[1], add(mul(v[0], m[196], mt[196]), add(mul(v[1], m[197], mt[197]), add(mul(v[2], m[198], mt[198]), mul(v[3], m[199], mt[199])))));
  233. @@ -107,10 +107,10 @@
  234. tmp[14] = add(tmp[14], add(mul(v[0], m[248], mt[248]), add(mul(v[1], m[249], mt[249]), add(mul(v[2], m[250], mt[250]), mul(v[3], m[251], mt[251])))));
  235. tmp[15] = add(tmp[15], add(mul(v[0], m[252], mt[252]), add(mul(v[1], m[253], mt[253]), add(mul(v[2], m[254], mt[254]), mul(v[3], m[255], mt[255])))));
  236. - v[0] = load2(&psi[I + d4]);
  237. - v[1] = load2(&psi[I + d0 + d4]);
  238. - v[2] = load2(&psi[I + d1 + d4]);
  239. - v[3] = load2(&psi[I + d0 + d1 + d4]);
  240. + v[0] = load2(psi + 2 * (I + d4));
  241. + v[1] = load2(psi + 2 * (I + d0 + d4));
  242. + v[2] = load2(psi + 2 * (I + d1 + d4));
  243. + v[3] = load2(psi + 2 * (I + d0 + d1 + d4));
  244. tmp[0] = add(tmp[0], add(mul(v[0], m[256], mt[256]), add(mul(v[1], m[257], mt[257]), add(mul(v[2], m[258], mt[258]), mul(v[3], m[259], mt[259])))));
  245. tmp[1] = add(tmp[1], add(mul(v[0], m[260], mt[260]), add(mul(v[1], m[261], mt[261]), add(mul(v[2], m[262], mt[262]), mul(v[3], m[263], mt[263])))));
  246. @@ -129,10 +129,10 @@
  247. tmp[14] = add(tmp[14], add(mul(v[0], m[312], mt[312]), add(mul(v[1], m[313], mt[313]), add(mul(v[2], m[314], mt[314]), mul(v[3], m[315], mt[315])))));
  248. tmp[15] = add(tmp[15], add(mul(v[0], m[316], mt[316]), add(mul(v[1], m[317], mt[317]), add(mul(v[2], m[318], mt[318]), mul(v[3], m[319], mt[319])))));
  249. - v[0] = load2(&psi[I + d2 + d4]);
  250. - v[1] = load2(&psi[I + d0 + d2 + d4]);
  251. - v[2] = load2(&psi[I + d1 + d2 + d4]);
  252. - v[3] = load2(&psi[I + d0 + d1 + d2 + d4]);
  253. + v[0] = load2(psi + 2 * (I + d2 + d4));
  254. + v[1] = load2(psi + 2 * (I + d0 + d2 + d4));
  255. + v[2] = load2(psi + 2 * (I + d1 + d2 + d4));
  256. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2 + d4));
  257. tmp[0] = add(tmp[0], add(mul(v[0], m[320], mt[320]), add(mul(v[1], m[321], mt[321]), add(mul(v[2], m[322], mt[322]), mul(v[3], m[323], mt[323])))));
  258. tmp[1] = add(tmp[1], add(mul(v[0], m[324], mt[324]), add(mul(v[1], m[325], mt[325]), add(mul(v[2], m[326], mt[326]), mul(v[3], m[327], mt[327])))));
  259. @@ -151,10 +151,10 @@
  260. tmp[14] = add(tmp[14], add(mul(v[0], m[376], mt[376]), add(mul(v[1], m[377], mt[377]), add(mul(v[2], m[378], mt[378]), mul(v[3], m[379], mt[379])))));
  261. tmp[15] = add(tmp[15], add(mul(v[0], m[380], mt[380]), add(mul(v[1], m[381], mt[381]), add(mul(v[2], m[382], mt[382]), mul(v[3], m[383], mt[383])))));
  262. - v[0] = load2(&psi[I + d3 + d4]);
  263. - v[1] = load2(&psi[I + d0 + d3 + d4]);
  264. - v[2] = load2(&psi[I + d1 + d3 + d4]);
  265. - v[3] = load2(&psi[I + d0 + d1 + d3 + d4]);
  266. + v[0] = load2(psi + 2 * (I + d3 + d4));
  267. + v[1] = load2(psi + 2 * (I + d0 + d3 + d4));
  268. + v[2] = load2(psi + 2 * (I + d1 + d3 + d4));
  269. + v[3] = load2(psi + 2 * (I + d0 + d1 + d3 + d4));
  270. tmp[0] = add(tmp[0], add(mul(v[0], m[384], mt[384]), add(mul(v[1], m[385], mt[385]), add(mul(v[2], m[386], mt[386]), mul(v[3], m[387], mt[387])))));
  271. tmp[1] = add(tmp[1], add(mul(v[0], m[388], mt[388]), add(mul(v[1], m[389], mt[389]), add(mul(v[2], m[390], mt[390]), mul(v[3], m[391], mt[391])))));
  272. @@ -173,35 +173,35 @@
  273. tmp[14] = add(tmp[14], add(mul(v[0], m[440], mt[440]), add(mul(v[1], m[441], mt[441]), add(mul(v[2], m[442], mt[442]), mul(v[3], m[443], mt[443])))));
  274. tmp[15] = add(tmp[15], add(mul(v[0], m[444], mt[444]), add(mul(v[1], m[445], mt[445]), add(mul(v[2], m[446], mt[446]), mul(v[3], m[447], mt[447])))));
  275. - v[0] = load2(&psi[I + d2 + d3 + d4]);
  276. - v[1] = load2(&psi[I + d0 + d2 + d3 + d4]);
  277. - v[2] = load2(&psi[I + d1 + d2 + d3 + d4]);
  278. - v[3] = load2(&psi[I + d0 + d1 + d2 + d3 + d4]);
  279. -
  280. - _mm256_storeu2_m128d((double*)&psi[I + d0], (double*)&psi[I], add(tmp[0], add(mul(v[0], m[448], mt[448]), add(mul(v[1], m[449], mt[449]), add(mul(v[2], m[450], mt[450]), mul(v[3], m[451], mt[451]))))));
  281. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1], (double*)&psi[I + d1], add(tmp[1], add(mul(v[0], m[452], mt[452]), add(mul(v[1], m[453], mt[453]), add(mul(v[2], m[454], mt[454]), mul(v[3], m[455], mt[455]))))));
  282. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2], (double*)&psi[I + d2], add(tmp[2], add(mul(v[0], m[456], mt[456]), add(mul(v[1], m[457], mt[457]), add(mul(v[2], m[458], mt[458]), mul(v[3], m[459], mt[459]))))));
  283. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2], (double*)&psi[I + d1 + d2], add(tmp[3], add(mul(v[0], m[460], mt[460]), add(mul(v[1], m[461], mt[461]), add(mul(v[2], m[462], mt[462]), mul(v[3], m[463], mt[463]))))));
  284. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d3], (double*)&psi[I + d3], add(tmp[4], add(mul(v[0], m[464], mt[464]), add(mul(v[1], m[465], mt[465]), add(mul(v[2], m[466], mt[466]), mul(v[3], m[467], mt[467]))))));
  285. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d3], (double*)&psi[I + d1 + d3], add(tmp[5], add(mul(v[0], m[468], mt[468]), add(mul(v[1], m[469], mt[469]), add(mul(v[2], m[470], mt[470]), mul(v[3], m[471], mt[471]))))));
  286. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2 + d3], (double*)&psi[I + d2 + d3], add(tmp[6], add(mul(v[0], m[472], mt[472]), add(mul(v[1], m[473], mt[473]), add(mul(v[2], m[474], mt[474]), mul(v[3], m[475], mt[475]))))));
  287. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2 + d3], (double*)&psi[I + d1 + d2 + d3], add(tmp[7], add(mul(v[0], m[476], mt[476]), add(mul(v[1], m[477], mt[477]), add(mul(v[2], m[478], mt[478]), mul(v[3], m[479], mt[479]))))));
  288. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d4], (double*)&psi[I + d4], add(tmp[8], add(mul(v[0], m[480], mt[480]), add(mul(v[1], m[481], mt[481]), add(mul(v[2], m[482], mt[482]), mul(v[3], m[483], mt[483]))))));
  289. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d4], (double*)&psi[I + d1 + d4], add(tmp[9], add(mul(v[0], m[484], mt[484]), add(mul(v[1], m[485], mt[485]), add(mul(v[2], m[486], mt[486]), mul(v[3], m[487], mt[487]))))));
  290. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2 + d4], (double*)&psi[I + d2 + d4], add(tmp[10], add(mul(v[0], m[488], mt[488]), add(mul(v[1], m[489], mt[489]), add(mul(v[2], m[490], mt[490]), mul(v[3], m[491], mt[491]))))));
  291. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2 + d4], (double*)&psi[I + d1 + d2 + d4], add(tmp[11], add(mul(v[0], m[492], mt[492]), add(mul(v[1], m[493], mt[493]), add(mul(v[2], m[494], mt[494]), mul(v[3], m[495], mt[495]))))));
  292. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d3 + d4], (double*)&psi[I + d3 + d4], add(tmp[12], add(mul(v[0], m[496], mt[496]), add(mul(v[1], m[497], mt[497]), add(mul(v[2], m[498], mt[498]), mul(v[3], m[499], mt[499]))))));
  293. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d3 + d4], (double*)&psi[I + d1 + d3 + d4], add(tmp[13], add(mul(v[0], m[500], mt[500]), add(mul(v[1], m[501], mt[501]), add(mul(v[2], m[502], mt[502]), mul(v[3], m[503], mt[503]))))));
  294. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d2 + d3 + d4], (double*)&psi[I + d2 + d3 + d4], add(tmp[14], add(mul(v[0], m[504], mt[504]), add(mul(v[1], m[505], mt[505]), add(mul(v[2], m[506], mt[506]), mul(v[3], m[507], mt[507]))))));
  295. - _mm256_storeu2_m128d((double*)&psi[I + d0 + d1 + d2 + d3 + d4], (double*)&psi[I + d1 + d2 + d3 + d4], add(tmp[15], add(mul(v[0], m[508], mt[508]), add(mul(v[1], m[509], mt[509]), add(mul(v[2], m[510], mt[510]), mul(v[3], m[511], mt[511]))))));
  296. + v[0] = load2(psi + 2 * (I + d2 + d3 + d4));
  297. + v[1] = load2(psi + 2 * (I + d0 + d2 + d3 + d4));
  298. + v[2] = load2(psi + 2 * (I + d1 + d2 + d3 + d4));
  299. + v[3] = load2(psi + 2 * (I + d0 + d1 + d2 + d3 + d4));
  300. +
  301. + _mm256_storeu2_m128d(psi + 2 * (I + d0), psi + 2 * I, add(tmp[0], add(mul(v[0], m[448], mt[448]), add(mul(v[1], m[449], mt[449]), add(mul(v[2], m[450], mt[450]), mul(v[3], m[451], mt[451]))))));
  302. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1), psi + 2 * (I + d1), add(tmp[1], add(mul(v[0], m[452], mt[452]), add(mul(v[1], m[453], mt[453]), add(mul(v[2], m[454], mt[454]), mul(v[3], m[455], mt[455]))))));
  303. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2), psi + 2 * (I + d2), add(tmp[2], add(mul(v[0], m[456], mt[456]), add(mul(v[1], m[457], mt[457]), add(mul(v[2], m[458], mt[458]), mul(v[3], m[459], mt[459]))))));
  304. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2), psi + 2 * (I + d1 + d2), add(tmp[3], add(mul(v[0], m[460], mt[460]), add(mul(v[1], m[461], mt[461]), add(mul(v[2], m[462], mt[462]), mul(v[3], m[463], mt[463]))))));
  305. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d3), psi + 2 * (I + d3), add(tmp[4], add(mul(v[0], m[464], mt[464]), add(mul(v[1], m[465], mt[465]), add(mul(v[2], m[466], mt[466]), mul(v[3], m[467], mt[467]))))));
  306. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d3), psi + 2 * (I + d1 + d3), add(tmp[5], add(mul(v[0], m[468], mt[468]), add(mul(v[1], m[469], mt[469]), add(mul(v[2], m[470], mt[470]), mul(v[3], m[471], mt[471]))))));
  307. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2 + d3), psi + 2 * (I + d2 + d3), add(tmp[6], add(mul(v[0], m[472], mt[472]), add(mul(v[1], m[473], mt[473]), add(mul(v[2], m[474], mt[474]), mul(v[3], m[475], mt[475]))))));
  308. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2 + d3), psi + 2 * (I + d1 + d2 + d3), add(tmp[7], add(mul(v[0], m[476], mt[476]), add(mul(v[1], m[477], mt[477]), add(mul(v[2], m[478], mt[478]), mul(v[3], m[479], mt[479]))))));
  309. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d4), psi + 2 * (I + d4), add(tmp[8], add(mul(v[0], m[480], mt[480]), add(mul(v[1], m[481], mt[481]), add(mul(v[2], m[482], mt[482]), mul(v[3], m[483], mt[483]))))));
  310. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d4), psi + 2 * (I + d1 + d4), add(tmp[9], add(mul(v[0], m[484], mt[484]), add(mul(v[1], m[485], mt[485]), add(mul(v[2], m[486], mt[486]), mul(v[3], m[487], mt[487]))))));
  311. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2 + d4), psi + 2 * (I + d2 + d4), add(tmp[10], add(mul(v[0], m[488], mt[488]), add(mul(v[1], m[489], mt[489]), add(mul(v[2], m[490], mt[490]), mul(v[3], m[491], mt[491]))))));
  312. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2 + d4), psi + 2 * (I + d1 + d2 + d4), add(tmp[11], add(mul(v[0], m[492], mt[492]), add(mul(v[1], m[493], mt[493]), add(mul(v[2], m[494], mt[494]), mul(v[3], m[495], mt[495]))))));
  313. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d3 + d4), psi + 2 * (I + d3 + d4), add(tmp[12], add(mul(v[0], m[496], mt[496]), add(mul(v[1], m[497], mt[497]), add(mul(v[2], m[498], mt[498]), mul(v[3], m[499], mt[499]))))));
  314. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d3 + d4), psi + 2 * (I + d1 + d3 + d4), add(tmp[13], add(mul(v[0], m[500], mt[500]), add(mul(v[1], m[501], mt[501]), add(mul(v[2], m[502], mt[502]), mul(v[3], m[503], mt[503]))))));
  315. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d2 + d3 + d4), psi + 2 * (I + d2 + d3 + d4), add(tmp[14], add(mul(v[0], m[504], mt[504]), add(mul(v[1], m[505], mt[505]), add(mul(v[2], m[506], mt[506]), mul(v[3], m[507], mt[507]))))));
  316. + _mm256_storeu2_m128d(psi + 2 * (I + d0 + d1 + d2 + d3 + d4), psi + 2 * (I + d1 + d2 + d3 + d4), add(tmp[15], add(mul(v[0], m[508], mt[508]), add(mul(v[1], m[509], mt[509]), add(mul(v[2], m[510], mt[510]), mul(v[3], m[511], mt[511]))))));
  317. }
  318. // bit indices id[.] are given from high to low (e.g. control first for CNOT)
  319. template <class V, class M>
  320. -void kernel(V &psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask)
  321. +void kernel(V &psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M const& m, std::size_t ctrlmask, unsigned len)
  322. {
  323. - std::size_t n = psi.size();
  324. + std::size_t n = len;
  325. std::size_t d0 = 1UL << id0;
  326. std::size_t d1 = 1UL << id1;
  327. std::size_t d2 = 1UL << id2;
  328. diff --color -aur ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/simulator.hpp ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/simulator.hpp
  329. --- ProjectQ-0.5.1/projectq/backends/_sim/_cppkernels/simulator.hpp 2020-06-05 21:07:57.000000000 +0800
  330. +++ ProjectQ-0.5.1_new/projectq/backends/_sim/_cppkernels/simulator.hpp 2021-04-20 11:46:27.115554725 +0800
  331. @@ -18,11 +18,7 @@
  332. #include <vector>
  333. #include <complex>
  334. -#if defined(NOINTRIN) || !defined(INTRIN)
  335. -#include "nointrin/kernels.hpp"
  336. -#else
  337. #include "intrin/kernels.hpp"
  338. -#endif
  339. #include "intrin/alignedallocator.hpp"
  340. #include "fusion.hpp"
  341. @@ -32,173 +28,29 @@
  342. #include <tuple>
  343. #include <random>
  344. #include <functional>
  345. -
  346. +#include <cstring>
  347. class Simulator{
  348. public:
  349. using calc_type = double;
  350. using complex_type = std::complex<calc_type>;
  351. - using StateVector = std::vector<complex_type, aligned_allocator<complex_type,512>>;
  352. + using StateVector = calc_type *;
  353. using Map = std::map<unsigned, unsigned>;
  354. using RndEngine = std::mt19937;
  355. using Term = std::vector<std::pair<unsigned, char>>;
  356. using TermsDict = std::vector<std::pair<Term, calc_type>>;
  357. using ComplexTermsDict = std::vector<std::pair<Term, complex_type>>;
  358. + StateVector vec_;
  359. - Simulator(unsigned seed = 1) : N_(0), vec_(1,0.), fusion_qubits_min_(4),
  360. + Simulator(unsigned seed = 1, unsigned N = 0) : N_(N), fusion_qubits_min_(4),
  361. fusion_qubits_max_(5), rnd_eng_(seed) {
  362. + len_ = 1UL << (N_ + 1);
  363. + vec_ = (StateVector)calloc(len_, sizeof(calc_type));
  364. vec_[0]=1.; // all-zero initial state
  365. std::uniform_real_distribution<double> dist(0., 1.);
  366. rng_ = std::bind(dist, std::ref(rnd_eng_));
  367. - }
  368. -
  369. - void allocate_qubit(unsigned id){
  370. - if (map_.count(id) == 0){
  371. - map_[id] = N_++;
  372. - StateVector newvec; // avoid large memory allocations
  373. - if( tmpBuff1_.capacity() >= (1UL << N_) )
  374. - std::swap(newvec, tmpBuff1_);
  375. - newvec.resize(1UL << N_);
  376. -#pragma omp parallel for schedule(static)
  377. - for (std::size_t i = 0; i < newvec.size(); ++i)
  378. - newvec[i] = (i < vec_.size())?vec_[i]:0.;
  379. - std::swap(vec_, newvec);
  380. - // recycle large memory
  381. - std::swap(tmpBuff1_, newvec);
  382. - if( tmpBuff1_.capacity() < tmpBuff2_.capacity() )
  383. - std::swap(tmpBuff1_, tmpBuff2_);
  384. - }
  385. - else
  386. - throw(std::runtime_error(
  387. - "AllocateQubit: ID already exists. Qubit IDs should be unique."));
  388. - }
  389. -
  390. - bool get_classical_value(unsigned id, calc_type tol = 1.e-12){
  391. - run();
  392. - unsigned pos = map_[id];
  393. - std::size_t delta = (1UL << pos);
  394. -
  395. - for (std::size_t i = 0; i < vec_.size(); i += 2*delta){
  396. - for (std::size_t j = 0; j < delta; ++j){
  397. - if (std::norm(vec_[i+j]) > tol)
  398. - return false;
  399. - if (std::norm(vec_[i+j+delta]) > tol)
  400. - return true;
  401. - }
  402. - }
  403. - assert(false); // this will never happen
  404. - return false; // suppress 'control reaches end of non-void...'
  405. - }
  406. -
  407. - bool is_classical(unsigned id, calc_type tol = 1.e-12){
  408. - run();
  409. - unsigned pos = map_[id];
  410. - std::size_t delta = (1UL << pos);
  411. -
  412. - short up = 0, down = 0;
  413. - #pragma omp parallel for schedule(static) reduction(|:up,down)
  414. - for (std::size_t i = 0; i < vec_.size(); i += 2*delta){
  415. - for (std::size_t j = 0; j < delta; ++j){
  416. - up = up | ((std::norm(vec_[i+j]) > tol)&1);
  417. - down = down | ((std::norm(vec_[i+j+delta]) > tol)&1);
  418. - }
  419. - }
  420. -
  421. - return 1 == (up^down);
  422. - }
  423. -
  424. - void collapse_vector(unsigned id, bool value = false, bool shrink = false){
  425. - run();
  426. - unsigned pos = map_[id];
  427. - std::size_t delta = (1UL << pos);
  428. -
  429. - if (!shrink){
  430. - #pragma omp parallel for schedule(static)
  431. - for (std::size_t i = 0; i < vec_.size(); i += 2*delta){
  432. - for (std::size_t j = 0; j < delta; ++j)
  433. - vec_[i+j+static_cast<std::size_t>(!value)*delta] = 0.;
  434. - }
  435. - }
  436. - else{
  437. - StateVector newvec; // avoid costly memory reallocations
  438. - if( tmpBuff1_.capacity() >= (1UL << (N_-1)) )
  439. - std::swap(tmpBuff1_, newvec);
  440. - newvec.resize((1UL << (N_-1)));
  441. - #pragma omp parallel for schedule(static) if(0)
  442. - for (std::size_t i = 0; i < vec_.size(); i += 2*delta)
  443. - std::copy_n(&vec_[i + static_cast<std::size_t>(value)*delta],
  444. - delta, &newvec[i/2]);
  445. - std::swap(vec_, newvec);
  446. - std::swap(tmpBuff1_, newvec);
  447. - if( tmpBuff1_.capacity() < tmpBuff2_.capacity() )
  448. - std::swap(tmpBuff1_, tmpBuff2_);
  449. -
  450. - for (auto& p : map_){
  451. - if (p.second > pos)
  452. - p.second--;
  453. - }
  454. - map_.erase(id);
  455. - N_--;
  456. - }
  457. - }
  458. -
  459. - void measure_qubits(std::vector<unsigned> const& ids, std::vector<bool> &res){
  460. - run();
  461. -
  462. - std::vector<unsigned> positions(ids.size());
  463. - for (unsigned i = 0; i < ids.size(); ++i)
  464. - positions[i] = map_[ids[i]];
  465. -
  466. - calc_type P = 0.;
  467. - calc_type rnd = rng_();
  468. -
  469. - // pick entry at random with probability |entry|^2
  470. - std::size_t pick = 0;
  471. - while (P < rnd && pick < vec_.size())
  472. - P += std::norm(vec_[pick++]);
  473. -
  474. - pick--;
  475. - // determine result vector (boolean values for each qubit)
  476. - // and create mask to detect bad entries (i.e., entries that don't agree with measurement)
  477. - res = std::vector<bool>(ids.size());
  478. - std::size_t mask = 0;
  479. - std::size_t val = 0;
  480. - for (unsigned i = 0; i < ids.size(); ++i){
  481. - bool r = ((pick >> positions[i]) & 1) == 1;
  482. - res[i] = r;
  483. - mask |= (1UL << positions[i]);
  484. - val |= (static_cast<std::size_t>(r&1) << positions[i]);
  485. - }
  486. - // set bad entries to 0
  487. - calc_type N = 0.;
  488. - #pragma omp parallel for reduction(+:N) schedule(static)
  489. - for (std::size_t i = 0; i < vec_.size(); ++i){
  490. - if ((i & mask) != val)
  491. - vec_[i] = 0.;
  492. - else
  493. - N += std::norm(vec_[i]);
  494. - }
  495. - // re-normalize
  496. - N = 1./std::sqrt(N);
  497. - #pragma omp parallel for schedule(static)
  498. - for (std::size_t i = 0; i < vec_.size(); ++i)
  499. - vec_[i] *= N;
  500. - }
  501. -
  502. - std::vector<bool> measure_qubits_return(std::vector<unsigned> const& ids){
  503. - std::vector<bool> ret;
  504. - measure_qubits(ids, ret);
  505. - return ret;
  506. - }
  507. -
  508. - void deallocate_qubit(unsigned id){
  509. - run();
  510. - assert(map_.count(id) == 1);
  511. - if (!is_classical(id))
  512. - throw(std::runtime_error("Error: Qubit has not been measured / uncomputed! There is most likely a bug in your code."));
  513. -
  514. - bool value = get_classical_value(id);
  515. - collapse_vector(id, value, true);
  516. + for (unsigned i = 0; i < N_; i++)
  517. + map_[i] = i;
  518. }
  519. template <class M>
  520. @@ -221,84 +73,13 @@
  521. fused_gates_ = fused_gates;
  522. }
  523. - template <class F, class QuReg>
  524. - void emulate_math(F const& f, QuReg quregs, const std::vector<unsigned>& ctrl,
  525. - bool parallelize = false){
  526. - run();
  527. - auto ctrlmask = get_control_mask(ctrl);
  528. -
  529. - for (unsigned i = 0; i < quregs.size(); ++i)
  530. - for (unsigned j = 0; j < quregs[i].size(); ++j)
  531. - quregs[i][j] = map_[quregs[i][j]];
  532. -
  533. - StateVector newvec; // avoid costly memory reallocations
  534. - if( tmpBuff1_.capacity() >= vec_.size() )
  535. - std::swap(newvec, tmpBuff1_);
  536. - newvec.resize(vec_.size());
  537. -#pragma omp parallel for schedule(static)
  538. - for (std::size_t i = 0; i < vec_.size(); i++)
  539. - newvec[i] = 0;
  540. -
  541. -//#pragma omp parallel reduction(+:newvec[:newvec.size()]) if(parallelize) // requires OpenMP 4.5
  542. - {
  543. - std::vector<int> res(quregs.size());
  544. - //#pragma omp for schedule(static)
  545. - for (std::size_t i = 0; i < vec_.size(); ++i){
  546. - if ((ctrlmask&i) == ctrlmask){
  547. - for (unsigned qr_i = 0; qr_i < quregs.size(); ++qr_i){
  548. - res[qr_i] = 0;
  549. - for (unsigned qb_i = 0; qb_i < quregs[qr_i].size(); ++qb_i)
  550. - res[qr_i] |= ((i >> quregs[qr_i][qb_i])&1) << qb_i;
  551. - }
  552. - f(res);
  553. - auto new_i = i;
  554. - for (unsigned qr_i = 0; qr_i < quregs.size(); ++qr_i){
  555. - for (unsigned qb_i = 0; qb_i < quregs[qr_i].size(); ++qb_i){
  556. - if (!(((new_i >> quregs[qr_i][qb_i])&1) == ((res[qr_i] >> qb_i)&1)))
  557. - new_i ^= (1UL << quregs[qr_i][qb_i]);
  558. - }
  559. - }
  560. - newvec[new_i] += vec_[i];
  561. - }
  562. - else
  563. - newvec[i] += vec_[i];
  564. - }
  565. - }
  566. - std::swap(vec_, newvec);
  567. - std::swap(tmpBuff1_, newvec);
  568. - }
  569. -
  570. - // faster version without calling python
  571. - template<class QuReg>
  572. - inline void emulate_math_addConstant(int a, const QuReg& quregs, const std::vector<unsigned>& ctrl)
  573. - {
  574. - emulate_math([a](std::vector<int> &res){for(auto& x: res) x = x + a;}, quregs, ctrl, true);
  575. - }
  576. -
  577. - // faster version without calling python
  578. - template<class QuReg>
  579. - inline void emulate_math_addConstantModN(int a, int N, const QuReg& quregs, const std::vector<unsigned>& ctrl)
  580. - {
  581. - emulate_math([a,N](std::vector<int> &res){for(auto& x: res) x = (x + a) % N;}, quregs, ctrl, true);
  582. - }
  583. -
  584. - // faster version without calling python
  585. - template<class QuReg>
  586. - inline void emulate_math_multiplyByConstantModN(int a, int N, const QuReg& quregs, const std::vector<unsigned>& ctrl)
  587. - {
  588. - emulate_math([a,N](std::vector<int> &res){for(auto& x: res) x = (x * a) % N;}, quregs, ctrl, true);
  589. - }
  590. -
  591. calc_type get_expectation_value(TermsDict const& td, std::vector<unsigned> const& ids){
  592. run();
  593. calc_type expectation = 0.;
  594. - StateVector current_state; // avoid costly memory reallocations
  595. - if( tmpBuff1_.capacity() >= vec_.size() )
  596. - std::swap(tmpBuff1_, current_state);
  597. - current_state.resize(vec_.size());
  598. + StateVector current_state = (StateVector)malloc(len_ *sizeof(calc_type));
  599. #pragma omp parallel for schedule(static)
  600. - for (std::size_t i = 0; i < vec_.size(); ++i)
  601. + for (std::size_t i = 0; i < len_; ++i)
  602. current_state[i] = vec_[i];
  603. for (auto const& term : td){
  604. @@ -306,81 +87,53 @@
  605. apply_term(term.first, ids, {});
  606. calc_type delta = 0.;
  607. #pragma omp parallel for reduction(+:delta) schedule(static)
  608. - for (std::size_t i = 0; i < vec_.size(); ++i){
  609. - auto const a1 = std::real(current_state[i]);
  610. - auto const b1 = -std::imag(current_state[i]);
  611. - auto const a2 = std::real(vec_[i]);
  612. - auto const b2 = std::imag(vec_[i]);
  613. + for (std::size_t i = 0; i < (len_ >> 1); ++i){
  614. + auto const a1 = current_state[2 * i];
  615. + auto const b1 = -current_state[2 * i + 1];
  616. + auto const a2 = vec_[2 * i];
  617. + auto const b2 = vec_[2 * i + 1];
  618. delta += a1 * a2 - b1 * b2;
  619. // reset vec_
  620. - vec_[i] = current_state[i];
  621. + vec_[2 * i] = current_state[2 * i];
  622. + vec_[2 * i + 1] = current_state[2 * i + 1];
  623. }
  624. expectation += coefficient * delta;
  625. }
  626. - std::swap(current_state, tmpBuff1_);
  627. + if (NULL != current_state){
  628. + free(current_state);
  629. + current_state = NULL;
  630. + }
  631. return expectation;
  632. }
  633. void apply_qubit_operator(ComplexTermsDict const& td, std::vector<unsigned> const& ids){
  634. run();
  635. - StateVector new_state, current_state; // avoid costly memory reallocations
  636. - if( tmpBuff1_.capacity() >= vec_.size() )
  637. - std::swap(tmpBuff1_, new_state);
  638. - if( tmpBuff2_.capacity() >= vec_.size() )
  639. - std::swap(tmpBuff2_, current_state);
  640. - new_state.resize(vec_.size());
  641. - current_state.resize(vec_.size());
  642. + StateVector new_state = (StateVector)calloc(len_, sizeof(calc_type));
  643. + StateVector current_state = (StateVector)malloc(len_ * sizeof(calc_type));
  644. #pragma omp parallel for schedule(static)
  645. - for (std::size_t i = 0; i < vec_.size(); ++i){
  646. - new_state[i] = 0;
  647. + for (std::size_t i = 0; i < len_; ++i){
  648. current_state[i] = vec_[i];
  649. }
  650. for (auto const& term : td){
  651. auto const& coefficient = term.second;
  652. apply_term(term.first, ids, {});
  653. #pragma omp parallel for schedule(static)
  654. - for (std::size_t i = 0; i < vec_.size(); ++i){
  655. - new_state[i] += coefficient * vec_[i];
  656. - vec_[i] = current_state[i];
  657. + for (std::size_t i = 0; i < (len_ >> 1); ++i){
  658. + new_state[2 * i] += coefficient.real() * vec_[2 * i] - coefficient.imag() * vec_[2 * i + 1];
  659. + new_state[2 * i + 1] += coefficient.real() * vec_[2 * i + 1] + coefficient.imag() * vec_[2 * i];
  660. + vec_[2 * i] = current_state[2 * i];
  661. + vec_[2 * i + 1] = current_state[2 * i + 1];
  662. }
  663. }
  664. - std::swap(vec_, new_state);
  665. - std::swap(tmpBuff1_, new_state);
  666. - std::swap(tmpBuff2_, current_state);
  667. - }
  668. -
  669. - calc_type get_probability(std::vector<bool> const& bit_string,
  670. - std::vector<unsigned> const& ids){
  671. - run();
  672. - if (!check_ids(ids))
  673. - throw(std::runtime_error("get_probability(): Unknown qubit id. Please make sure you have called eng.flush()."));
  674. - std::size_t mask = 0, bit_str = 0;
  675. - for (unsigned i = 0; i < ids.size(); ++i){
  676. - mask |= 1UL << map_[ids[i]];
  677. - bit_str |= (bit_string[i]?1UL:0UL) << map_[ids[i]];
  678. - }
  679. - calc_type probability = 0.;
  680. - #pragma omp parallel for reduction(+:probability) schedule(static)
  681. - for (std::size_t i = 0; i < vec_.size(); ++i)
  682. - if ((i & mask) == bit_str)
  683. - probability += std::norm(vec_[i]);
  684. - return probability;
  685. - }
  686. -
  687. - complex_type const& get_amplitude(std::vector<bool> const& bit_string,
  688. - std::vector<unsigned> const& ids){
  689. - run();
  690. - std::size_t chk = 0;
  691. - std::size_t index = 0;
  692. - for (unsigned i = 0; i < ids.size(); ++i){
  693. - if (map_.count(ids[i]) == 0)
  694. - break;
  695. - chk |= 1UL << map_[ids[i]];
  696. - index |= (bit_string[i]?1UL:0UL) << map_[ids[i]];
  697. + if (NULL != vec_)
  698. + free(vec_);
  699. + vec_ = new_state;
  700. + if (NULL != new_state)
  701. + new_state = NULL;
  702. + if (NULL != current_state){
  703. + free(current_state);
  704. + current_state = NULL;
  705. }
  706. - if (chk + 1 != vec_.size())
  707. - throw(std::runtime_error("The second argument to get_amplitude() must be a permutation of all allocated qubits. Please make sure you have called eng.flush()."));
  708. - return vec_[index];
  709. }
  710. void emulate_time_evolution(TermsDict const& tdict, calc_type const& time,
  711. @@ -400,87 +153,71 @@
  712. }
  713. unsigned s = std::abs(time) * op_nrm + 1.;
  714. complex_type correction = std::exp(-time * I * tr / (double)s);
  715. - auto output_state = vec_;
  716. + auto output_state = copy(vec_, len_);
  717. auto ctrlmask = get_control_mask(ctrl);
  718. for (unsigned i = 0; i < s; ++i){
  719. calc_type nrm_change = 1.;
  720. for (unsigned k = 0; nrm_change > 1.e-12; ++k){
  721. auto coeff = (-time * I) / double(s * (k + 1));
  722. - auto current_state = vec_;
  723. - auto update = StateVector(vec_.size(), 0.);
  724. + auto current_state = copy(vec_, len_);
  725. + auto update = (StateVector)calloc(len_, sizeof(calc_type));
  726. for (auto const& tup : td){
  727. apply_term(tup.first, ids, {});
  728. #pragma omp parallel for schedule(static)
  729. - for (std::size_t j = 0; j < vec_.size(); ++j){
  730. + for (std::size_t j = 0; j < len_; ++j){
  731. update[j] += vec_[j] * tup.second;
  732. vec_[j] = current_state[j];
  733. }
  734. }
  735. nrm_change = 0.;
  736. #pragma omp parallel for reduction(+:nrm_change) schedule(static)
  737. - for (std::size_t j = 0; j < vec_.size(); ++j){
  738. - update[j] *= coeff;
  739. - vec_[j] = update[j];
  740. + for (std::size_t j = 0; j < (len_ >> 1); ++j){
  741. + complex_type tmp(update[2 * j], update[2 * j + 1]);
  742. + tmp *= coeff;
  743. + update[2 * j] = std::real(tmp);
  744. + update[2 * j + 1] = std::imag(tmp);
  745. + vec_[2 * j] = update[2 * j];
  746. + vec_[2 * j + 1] = update[2 * j + 1];
  747. if ((j & ctrlmask) == ctrlmask){
  748. - output_state[j] += update[j];
  749. - nrm_change += std::norm(update[j]);
  750. + output_state[2 * j] += update[2 * j];
  751. + output_state[2 * j + 1] += update[2 * j + 1];
  752. + nrm_change += update[2 * j] * update[2 * j] + update[2 * j + 1] * update[2 * j + 1];
  753. }
  754. }
  755. nrm_change = std::sqrt(nrm_change);
  756. + if (NULL != current_state){
  757. + free(current_state);
  758. + current_state = NULL;
  759. + }
  760. + if (NULL != update){
  761. + free(update);
  762. + update = NULL;
  763. + }
  764. }
  765. #pragma omp parallel for schedule(static)
  766. - for (std::size_t j = 0; j < vec_.size(); ++j){
  767. - if ((j & ctrlmask) == ctrlmask)
  768. - output_state[j] *= correction;
  769. - vec_[j] = output_state[j];
  770. + for (std::size_t j = 0; j < (len_ >> 1); ++j){
  771. + if ((j & ctrlmask) == ctrlmask){
  772. + complex_type tmp(output_state[2 * j], output_state[2 * j + 1]);
  773. + tmp *= correction;
  774. + output_state[2 * j] = std::real(tmp);
  775. + output_state[2 * j + 1] = std::imag(tmp);
  776. + }
  777. + vec_[2 * j] = output_state[2 * j];
  778. + vec_[2 * j + 1] = output_state[2 * j + 1];
  779. }
  780. }
  781. + if (NULL != output_state){
  782. + free(output_state);
  783. + output_state = NULL;
  784. + }
  785. }
  786. void set_wavefunction(StateVector const& wavefunction, std::vector<unsigned> const& ordering){
  787. run();
  788. - // make sure there are 2^n amplitudes for n qubits
  789. - assert(wavefunction.size() == (1UL << ordering.size()));
  790. - // check that all qubits have been allocated previously
  791. - if (map_.size() != ordering.size() || !check_ids(ordering))
  792. - throw(std::runtime_error("set_wavefunction(): Invalid mapping provided. Please make sure all qubits have been allocated previously (call eng.flush())."));
  793. -
  794. - // set mapping and wavefunction
  795. - for (unsigned i = 0; i < ordering.size(); ++i)
  796. - map_[ordering[i]] = i;
  797. - #pragma omp parallel for schedule(static)
  798. - for (std::size_t i = 0; i < wavefunction.size(); ++i)
  799. - vec_[i] = wavefunction[i];
  800. - }
  801. -
  802. - void collapse_wavefunction(std::vector<unsigned> const& ids, std::vector<bool> const& values){
  803. - run();
  804. - assert(ids.size() == values.size());
  805. - if (!check_ids(ids))
  806. - throw(std::runtime_error("collapse_wavefunction(): Unknown qubit id(s) provided. Try calling eng.flush() before invoking this function."));
  807. - std::size_t mask = 0, val = 0;
  808. - for (unsigned i = 0; i < ids.size(); ++i){
  809. - mask |= (1UL << map_[ids[i]]);
  810. - val |= ((values[i]?1UL:0UL) << map_[ids[i]]);
  811. - }
  812. - // set bad entries to 0 and compute probability of outcome to renormalize
  813. - calc_type N = 0.;
  814. - #pragma omp parallel for reduction(+:N) schedule(static)
  815. - for (std::size_t i = 0; i < vec_.size(); ++i){
  816. - if ((i & mask) == val)
  817. - N += std::norm(vec_[i]);
  818. - }
  819. - if (N < 1.e-12)
  820. - throw(std::runtime_error("collapse_wavefunction(): Invalid collapse! Probability is ~0."));
  821. - // re-normalize (if possible)
  822. - N = 1./std::sqrt(N);
  823. - #pragma omp parallel for schedule(static)
  824. - for (std::size_t i = 0; i < vec_.size(); ++i){
  825. - if ((i & mask) != val)
  826. - vec_[i] = 0.;
  827. - else
  828. - vec_[i] *= N;
  829. + if (NULL != vec_){
  830. + free(vec_);
  831. }
  832. + vec_ = copy(wavefunction, len_);
  833. }
  834. void run(){
  835. @@ -500,23 +237,23 @@
  836. switch (ids.size()){
  837. case 1:
  838. #pragma omp parallel
  839. - kernel(vec_, ids[0], m, ctrlmask);
  840. + kernel(vec_, ids[0], m, ctrlmask, len_ >> 1);
  841. break;
  842. case 2:
  843. #pragma omp parallel
  844. - kernel(vec_, ids[1], ids[0], m, ctrlmask);
  845. + kernel(vec_, ids[1], ids[0], m, ctrlmask, len_ >> 1);
  846. break;
  847. case 3:
  848. #pragma omp parallel
  849. - kernel(vec_, ids[2], ids[1], ids[0], m, ctrlmask);
  850. + kernel(vec_, ids[2], ids[1], ids[0], m, ctrlmask, len_ >> 1);
  851. break;
  852. case 4:
  853. #pragma omp parallel
  854. - kernel(vec_, ids[3], ids[2], ids[1], ids[0], m, ctrlmask);
  855. + kernel(vec_, ids[3], ids[2], ids[1], ids[0], m, ctrlmask, len_ >> 1);
  856. break;
  857. case 5:
  858. #pragma omp parallel
  859. - kernel(vec_, ids[4], ids[3], ids[2], ids[1], ids[0], m, ctrlmask);
  860. + kernel(vec_, ids[4], ids[3], ids[2], ids[1], ids[0], m, ctrlmask, len_ >> 1);
  861. break;
  862. default:
  863. throw std::invalid_argument("Gates with more than 5 qubits are not supported!");
  864. @@ -525,12 +262,27 @@
  865. fused_gates_ = Fusion();
  866. }
  867. - std::tuple<Map, StateVector&> cheat(){
  868. + std::vector<complex_type> cheat(){
  869. run();
  870. - return make_tuple(map_, std::ref(vec_));
  871. + std::vector<complex_type> result;
  872. + for (unsigned int i = 0; i < (len_ >> 1); i++){
  873. + result.push_back({vec_[2 * i], vec_[2 * i + 1]});
  874. + }
  875. + return result;
  876. + }
  877. +
  878. + inline StateVector copy(StateVector source, unsigned len){
  879. + StateVector result = (StateVector)malloc(len * sizeof(calc_type));
  880. +#pragma omp parallel for schedule(static)
  881. + for (std::size_t i = 0; i < len; ++i) {
  882. + result[i] = source[i];
  883. }
  884. + return result;
  885. +}
  886. ~Simulator(){
  887. + if (NULL != vec_)
  888. + free(vec_);
  889. }
  890. private:
  891. @@ -562,18 +314,13 @@
  892. }
  893. unsigned N_; // #qubits
  894. - StateVector vec_;
  895. + unsigned len_;
  896. Map map_;
  897. Fusion fused_gates_;
  898. unsigned fusion_qubits_min_, fusion_qubits_max_;
  899. RndEngine rnd_eng_;
  900. std::function<double()> rng_;
  901. -
  902. - // large array buffers to avoid costly reallocations
  903. - static StateVector tmpBuff1_, tmpBuff2_;
  904. };
  905. -Simulator::StateVector Simulator::tmpBuff1_;
  906. -Simulator::StateVector Simulator::tmpBuff2_;
  907. #endif