You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dgemm_thread_safety.cpp 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. #include <iostream>
  2. #include <vector>
  3. #include <random>
  4. #include <future>
  5. #include <omp.h>
  6. #include "/opt/OpenBLAS_zen_serial/include/cblas.h"
  7. const blasint randomMatSize = 1024; //dimension of the random square matrices used
  8. const uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
  9. const uint32_t numTestRounds = 8; //number of testing rounds before success exit
  10. inline void pauser(){
  11. /// a portable way to pause a program
  12. std::string dummy;
  13. std::cout << "Press enter to continue...";
  14. std::getline(std::cin, dummy);
  15. }
  16. void launch_cblas_dgemm(double* A, double* B, double* C){
  17. cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, randomMatSize, randomMatSize, randomMatSize, 1.0, A, randomMatSize, B, randomMatSize, 0.1, C, randomMatSize);
  18. }
  19. void FillMatrices(std::vector<std::vector<double>>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist){
  20. for(uint32_t i=0; i<3; i++){
  21. for(uint32_t j=0; j<(randomMatSize*randomMatSize); j++){
  22. matBlock[i][j] = rngdist(PRNG);
  23. }
  24. }
  25. for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){
  26. matBlock[i] = matBlock[0];
  27. matBlock[i+1] = matBlock[1];
  28. matBlock[i+2] = matBlock[2];
  29. }
  30. }
  31. std::mt19937_64 InitPRNG(){
  32. std::random_device rd;
  33. std::mt19937_64 PRNG(rd()); //seed PRNG using /dev/urandom or similar OS provided RNG
  34. std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
  35. //make sure the internal state of the PRNG is properly mixed by generating 10M random numbers
  36. //PRNGs often have unreliable distribution uniformity and other statistical properties before their internal state is sufficiently mixed
  37. for (uint32_t i=0;i<10000000;i++) rngdist(PRNG);
  38. return PRNG;
  39. }
  40. void PrintMatrices(const std::vector<std::vector<double>>& matBlock){
  41. for (uint32_t i=0;i<numConcurrentThreads*3;i++){
  42. std::cout<<i<<std::endl;
  43. for (uint32_t j=0;j<randomMatSize;j++){
  44. for (uint32_t k=0;k<randomMatSize;k++){
  45. std::cout<<matBlock[i][j*randomMatSize + k]<<" ";
  46. }
  47. std::cout<<std::endl;
  48. }
  49. std::cout<<std::endl;
  50. }
  51. }
  52. int main(){
  53. std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
  54. std::vector<std::vector<double>> matBlock(numConcurrentThreads*3);
  55. std::vector<std::future<void>> futureBlock(numConcurrentThreads);
  56. std::cout<<"*----------------------------*\n";
  57. std::cout<<"| DGEMM thread safety tester |\n";
  58. std::cout<<"*----------------------------*\n";
  59. std::cout<<"Size of random matrices(N=M=K): "<<randomMatSize<<'\n';
  60. std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
  61. std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
  62. std::cout<<"This test will need "<<(static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
  63. std::cout<<"Initializing random number generator..."<<std::flush;
  64. std::mt19937_64 PRNG = InitPRNG();
  65. std::cout<<"done\n";
  66. std::cout<<"Preparing to test CBLAS DGEMM thread safety\n";
  67. std::cout<<"Allocating matrices..."<<std::flush;
  68. for(uint32_t i=0; i<(numConcurrentThreads*3); i++){
  69. matBlock[i].resize(randomMatSize*randomMatSize);
  70. }
  71. std::cout<<"done\n";
  72. //pauser();
  73. std::cout<<"Filling matrices with random numbers..."<<std::flush;
  74. FillMatrices(matBlock, PRNG, rngdist);
  75. //PrintMatrices(matBlock);
  76. std::cout<<"done\n";
  77. std::cout<<"Testing CBLAS DGEMM thread safety\n";
  78. omp_set_num_threads(numConcurrentThreads);
  79. for(uint32_t R=0; R<numTestRounds; R++){
  80. std::cout<<"DGEMM round #"<<R<<std::endl;
  81. std::cout<<"Launching "<<numConcurrentThreads<<" threads..."<<std::flush;
  82. #pragma omp parallel for default(none) shared(futureBlock, matBlock)
  83. for(uint32_t i=0; i<numConcurrentThreads; i++){
  84. futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemm, &matBlock[i*3][0], &matBlock[i*3+1][0], &matBlock[i*3+2][0]);
  85. //launch_cblas_dgemm( &matBlock[i][0], &matBlock[i+1][0], &matBlock[i+2][0]);
  86. }
  87. std::cout<<"done\n";
  88. std::cout<<"Waiting for threads to finish..."<<std::flush;
  89. for(uint32_t i=0; i<numConcurrentThreads; i++){
  90. futureBlock[i].get();
  91. }
  92. std::cout<<"done\n";
  93. //PrintMatrices(matBlock);
  94. std::cout<<"Comparing results from different threads..."<<std::flush;
  95. for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){
  96. for(uint32_t j=0; j<(randomMatSize*randomMatSize); j++){
  97. if (std::abs(matBlock[i+2][j] - matBlock[2][j]) > 1.0E-13){
  98. std::cout<<"ERROR: one of the threads returned a different result!"<<i+2<<std::endl;
  99. std::cout<<"CBLAS DGEMM thread safety test FAILED!"<<std::endl;
  100. return -1;
  101. }
  102. }
  103. }
  104. std::cout<<"OK!"<<std::endl;
  105. }
  106. std::cout<<"CBLAS DGEMM thread safety test PASSED!"<<std::endl;
  107. return 0;
  108. }