|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
#include <cblas.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <vector>
-
- //------------------------------------------------------------------------------
// Fills the m-by-n column-major matrix A (leading dimension ld) with
// values rand() / RAND_MAX, i.e. in the range [0, 1].
// Entries are written one column at a time, top to bottom; elements in
// rows m..ld-1 of each column are left untouched.
void fill_rand( int m, int n, double* A, int ld )
{
    for (int col = 0; col < n; ++col) {
        double* entry = &A[ col*ld ];          // first element of this column
        double* const col_end = entry + m;     // one past the last row filled
        while (entry != col_end) {
            *entry = static_cast<double>( rand() ) / RAND_MAX;
            ++entry;
        }
    }
}
-
- //------------------------------------------------------------------------------
// Maximum of x and y that propagates NaN: if either argument is NaN the
// result is NaN; otherwise the larger value is returned (y on ties).
inline double max_nan( double x, double y )
{
    // A NaN in y wins outright.
    if (isnan( y ))
        return y;
    // If x is NaN this comparison is false, so x (NaN) is returned below.
    if (y >= x)
        return y;
    return x;
}
-
- //------------------------------------------------------------------------------
- int main( int argc, char** argv )
- {
- int batch_size = 1000;
- int n = 50;
- if (argc > 1)
- batch_size = atoi( argv[1] );
- if (argc > 2)
- n = atoi( argv[2] );
- printf( "batch_size %d, n %d\n", batch_size, n );
-
- int ld = n;
- double alpha = 3.1416;
- double beta = 2.7183;
-
- printf( "init\n" );
- std::vector<double*> A_array( batch_size ),
- B_array( batch_size ),
- C_array( batch_size ),
- D_array( batch_size );
- for (int i = 0; i < batch_size; ++i) {
- A_array[ i ] = new double[ ld*n ];
- B_array[ i ] = new double[ ld*n ];
- C_array[ i ] = new double[ ld*n ];
- D_array[ i ] = new double[ ld*n ];
- fill_rand( n, n, A_array[ i ], ld );
- fill_rand( n, n, B_array[ i ], ld );
- fill_rand( n, n, C_array[ i ], ld );
- std::copy( C_array[ i ], C_array[ i ] + ld*n, D_array[ i ] );
- }
-
- printf( "test\n" );
- for (int i = 0; i < batch_size; ++i) {
- cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, n, n, n,
- alpha, A_array[ i ], ld, B_array[ i ], ld,
- beta, C_array[ i ], ld );
- }
-
- printf( "test OpenMP\n" );
- #pragma omp parallel for
- for (int i = 0; i < batch_size; ++i) {
- cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, n, n, n,
- alpha, A_array[ i ], ld, B_array[ i ], ld,
- beta, D_array[ i ], ld );
- }
-
- printf( "compare\n" );
- double max_error = 0;
- for (int i = 0; i < batch_size; ++i) {
- // norm( D - C )
- cblas_daxpy( ld*n, -1.0, C_array[ i ], 1, D_array[ i ], 1 );
- double error = cblas_dnrm2( ld*n, D_array[ i ], 1 );
- max_error = max_nan( error, max_error );
- }
- printf( "max error %.2e\n", max_error );
-
- printf( "delete\n" );
- for (int i = 0; i < batch_size; ++i) {
- delete [] A_array[ i ];
- delete [] B_array[ i ];
- delete [] C_array[ i ];
- }
-
- printf( "done\n" );
- return 0;
- }
-
|