| @@ -630,85 +630,85 @@ zcholesky.essl : zcholesky.$(SUFFIX) | |||
# Link rules for the single-precision GEMM benchmark, one target per BLAS backend.
# The .goto targets link the benchmark object against ../$(LIBNAME); the other
# targets link against $(LIBACML), $(LIBATLAS), $(LIBMKL), $(LIBVECLIB) and
# $(LIBESSL) respectively. sbgemm.goto only exists when BUILD_BFLOAT16 is 1.
# A leading '-' on a recipe tells make to keep going if that command fails.
# NOTE(review): every rule below carries two consecutive link commands writing
# the same $(@F), first with $(CC) and then with $(CXX). This looks like the
# removed and added lines of a diff shown together - confirm that only the
# $(CXX) command is meant to remain.
| ##################################### Sgemm #################################################### | |||
| ifeq ($(BUILD_BFLOAT16),1) | |||
| sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| $(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| endif | |||
| sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| $(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| sgemm.acml : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.atlas : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.mkl : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.veclib : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| sgemm.essl : sgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
# Link rules for the double-precision real GEMM benchmark: dgemm.goto links
# against ../$(LIBNAME), the remaining targets against the named vendor library.
# Recipes prefixed with '-' do not stop make when they fail.
# NOTE(review): each rule lists a $(CC) and a $(CXX) link command back to back
# for the same output; presumably the before/after lines of a diff - confirm
# which one is intended.
| ##################################### Dgemm #################################################### | |||
| dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| $(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| dgemm.acml : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgemm.atlas : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgemm.mkl : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgemm.veclib : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| dgemm.essl : dgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
# Link rules for the single-precision complex GEMM benchmark: cgemm.goto links
# against ../$(LIBNAME), the remaining targets against the named vendor library.
# Recipes prefixed with '-' do not stop make when they fail.
# NOTE(review): each rule lists a $(CC) and a $(CXX) link command back to back
# for the same output; presumably the before/after lines of a diff - confirm
# which one is intended.
| ##################################### Cgemm #################################################### | |||
| cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| $(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| cgemm.acml : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgemm.atlas : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgemm.mkl : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgemm.veclib : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| cgemm.essl : cgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
# Link rules for the double-precision complex GEMM benchmark: zgemm.goto links
# against ../$(LIBNAME), the remaining targets against the named vendor library.
# Recipes prefixed with '-' do not stop make when they fail.
# NOTE(review): each rule lists a $(CC) and a $(CXX) link command back to back
# for the same output; presumably the before/after lines of a diff - confirm
# which one is intended.
| ##################################### Zgemm #################################################### | |||
| zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) | |||
| $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| $(CXX) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||
| zgemm.acml : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgemm.atlas : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgemm.mkl : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgemm.veclib : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| zgemm.essl : zgemm.$(SUFFIX) | |||
| -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| -$(CXX) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||
| ##################################### Ssymm #################################################### | |||
| ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) | |||
| @@ -2959,21 +2959,21 @@ zcholesky.$(SUFFIX) : cholesky.c | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||
# Object rule for the bfloat16 GEMM benchmark; only defined when
# BUILD_BFLOAT16 is 1. -DHALF together with -UCOMPLEX -UDOUBLE selects the
# sbgemm variant of the shared GEMM benchmark source.
# The gemm.cpp variant is compiled with $(CXX), like the sgemm/dgemm/cgemm/zgemm
# rules that build the same C++ source; it previously used $(CC).
# NOTE(review): the target appears twice (gemm.c and gemm.cpp); this looks like
# the removed and added lines of a diff - confirm only the gemm.cpp rule remains.
| ifeq ($(BUILD_BFLOAT16),1) | |||
| sbgemm.$(SUFFIX) : gemm.c | |||
| $(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| sbgemm.$(SUFFIX) : gemm.cpp | |||
| $(CXX) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^ | |||
| endif | |||
# Object rules for the four standard GEMM benchmarks. All of them are built
# from the same source; -DCOMPLEX/-UCOMPLEX and -DDOUBLE/-UDOUBLE pick the
# precision (s: neither, d: DOUBLE, c: COMPLEX, z: both).
# The gemm.cpp variants additionally define ANKERL_NANOBENCH_IMPLEMENT.
# NOTE(review): every target is listed twice, once from gemm.c with $(CC) and
# once from gemm.cpp with $(CXX); presumably the before/after lines of a diff -
# confirm that only the gemm.cpp rules are meant to remain.
| sgemm.$(SUFFIX) : gemm.c | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| sgemm.$(SUFFIX) : gemm.cpp | |||
| $(CXX) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^ | |||
| dgemm.$(SUFFIX) : gemm.c | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| dgemm.$(SUFFIX) : gemm.cpp | |||
| $(CXX) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^ | |||
| cgemm.$(SUFFIX) : gemm.c | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| cgemm.$(SUFFIX) : gemm.cpp | |||
| $(CXX) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^ | |||
| zgemm.$(SUFFIX) : gemm.c | |||
| $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||
| zgemm.$(SUFFIX) : gemm.cpp | |||
| $(CXX) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -DANKERL_NANOBENCH_IMPLEMENT -o $(@F) $^ | |||
# Object rule for the ssymm benchmark: compiles symm.c with $(CC), with neither
# COMPLEX nor DOUBLE defined.
| ssymm.$(SUFFIX) : symm.c | |||
| $(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||
| @@ -1,178 +0,0 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include "bench.h" | |||
/*
 * Bind GEMM to the BLAS routine matching the build flags:
 *   COMPLEX + DOUBLE -> zgemm,  COMPLEX alone -> cgemm,
 *   DOUBLE alone     -> dgemm,  HALF (real, non-DOUBLE) -> sbgemm,
 *   none of them     -> sgemm.
 */
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#elif defined(HALF)
#define GEMM BLASFUNC(sbgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
| int main(int argc, char *argv[]){ | |||
| IFLOAT *a, *b; | |||
| FLOAT *c; | |||
| FLOAT alpha[] = {1.0, 0.0}; | |||
| FLOAT beta [] = {0.0, 0.0}; | |||
| char transa = 'N'; | |||
| char transb = 'N'; | |||
| blasint m, n, k, i, j, lda, ldb, ldc; | |||
| int loops = 1; | |||
| int has_param_m = 0; | |||
| int has_param_n = 0; | |||
| int has_param_k = 0; | |||
| char *p; | |||
| int from = 1; | |||
| int to = 200; | |||
| int step = 1; | |||
| double time1, timeg; | |||
| argc--;argv++; | |||
| if (argc > 0) { from = atol(*argv); argc--; argv++; } | |||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } | |||
| if (argc > 0) { step = atol(*argv); argc--; argv++; } | |||
| if ((p = getenv("OPENBLAS_TRANS"))) { | |||
| transa=*p; | |||
| transb=*p; | |||
| } | |||
| if ((p = getenv("OPENBLAS_TRANSA"))) { | |||
| transa=*p; | |||
| } | |||
| if ((p = getenv("OPENBLAS_TRANSB"))) { | |||
| transb=*p; | |||
| } | |||
| TOUPPER(transa); | |||
| TOUPPER(transb); | |||
| fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb); | |||
| p = getenv("OPENBLAS_LOOPS"); | |||
| if ( p != NULL ) { | |||
| loops = atoi(p); | |||
| } | |||
| if ((p = getenv("OPENBLAS_PARAM_M"))) { | |||
| m = atoi(p); | |||
| has_param_m=1; | |||
| } else { | |||
| m = to; | |||
| } | |||
| if ((p = getenv("OPENBLAS_PARAM_N"))) { | |||
| n = atoi(p); | |||
| has_param_n=1; | |||
| } else { | |||
| n = to; | |||
| } | |||
| if ((p = getenv("OPENBLAS_PARAM_K"))) { | |||
| k = atoi(p); | |||
| has_param_k=1; | |||
| } else { | |||
| k = to; | |||
| } | |||
| if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) { | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) { | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) { | |||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
| } | |||
| #ifdef __linux | |||
| srandom(getpid()); | |||
| #endif | |||
| for (i = 0; i < m * k * COMPSIZE; i++) { | |||
| a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| for (i = 0; i < k * n * COMPSIZE; i++) { | |||
| b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| for (i = 0; i < m * n * COMPSIZE; i++) { | |||
| c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| fprintf(stderr, " SIZE Flops Time\n"); | |||
| for (i = from; i <= to; i += step) { | |||
| timeg=0; | |||
| if (!has_param_m) { m = i; } | |||
| if (!has_param_n) { n = i; } | |||
| if (!has_param_k) { k = i; } | |||
| if (transa == 'N') { lda = m; } | |||
| else { lda = k; } | |||
| if (transb == 'N') { ldb = k; } | |||
| else { ldb = n; } | |||
| ldc = m; | |||
| fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); | |||
| begin(); | |||
| for (j=0; j<loops; j++) { | |||
| GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); | |||
| } | |||
| end(); | |||
| time1 = getsec(); | |||
| timeg = time1/loops; | |||
| fprintf(stderr, | |||
| " %10.2f MFlops %10.6f sec\n", | |||
| COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1); | |||
| } | |||
| return 0; | |||
| } | |||
| // void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
| @@ -0,0 +1,152 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2014, 2023. The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include <common.h>
#include "nanobench.h"
// Bind GEMM to the BLAS routine matching the build flags:
//   COMPLEX + DOUBLE -> zgemm,  COMPLEX alone -> cgemm,
//   DOUBLE alone     -> dgemm,  HALF (real, non-DOUBLE) -> sbgemm,
//   none of them     -> sgemm.
#undef GEMM

#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#elif defined(HALF)
#define GEMM BLASFUNC(sbgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
/// Overwrites every element of `vec` with a pseudo-random value drawn
/// uniformly from [-0.5, 0.5).
///
/// The vector is taken by reference so that the caller's storage is the one
/// being filled (a by-value parameter would only fill a temporary copy).
/// Samples are generated as `double` and converted to `T`, so the function
/// also instantiates for element types that are not floating point.
/// NOTE(review): for a non-floating `T` the conversion truncates every sample
/// to zero; such element types need their own conversion to get real data.
///
/// @param vec  buffer to fill in place; an empty vector is left untouched.
template <typename T> static void fill_vector(std::vector<T> &vec) {
  // Seeded from the system entropy source, so contents differ between runs.
  std::random_device rand_dev;
  std::mt19937 generator(rand_dev());
  // Small, zero-centred operands keep GEMM products far from overflow.
  std::uniform_real_distribution<double> distribution(-0.5, 0.5);
  std::generate(vec.begin(), vec.end(),
                [&]() { return static_cast<T>(distribution(generator)); });
}
/// Looks up an environment variable and reports whether it was provided.
///
/// @param name           name of the environment variable.
/// @param default_value  text returned when the variable is not provided.
/// @return `{true, value}` when the variable is set to a non-empty string,
///         `{false, default_value}` otherwise.
///
/// The flag means "the variable was provided", which is how the callers in
/// this file read `.first`. A variable set to the empty string is treated as
/// not provided, so the returned text is empty only if the default is.
static std::pair<bool, std::string>
env_param(const std::string &name, const std::string &default_value) {
  const char *value = std::getenv(name.c_str());
  if (value == nullptr || *value == '\0') {
    return {false, default_value};
  }
  return {true, std::string(value)};
}
| static std::string env_value(const std::string &name, | |||
| const std::string &default_value) { | |||
| return env_param(name, default_value).second; | |||
| } | |||
| int main(int argc, char *argv[]) { | |||
| int from = (argc > 1) ? atol(argv[1]) : 1; | |||
| int to = (argc > 2) ? MAX(atol(argv[2]), from) : 200; | |||
| int step = (argc > 3) ? atol(argv[3]) : 1; | |||
| FLOAT alpha[] = {1.0, 0.0}; | |||
| FLOAT beta[] = {0.0, 0.0}; | |||
| int epochs = atoi(env_value("OPENBLAS_EPOCHS", "1").c_str()); | |||
| bool json_output = env_value("OPENBLAS_OUTPUT_JSON", "0").front() == '1'; | |||
| std::pair<bool, std::string> param_m = env_param("OPENBLAS_PARAM_M", "100"); | |||
| std::pair<bool, std::string> param_n = env_param("OPENBLAS_PARAM_N", "100"); | |||
| std::pair<bool, std::string> param_k = env_param("OPENBLAS_PARAM_K", "100"); | |||
| blasint m = param_m.first ? atoi(param_m.second.c_str()) : to; | |||
| blasint n = param_n.first ? atoi(param_n.second.c_str()) : to; | |||
| blasint k = param_k.first ? atoi(param_k.second.c_str()) : to; | |||
| char transpose = toupper(env_value("OPENBLAS_TRANS", "N").front()); | |||
| char transpose_a = toupper(env_value("OPENBLAS_TRANSA", "N").front()); | |||
| char transpose_b = toupper(env_value("OPENBLAS_TRANSB", "N").front()); | |||
| bool is_specific_size = param_m.first && param_n.first && param_k.first; | |||
| if (is_specific_size) { | |||
| from = 1; | |||
| to = 1; | |||
| step = 1; | |||
| } | |||
| std::vector<IFLOAT> a(m * k); | |||
| std::vector<IFLOAT> b(n * k); | |||
| std::vector<FLOAT> c(m * n); | |||
| fill_vector(a); | |||
| fill_vector(b); | |||
| fill_vector(c); | |||
| if (!is_specific_size) { | |||
| std::cout << "From: " << std::to_string(from) << " To: " << std::to_string(to) | |||
| << " Step: " << std::to_string(step) << " TransA: " << transpose_a | |||
| << " TransB: " << transpose_b << "\n"; | |||
| } else { | |||
| std::cout << "M: " << std::to_string(m) << " N: " << std::to_string(n) | |||
| << " K: " << std::to_string(k) << " TransA: " << transpose_a | |||
| << " TransB: " << transpose_b << "\n"; | |||
| } | |||
| for (int i = from; i <= to; i += step) { | |||
| if (!param_m.first) { | |||
| m = i; | |||
| } | |||
| if (!param_n.first) { | |||
| n = i; | |||
| } | |||
| if (!param_k.first) { | |||
| k = i; | |||
| } | |||
| blasint lda = transpose == 'N' && transpose_a == 'N' ? m : k; | |||
| blasint ldb = transpose == 'N' && transpose_b == 'N' ? k : n; | |||
| blasint ldc = m; | |||
| ankerl::nanobench::Bench bench; | |||
| if (json_output) { | |||
| bench.output(nullptr); | |||
| } | |||
| std::string bench_name = "M=" + std::to_string(m) + | |||
| " N=" + std::to_string(n) + | |||
| " K=" + std::to_string(k); | |||
| bench.minEpochIterations(epochs).run(bench_name, [&]() { | |||
| GEMM(&transpose_a, &transpose_b, &m, &n, &k, alpha, a.data(), &lda, | |||
| b.data(), &ldb, beta, c.data(), &ldc); | |||
| }); | |||
| if (json_output) { | |||
| bench.render(ankerl::nanobench::templates::json(), std::cout); | |||
| } | |||
| } | |||
| } | |||