You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

common.h 16 kB

14 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #include <unistd.h>
  75. #ifdef OS_LINUX
  76. #include <malloc.h>
  77. #include <sched.h>
  78. #endif
  79. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  80. #include <sched.h>
  81. #endif
  82. #ifdef OS_ANDROID
  83. #define NO_SYSV_IPC
  84. #endif
  85. #ifdef OS_WINDOWS
  86. #ifdef ATOM
  87. #define GOTO_ATOM ATOM
  88. #undef ATOM
  89. #endif
  90. #include <windows.h>
  91. #include <math.h>
  92. #ifdef GOTO_ATOM
  93. #define ATOM GOTO_ATOM
  94. #undef GOTO_ATOM
  95. #endif
  96. #else
  97. #include <sys/mman.h>
  98. #ifndef NO_SYSV_IPC
  99. #include <sys/shm.h>
  100. #endif
  101. #include <sys/time.h>
  102. #include <unistd.h>
  103. #include <math.h>
  104. #ifdef SMP
  105. #include <pthread.h>
  106. #endif
  107. #endif
  108. #if defined(OS_SUNOS)
  109. #include <thread.h>
  110. #endif
  111. #ifdef __DECC
  112. #include <c_asm.h>
  113. #include <machine/builtins.h>
  114. #endif
  115. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  116. #include <fenv.h>
  117. #endif
  118. #endif
  119. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  120. #define DOUBLE DOUBLE_DEFINED
  121. #undef DOUBLE_DEFINED
  122. #endif
  123. #undef DEBUG_INFO
  124. #define SMP_DEBUG
  125. #undef MALLOC_DEBUG
  126. #undef SMP_ALLOC_DEBUG
  127. #ifndef ZERO
  128. #ifdef XDOUBLE
  129. #define ZERO 0.e0L
  130. #elif defined DOUBLE
  131. #define ZERO 0.e0
  132. #else
  133. #define ZERO 0.e0f
  134. #endif
  135. #endif
  136. #ifndef ONE
  137. #ifdef XDOUBLE
  138. #define ONE 1.e0L
  139. #elif defined DOUBLE
  140. #define ONE 1.e0
  141. #else
  142. #define ONE 1.e0f
  143. #endif
  144. #endif
  145. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  146. #define ALLOCA_ALIGN 63UL
  147. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  148. #ifdef NEEDBUNDERSCORE
  149. #define BLASFUNC(FUNC) FUNC##_
  150. #else
  151. #define BLASFUNC(FUNC) FUNC
  152. #endif
  153. #undef USE_PTHREAD_LOCK
  154. #undef USE_PTHREAD_SPINLOCK
  155. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  156. #error "You can't specify both LOCK operation!"
  157. #endif
  158. #ifdef SMP
  159. #define USE_PTHREAD_LOCK
  160. #undef USE_PTHREAD_SPINLOCK
  161. #endif
  162. #ifdef OS_WINDOWS
  163. #undef USE_PTHREAD_LOCK
  164. #undef USE_PTHREAD_SPINLOCK
  165. #endif
  166. #if defined(USE_PTHREAD_LOCK)
  167. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  168. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  169. #elif defined(USE_PTHREAD_SPINLOCK)
  170. #ifndef ASSEMBLER
  171. typedef volatile int pthread_spinlock_t;
  172. int pthread_spin_lock (pthread_spinlock_t *__lock);
  173. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  174. #endif
  175. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  176. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  177. #else
  178. #define LOCK_COMMAND(x) blas_lock(x)
  179. #define UNLOCK_COMMAND(x) blas_unlock(x)
  180. #endif
  181. #define GOTO_SHMID 0x510510
  182. #if 0
  183. #ifndef __CUDACC__
  184. #define __global__
  185. #define __device__
  186. #define __host__
  187. #define __shared__
  188. #endif
  189. #endif
  190. #ifndef ASSEMBLER
  191. #ifdef QUAD_PRECISION
  192. typedef struct {
  193. unsigned long x[2];
  194. } xdouble;
  195. #elif defined EXPRECISION
  196. #define xdouble long double
  197. #else
  198. #define xdouble double
  199. #endif
  200. #if defined(OS_WINDOWS) && defined(__64BIT__)
  201. typedef long long BLASLONG;
  202. typedef unsigned long long BLASULONG;
  203. #else
  204. typedef long BLASLONG;
  205. typedef unsigned long BLASULONG;
  206. #endif
  207. #ifdef USE64BITINT
  208. typedef BLASLONG blasint;
  209. #else
  210. typedef int blasint;
  211. #endif
  212. #else
  213. #ifdef USE64BITINT
  214. #define INTSHIFT 3
  215. #define INTSIZE 8
  216. #else
  217. #define INTSHIFT 2
  218. #define INTSIZE 4
  219. #endif
  220. #endif
  221. #ifdef XDOUBLE
  222. #define FLOAT xdouble
  223. #ifdef QUAD_PRECISION
  224. #define XFLOAT xidouble
  225. #endif
  226. #ifdef QUAD_PRECISION
  227. #define SIZE 32
  228. #define BASE_SHIFT 5
  229. #define ZBASE_SHIFT 6
  230. #else
  231. #define SIZE 16
  232. #define BASE_SHIFT 4
  233. #define ZBASE_SHIFT 5
  234. #endif
  235. #elif defined(DOUBLE)
  236. #define FLOAT double
  237. #define SIZE 8
  238. #define BASE_SHIFT 3
  239. #define ZBASE_SHIFT 4
  240. #elif defined(INTEGER) //extend for integer matrix
  241. #define FLOAT int
  242. #define SIZE 4
  243. #define BASE_SHIFT 2
  244. #define ZBASE_SHIFT 3
  245. #else
  246. #define FLOAT float
  247. #define SIZE 4
  248. #define BASE_SHIFT 2
  249. #define ZBASE_SHIFT 3
  250. #endif
  251. #ifndef XFLOAT
  252. #define XFLOAT FLOAT
  253. #endif
  254. #ifndef COMPLEX
  255. #define COMPSIZE 1
  256. #else
  257. #define COMPSIZE 2
  258. #endif
  259. #if defined(C_PGI) || defined(C_SUN)
  260. #define CREAL(X) (*((FLOAT *)&X + 0))
  261. #define CIMAG(X) (*((FLOAT *)&X + 1))
  262. #else
  263. #define CREAL __real__
  264. #define CIMAG __imag__
  265. #endif
  266. #define Address_H(x) (((x)+(1<<15))>>16)
  267. #define Address_L(x) ((x)-((Address_H(x))<<16))
  268. #ifndef MAX_CPU_NUMBER
  269. #define MAX_CPU_NUMBER 2
  270. #endif
  /*
   * YIELDING: per-platform "give way" action.
   *
   *   OS_SUNOS                      -> thr_yield()
   *   OS_WINDOWS                    -> SwitchToThread()
   *   ARMV5 / ARMV6 / ARMV7 / ARMV8 -> eight inline-assembly nops
   *   BULLDOZER / PILEDRIVER        -> eight inline-assembly nops
   *                                    (only if nothing above defined it)
   *   anything else                 -> sched_yield()
   *
   * Every expansion is a single statement body WITHOUT a trailing semicolon;
   * the caller supplies it (`YIELDING;`).  Keeping the semicolon out of the
   * macro is what makes `if (cond) YIELDING; else ...` legal for every
   * variant.  The assembly variants use the `__asm__ __volatile__` spelling
   * so that they also build when the compiler is in a strict ISO mode, where
   * the plain `asm` keyword is not recognised.
   */
  #if defined(OS_SUNOS)
  #define YIELDING thr_yield()
  #endif
  #if defined(OS_WINDOWS)
  #define YIELDING SwitchToThread()
  #endif
  #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n")
  #endif
  #ifdef BULLDOZER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  #ifdef PILEDRIVER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  /*
  #ifdef STEAMROLLER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  */
  /* Fallback for every platform not handled above. */
  #ifndef YIELDING
  #define YIELDING sched_yield()
  #endif
  300. /***
  301. Whether to allocate job_t on the heap or on the stack.
  302. Please see https://github.com/xianyi/OpenBLAS/issues/246
  303. ***/
  304. #if defined(OS_WINDOWS)
  305. #define GETRF_MEM_ALLOC_THRESHOLD 32
  306. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  307. #endif
  308. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  309. #define GETRF_MEM_ALLOC_THRESHOLD 80
  310. #endif
  311. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  312. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  313. #endif
  314. #ifdef QUAD_PRECISION
  315. #include "common_quad.h"
  316. #endif
  317. #ifdef ARCH_ALPHA
  318. #include "common_alpha.h"
  319. #endif
  320. #ifdef ARCH_X86
  321. #include "common_x86.h"
  322. #endif
  323. #ifdef ARCH_X86_64
  324. #include "common_x86_64.h"
  325. #endif
  326. #ifdef ARCH_IA64
  327. #include "common_ia64.h"
  328. #endif
  329. #ifdef ARCH_POWER
  330. #include "common_power.h"
  331. #endif
  332. #ifdef sparc
  333. #include "common_sparc.h"
  334. #endif
  335. #ifdef ARCH_MIPS64
  336. #include "common_mips64.h"
  337. #endif
  338. #ifdef ARCH_ARM
  339. #include "common_arm.h"
  340. #endif
  341. #ifdef ARCH_ARM64
  342. #include "common_arm64.h"
  343. #endif
  344. #ifndef ASSEMBLER
  345. #ifdef OS_WINDOWS
  346. typedef char env_var_t[MAX_PATH];
  347. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  348. #else
  349. typedef char* env_var_t;
  350. #define readenv(p, n) ((p)=getenv(n))
  351. #endif
  352. #endif
  353. #ifdef OS_LINUX
  354. #include "common_linux.h"
  355. #endif
  356. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  357. #ifdef __NetBSD__
  358. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  359. #else
  360. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  361. #endif
  362. #include "param.h"
  363. #include "common_param.h"
  364. #ifndef STDERR
  365. #define STDERR stderr
  366. #endif
  367. #ifndef MASK
  368. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  369. #endif
  370. #if defined(XDOUBLE) || defined(DOUBLE)
  371. #define FLOATRET FLOAT
  372. #else
  373. #ifdef NEED_F2CCONV
  374. #define FLOATRET double
  375. #else
  376. #define FLOATRET float
  377. #endif
  378. #endif
  379. #ifndef ASSEMBLER
  380. #ifndef NOINCLUDE
  381. /* Inclusion of a standard header file is needed for definition of __STDC_*
  382. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  383. as a side effect of including either <features.h> or <stdc-predef.h>. */
  384. #include <stdio.h>
  385. #endif // NOINCLUDE
  386. /* C99 supports complex floating numbers natively, which GCC also offers as an
  387. extension since version 3.0. If neither are available, use a compatible
  388. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  389. #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  390. (__GNUC__ >= 3 && !defined(__cplusplus)))
  391. #define OPENBLAS_COMPLEX_C99
  392. typedef float _Complex openblas_complex_float;
  393. typedef double _Complex openblas_complex_double;
  394. typedef xdouble _Complex openblas_complex_xdouble;
  395. #else
  396. #define OPENBLAS_COMPLEX_STRUCT
  397. typedef struct { float real, imag; } openblas_complex_float;
  398. typedef struct { double real, imag; } openblas_complex_double;
  399. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  400. #endif
  401. #endif // ASSEMBLER
  402. #ifndef IFLUSH
  403. #define IFLUSH
  404. #endif
  405. #ifndef IFLUSH_HALF
  406. #define IFLUSH_HALF
  407. #endif
  408. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  409. #ifdef USE_OPENMP
  410. #undef USE_OPENMP
  411. #endif
  412. #endif
  413. #ifndef ASSEMBLER
  /*
   * MIN(a, b) / MAX(a, b): smaller / larger of the two arguments.
   * When the arguments compare equal, `a` is the result in both macros.
   *
   * Each argument is fully parenthesised so that operators with lower
   * precedence than the comparison (|, &, ^, ?:, ...) inside an argument are
   * evaluated before the comparison instead of being split apart by it.
   * Arguments are still evaluated more than once: do not pass expressions
   * with side effects.
   */
  #ifndef MIN
  #define MIN(a,b) ((a) > (b) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) ((a) < (b) ? (b) : (a))
  #endif
  420. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  421. #if defined(__FreeBSD__) || defined(__APPLE__)
  422. #define MAP_ANONYMOUS MAP_ANON
  423. #endif
  424. /* Common Memory Management Routine */
  425. void blas_set_parameter(void);
  426. int blas_get_cpu_number(void);
  427. void *blas_memory_alloc (int);
  428. void blas_memory_free (void *);
  429. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  430. void blas_memory_free_nolock (void *);
  431. int get_num_procs (void);
  432. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  433. int get_num_nodes (void);
  434. int get_num_proc (int);
  435. int get_node_equal (void);
  436. #endif
  437. void goto_set_num_threads(int);
  438. void gotoblas_affinity_init(void);
  439. void gotoblas_affinity_quit(void);
  440. void gotoblas_dynamic_init(void);
  441. void gotoblas_dynamic_quit(void);
  442. void gotoblas_profile_init(void);
  443. void gotoblas_profile_quit(void);
  444. #ifdef USE_OPENMP
  445. int omp_in_parallel(void);
  446. int omp_get_num_procs(void);
  447. #else
  448. #ifdef __ELF__
  449. int omp_in_parallel (void) __attribute__ ((weak));
  450. int omp_get_num_procs(void) __attribute__ ((weak));
  451. #endif
  452. #endif
  453. static __inline void blas_unlock(volatile BLASULONG *address){
  454. MB;
  455. *address = 0;
  456. }
  457. #ifdef OS_WINDOWS
  458. static __inline int readenv_atoi(char *env) {
  459. env_var_t p;
  460. return readenv(p,env) ? 0 : atoi(p);
  461. }
  462. #else
  /*
   * readenv_atoi: look up environment variable `env` with getenv() and return
   * its value converted by atoi().  Returns 0 when the variable is not set.
   */
  static __inline int readenv_atoi(char *env) {
    char *value = getenv(env);

    return value ? atoi(value) : 0;
  }
  470. #endif
  471. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  472. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  473. #ifndef UNIT
  474. FLOAT ratio, den;
  475. if (
  476. #ifdef XDOUBLE
  477. (fabsl(ar)) >= (fabsl(ai))
  478. #elif defined DOUBLE
  479. (fabs (ar)) >= (fabs (ai))
  480. #else
  481. (fabsf(ar)) >= (fabsf(ai))
  482. #endif
  483. ) {
  484. ratio = ai / ar;
  485. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  486. ar = den;
  487. ai = -ratio * den;
  488. } else {
  489. ratio = ar / ai;
  490. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  491. ar = ratio * den;
  492. ai = -den;
  493. }
  494. b[0] = ar;
  495. b[1] = ai;
  496. #else
  497. b[0] = ONE;
  498. b[1] = ZERO;
  499. #endif
  500. }
  501. #endif
  502. #ifdef MALLOC_DEBUG
  503. void *blas_debug_alloc(int);
  504. void *blas_debug_free(void *);
  505. #undef malloc
  506. #undef free
  507. #define malloc(a) blas_debug_alloc(a)
  508. #define free(a) blas_debug_free (a)
  509. #endif
  510. #ifndef COPYOVERHEAD
  511. #define GEMMRETTYPE int
  512. #else
  513. typedef struct {
  514. double outercopy;
  515. double innercopy;
  516. double kernel;
  517. double mflops;
  518. } copyoverhead_t;
  519. #define GEMMRETTYPE copyoverhead_t
  520. #endif
  521. #endif
  522. #ifndef BUILD_KERNEL
  523. #define KNAME(A, B) A
  524. #else
  525. #define KNAME(A, B) A##B
  526. #endif
  527. #include "common_interface.h"
  528. #ifdef SANITY_CHECK
  529. #include "common_reference.h"
  530. #endif
  531. #include "common_macro.h"
  532. #include "common_level1.h"
  533. #include "common_level2.h"
  534. #include "common_level3.h"
  535. #include "common_lapack.h"
  536. #ifdef CBLAS
  537. # define OPENBLAS_CONST /* see comment in cblas.h */
  538. # include "cblas.h"
  539. #endif
  540. #ifndef ASSEMBLER
  541. #if 0
  542. #include "symcopy.h"
  543. #endif
  544. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  545. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  546. #endif
  547. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  548. #include "common_thread.h"
  549. #endif
  550. #endif
  551. #define INFO_NUM 99
  552. #ifndef DEFAULT_CPU_NUMBER
  553. #define DEFAULT_CPU_NUMBER 4
  554. #endif
  555. #ifndef IDEBUG_START
  556. #define IDEBUG_START
  557. #endif
  558. #ifndef IDEBUG_END
  559. #define IDEBUG_END
  560. #endif
  561. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  562. typedef struct {
  563. int func;
  564. unsigned long long calls, fops, area, cycles, tcycles;
  565. } func_profile_t;
  566. extern func_profile_t function_profile_table[];
  567. extern int gotoblas_profile;
  568. #ifdef XDOUBLE
  569. #define NUMOPT QNUMOPT
  570. #elif defined DOUBLE
  571. #define NUMOPT DNUMOPT
  572. #else
  573. #define NUMOPT SNUMOPT
  574. #endif
  575. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  576. #ifdef SMP
  577. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  578. if (gotoblas_profile) { \
  579. profile_end = rpcc(); \
  580. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  581. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  582. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  583. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  584. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  585. } \
  586. }
  587. #else
  588. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  589. if (gotoblas_profile) { \
  590. profile_end = rpcc(); \
  591. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  592. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  593. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  594. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  595. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  596. } \
  597. }
  598. #endif
  599. #else
  600. #define FUNCTION_PROFILE_START()
  601. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  602. #endif
  603. #if 1
  604. #define PRINT_DEBUG_CNAME
  605. #define PRINT_DEBUG_NAME
  606. #else
  607. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  608. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  609. #endif
  610. #ifdef __cplusplus
  611. }
  612. #endif /* __cplusplus */
  613. #endif