You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gemm_beta.S 4.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  40. #include "version.h"
  /*
   * CNAME -- scale the matrix C in place by beta (Alpha assembly kernel).
   *
   * For each of the j columns, the m elements starting at c are replaced by
   * beta * element.  When beta == 0 the elements are overwritten with zero
   * without being read.  After each column, c is advanced by ldc.
   *
   * Inputs, as used by the code below:
   *   $16      m, number of elements processed per column
   *   $17      outer-loop count j (presumably n -- confirm against the caller)
   *   $f19     beta
   *   16($sp)  c, loaded as a quadword into $18 (incoming $18 is overwritten)
   *   24($sp)  ldc, loaded as a sign-extended longword into $19
   * Output:
   *   $0       0 on every exit path
   * Modified:
   *   $1 (c_offset), $2 (i), $4, $17, $18, $19,
   *   $f11, $f12, $f14-$f18, $f21-$f30
   *   ($28 and $gp as well when PROFILE is defined)
   *
   * LD, ST, MUL, SXADDQ, SIZE, CNAME and VERSION are not defined in this
   * file; presumably they come from common.h / version.h and select the
   * element type -- verify there.
   */
        .set    noat                    # $28 is used explicitly in the PROFILE path
        .set    noreorder               # assembler must keep the written instruction order
        .text
        .align  5
        .globl  CNAME
        .ent    CNAME
  CNAME:
        .frame  $sp, 0, $26, 0          # no stack frame; return address in $26

#ifdef PROFILE
        ldgp    $gp, 0($27)             # set up $gp from the procedure value in $27
        lda     $28, _mcount
        jsr     $28, ($28), _mcount     # profiling hook
#endif

  /* Fetch the stack arguments and leave early on an empty problem.  The
     loads and branches are interleaved; .set noreorder keeps this order. */
        ldq     $18, 16($sp)            # c
        ble     $16, $End               # m <= 0: nothing to do
        ldl     $19, 24($sp)            # ldc (32-bit load, sign-extended)
        ble     $17, $End               # j <= 0: nothing to do

  /* .prologue 1 is used when $gp has been loaded (PROFILE), otherwise 0. */
#ifndef PROFILE
        .prologue 0
#else
        .prologue 1
#endif

        fbeq    $f19, $BETA_EQ_ZERO     # if (beta == ZERO)
        .align  4

  /* ------------------------------------------------------------------ */
  /* beta != 0: one pass of this outer loop scales one column.           */
  /* ------------------------------------------------------------------ */
  $BETA_NE_ZERO:
        sra     $16, 3, $2              # i = (m >> 3)
        mov     $18, $1                 # c_offset = c
        lda     $17, -1($17)            # j --
        ble     $2, $L52                # fewer than 8 elements: remainder only
        .align  4

  /* Main loop: 8 elements per iteration (load, multiply by beta, store). */
  $L51:
        lds     $f31, 64($1)            # result discarded ($f31); prefetch 64 bytes ahead
        lda     $2, -1($2)              # i --

        LD      $f14, 0*SIZE($1)
        LD      $f15, 1*SIZE($1)
        LD      $f16, 2*SIZE($1)
        LD      $f17, 3*SIZE($1)
        LD      $f18, 4*SIZE($1)
        LD      $f11, 5*SIZE($1)
        LD      $f21, 6*SIZE($1)
        LD      $f22, 7*SIZE($1)

        MUL     $f19, $f14, $f23
        MUL     $f19, $f15, $f24
        MUL     $f19, $f16, $f25
        MUL     $f19, $f17, $f26
        MUL     $f19, $f18, $f27
        MUL     $f19, $f11, $f28
        MUL     $f19, $f21, $f29
        MUL     $f19, $f22, $f30

        ST      $f23, 0*SIZE($1)
        ST      $f24, 1*SIZE($1)
        ST      $f25, 2*SIZE($1)
        ST      $f26, 3*SIZE($1)
        ST      $f27, 4*SIZE($1)
        ST      $f28, 5*SIZE($1)
        ST      $f29, 6*SIZE($1)
        ST      $f30, 7*SIZE($1)

        lda     $1, 8*SIZE($1)          # c_offset += 8 elements
        bgt     $2, $L51
        .align  4

  /* Remainder: the last (m & 7) elements of the column, one at a time. */
  $L52:
        and     $16, 7, $2              # i = (m & 7)
        ble     $2, $L54
        .align  4

  $L53:
        LD      $f12, 0($1)
        lda     $2, -1($2)              # i --
        MUL     $f19, $f12, $f23
        ST      $f23, 0($1)
        lda     $1, SIZE($1)            # c_offset += 1 element
        bgt     $2, $L53
        .align  4

  $L54:
        SXADDQ  $19, $18, $18           # c += ldc
        bgt     $17, $BETA_NE_ZERO      # more columns left
        clr     $0                      # return 0
        ret
        .align  4

  /* ------------------------------------------------------------------ */
  /* beta == 0: store zero ($f31 reads as zero) without loading C.       */
  /* ------------------------------------------------------------------ */
  $BETA_EQ_ZERO:
        sra     $16, 3, $2              # i = (m >> 3)
  /* NOTE(review): $4 is set here and advanced in the loop below but is
     never read in this file; possibly a leftover.  It is kept so that the
     hand-scheduled instruction layout stays unchanged. */
        lda     $4, 8*SIZE($18)
        mov     $18, $1                 # c_offset = c
        lda     $17, -1($17)            # j --
        ble     $2, $L42                # fewer than 8 elements: remainder only
        .align  4

  /* Main loop: zero 8 elements per iteration. */
  $L41:
        ST      $f31, 0*SIZE($1)
        ST      $f31, 1*SIZE($1)
        ST      $f31, 2*SIZE($1)
        ST      $f31, 3*SIZE($1)
        ST      $f31, 4*SIZE($1)
        ST      $f31, 5*SIZE($1)
        ST      $f31, 6*SIZE($1)
        ST      $f31, 7*SIZE($1)
        lda     $2, -1($2)              # i --
        lda     $4, 8*SIZE($4)
        lda     $1, 8*SIZE($1)          # c_offset += 8 elements
        bgt     $2, $L41
        .align  4

  /* Remainder: zero the last (m & 7) elements of the column. */
  $L42:
        and     $16, 7, $2              # i = (m & 7)
        ble     $2, $L44
        .align  4

  $L43:
        lda     $2, -1($2)              # i --
        ST      $f31, 0($1)
        lda     $1, SIZE($1)            # c_offset += 1 element
        bgt     $2, $L43
        .align  4

  $L44:
        SXADDQ  $19, $18, $18           # c += ldc
        bgt     $17, $BETA_EQ_ZERO      # more columns left
        .align  4

  /* Common exit, also the target of the early-exit branches at the top.
     $0 is cleared after the label so that the empty-problem path returns 0
     like the other exits instead of returning with $0 unset. */
  $End:
        clr     $0                      # return 0
        ret
        .ident  VERSION
        .end    CNAME