You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

snrm2.S 4.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /*******************************************************************************
  2. Copyright (c) 2015, The OpenBLAS Project
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. 3. Neither the name of the OpenBLAS project nor the names of
  14. its contributors may be used to endorse or promote products
  15. derived from this software without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
  20. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  25. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. *******************************************************************************/
  27. #define ASSEMBLER
  28. #include "common.h"
  /* Integer register aliases used throughout this file. */
  #define N      x0    /* number of elements to process */
  #define X      x1    /* address of the next element of the input vector */
  #define INC_X  x2    /* distance between consecutive elements of X */
  #define I      x5    /* scratch loop counter */
  33. /*******************************************************************************
  34. * Macro definitions
  35. *******************************************************************************/
  #define TMPF   s6          /* element just loaded, single precision */
  #define TMPD   d7          /* that element (or a partial sum) widened to double */
  #define SSQ    s0          /* single-precision result handed back to the caller */
  #define SSQD   d0          /* double-precision running sum of squares */
  #define TMPVF  {v6.s}[0]   /* lane 0 of v6, i.e. the same bits TMPF names */
  #define SZ     4           /* bytes per single-precision element */

  /******************************************************************************/

  /*
   * All macros below accumulate the sum of squares in double precision.
   *
   * Squaring in single precision overflows to +Inf once |x| exceeds roughly
   * 1.8e19 and underflows towards zero below roughly 1e-23, so a float
   * accumulator returns Inf or 0 for vectors whose norm is perfectly
   * representable.  After widening, every square is exact (two 24-bit
   * significands give at most a 48-bit product) and stays well inside the
   * double exponent range, so only the additions round.
   *
   * Each INIT_* macro overwrites SSQD with the square of the first
   * element(s); each KERNEL_* macro adds further squares to SSQD.
   * All of them advance X past the data they consume.
   */

  /* SSQD = x[0]^2; X += 1 element (contiguous data). */
  .macro INIT_F1
          ldr     TMPF, [X], #SZ
          fcvt    TMPD, TMPF
          fmul    SSQD, TMPD, TMPD
  .endm

  /* SSQD += x[0]^2; X += 1 element (contiguous data). */
  .macro KERNEL_F1
          ldr     TMPF, [X], #SZ
          fcvt    TMPD, TMPF
          fmadd   SSQD, TMPD, TMPD, SSQD
  .endm

  /* SSQD = x[0]^2 + x[1]^2 + x[2]^2 + x[3]^2; X += 4 elements. */
  .macro INIT_F4
          ld1     {v1.4s}, [X], #16
          fcvtl   v2.2d, v1.2s            /* v2 = (double) x[0], x[1] */
          fcvtl2  v3.2d, v1.4s            /* v3 = (double) x[2], x[3] */
          fmul    v2.2d, v2.2d, v2.2d
          fmla    v2.2d, v3.2d, v3.2d     /* v2 = { x0^2 + x2^2, x1^2 + x3^2 } */
          faddp   SSQD, v2.2d             /* horizontal add of the two lanes */
  .endm

  /* SSQD += x[0]^2 + x[1]^2 + x[2]^2 + x[3]^2; X += 4 elements. */
  .macro KERNEL_F4
          ld1     {v1.4s}, [X], #16
          fcvtl   v2.2d, v1.2s
          fcvtl2  v3.2d, v1.4s
          fmul    v2.2d, v2.2d, v2.2d
          fmla    v2.2d, v3.2d, v3.2d
          faddp   TMPD, v2.2d
          fadd    SSQD, SSQD, TMPD
  .endm

  /*
   * Strided start-up: rescale INC_X from elements to bytes (SZ == 1 << 2),
   * then SSQD = x[0]^2 and X advances by one stride.
   */
  .macro INIT_S
          lsl     INC_X, INC_X, #2
          ld1     TMPVF, [X], INC_X
          fcvt    TMPD, TMPF
          fmul    SSQD, TMPD, TMPD
  .endm

  /* SSQD += x[0]^2; X advances by one stride (INC_X already in bytes). */
  .macro KERNEL_S1
          ld1     TMPVF, [X], INC_X
          fcvt    TMPD, TMPF
          fmadd   SSQD, TMPD, TMPD, SSQD
  .endm

  /*******************************************************************************
  * End of macro definitions
  *******************************************************************************/

  /*
   * Euclidean norm of a single-precision vector: sqrt(sum of x[i]^2).
   *
   * In:    N (x0)      element count
   *        X (x1)      address of the first element
   *        INC_X (x2)  stride between elements, counted in elements
   * Out:   SSQ (s0)    the norm; 0.0 when N <= 0 or INC_X <= 0
   * Clobb: x0, x1, x2, x5, v0-v3, v6, v7, condition flags
   *
   * The entry symbol and any framing come from PROLOGUE / EPILOGUE in
   * common.h, which is not part of this file.  Branch targets use the .L
   * prefix so they remain assembler-local and do not appear in the
   * symbol table of the object.
   */
          PROLOGUE

          cmp     N, xzr
          ble     .Lnrm2_kernel_zero      /* nothing to sum */
          cmp     INC_X, xzr
          ble     .Lnrm2_kernel_zero      /* zero or negative stride: return 0 */
          cmp     INC_X, #1
          bne     .Lnrm2_kernel_S_BEGIN   /* non-unit stride takes the scalar path */

  /* ---- contiguous data: four elements per iteration ---- */
  .Lnrm2_kernel_F_BEGIN:
          asr     I, N, #2                /* I = number of whole groups of four */
          cmp     I, xzr
          beq     .Lnrm2_kernel_F1_INIT   /* fewer than four elements in total */

          INIT_F4                         /* first group initialises SSQD */
          subs    I, I, #1
          beq     .Lnrm2_kernel_F1

  .Lnrm2_kernel_F4:
          KERNEL_F4
          subs    I, I, #1
          bne     .Lnrm2_kernel_F4

  /* ---- contiguous data: 0..3 leftover elements ---- */
  .Lnrm2_kernel_F1:
          ands    I, N, #3                /* result is 0..3, so ble fires only on 0 */
          ble     .Lnrm2_kernel_L999

  .Lnrm2_kernel_F10:
          KERNEL_F1
          subs    I, I, #1
          bne     .Lnrm2_kernel_F10
          b       .Lnrm2_kernel_L999

  /* N is 1..3 here: consume one element to initialise SSQD, then let the
     remainder loop above handle the other N - 1. */
  .Lnrm2_kernel_F1_INIT:
          INIT_F1
          subs    N, N, #1
          b       .Lnrm2_kernel_F1

  /* ---- strided data ---- */
  .Lnrm2_kernel_S_BEGIN:
          INIT_S                          /* also converts INC_X to bytes */
          subs    N, N, #1                /* one element already consumed */
          ble     .Lnrm2_kernel_L999

          asr     I, N, #2
          cmp     I, xzr
          ble     .Lnrm2_kernel_S1

  .Lnrm2_kernel_S4:                       /* four elements per iteration */
          KERNEL_S1
          KERNEL_S1
          KERNEL_S1
          KERNEL_S1
          subs    I, I, #1
          bne     .Lnrm2_kernel_S4

  .Lnrm2_kernel_S1:
          ands    I, N, #3                /* 0..3 leftover elements */
          ble     .Lnrm2_kernel_L999

  .Lnrm2_kernel_S10:
          KERNEL_S1
          subs    I, I, #1
          bne     .Lnrm2_kernel_S10

  /* ---- finish: square root in double, then narrow once ---- */
  .Lnrm2_kernel_L999:
          fsqrt   SSQD, SSQD
          fcvt    SSQ, SSQD               /* becomes Inf only if the norm itself
                                             exceeds the single-precision range */
          ret

  .Lnrm2_kernel_zero:
          fmov    SSQ, wzr                /* return +0.0 */
          ret

          EPILOGUE