| @@ -1,61 +1,51 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| /********************************************************************************* | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /********************************************************************* | |||
| * 2013/10/19 Saar | |||
| * BLASTEST : | |||
| * 2013/10/28 Saar | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * 2013/08/16 Saar | |||
| * 2013/10/28 Saar | |||
| * Parameter: | |||
| * CGEMM_DEFAULT_UNROLL_N 2 | |||
| * CGEMM_DEFAULT_UNROLL_M 8 | |||
| * CGEMM_DEFAULT_P 224 | |||
| * CGEMM_DEFAULT_Q 224 | |||
| * CGEMM_DEFAULT_P 384 | |||
| * CGEMM_DEFAULT_Q 192 | |||
| * A_PR1 512 | |||
| * B_PR1 512 | |||
| * | |||
| * BLASTEST: OK | |||
| * Performance at 6912x6912x6912: | |||
| * 1 thread: 84 GFLOPS (SANDYBRIDGE: 60) (MKL: 86) | |||
| * 2 threads: 153 GFLOPS (SANDYBRIDGE: 114) (MKL: 155) | |||
| * 3 threads: 224 GFLOPS (SANDYBRIDGE: 162) (MKL: 222) | |||
| * 4 threads: 278 GFLOPS (SANDYBRIDGE: 223) (MKL: 279) | |||
| * | |||
| * Performance: | |||
| * 1 thread: 2.04 times faster than sandybridge | |||
| * 4 threads: 1.96 times faster than sandybridge | |||
| * | |||
| * Compile for FMA3: OK | |||
| * | |||
| *********************************************************************/ | |||
| @@ -235,8 +225,8 @@ | |||
| #endif | |||
| #define A_PR1 384 /* NOTE(review): looks like the superseded value - A_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define B_PR1 192 /* NOTE(review): looks like the superseded value - B_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define A_PR1 512 /* agrees with "A_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for A - use sites not visible here, verify */ | |||
| #define B_PR1 512 /* agrees with "B_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for B - use sites not visible here, verify */ | |||
| /***************************************************************************************************************************/ | |||
| @@ -338,6 +328,9 @@ | |||
| vmovups %ymm10 , (CO1, LDC) | |||
| vmovups %ymm14 , 8 * SIZE(CO1, LDC) | |||
| prefetcht0 64(CO1) | |||
| prefetcht0 64(CO1, LDC) | |||
| .endm | |||
| /***************************************************************************************************************************/ | |||
| @@ -1,61 +1,51 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| /********************************************************************************* | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /********************************************************************* | |||
| * 2013/10/19 Saar | |||
| * BLASTEST : | |||
| * 2013/10/28 Saar | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * 2013/08/15 Saar | |||
| * 2013/10/28 Saar | |||
| * Parameter: | |||
| * SGEMM_DEFAULT_UNROLL_N 4 | |||
| * SGEMM_DEFAULT_UNROLL_M 16 | |||
| * SGEMM_DEFAULT_P 768 | |||
| * SGEMM_DEFAULT_Q 168 | |||
| * SGEMM_DEFAULT_Q 384 | |||
| * A_PR1 512 | |||
| * B_PR1 512 | |||
| * | |||
| * BLASTEST: OK | |||
| * | |||
| * Performance: | |||
| * 1 thread: 2.22 times faster than sandybridge | |||
| * 4 threads: 2.26 times faster than sandybridge | |||
| * | |||
| * Compile for FMA3: OK | |||
| * Performance at 9216x9216x9216: | |||
| * 1 thread: 86 GFLOPS (SANDYBRIDGE: 59) (MKL: 83) | |||
| * 2 threads: 157 GFLOPS (SANDYBRIDGE: 116) (MKL: 155) | |||
| * 3 threads: 235 GFLOPS (SANDYBRIDGE: 165) (MKL: 230) | |||
| * 4 threads: 288 GFLOPS (SANDYBRIDGE: 223) (MKL: 267) | |||
| * | |||
| *********************************************************************/ | |||
| @@ -162,8 +152,8 @@ | |||
| #endif | |||
| #define A_PR1 384 /* NOTE(review): looks like the superseded value - A_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define B_PR1 192 /* NOTE(review): looks like the superseded value - B_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define A_PR1 512 /* agrees with "A_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for A - use sites not visible here, verify */ | |||
| #define B_PR1 512 /* agrees with "B_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for B - use sites not visible here, verify */ | |||
| /******************************************************************************************* | |||
| * 4 lines of N | |||
| @@ -230,6 +220,11 @@ | |||
| vmovups %ymm10, (CO2, LDC) | |||
| vmovups %ymm11, 8 * SIZE(CO2, LDC) | |||
| prefetcht0 64(CO1) | |||
| prefetcht0 64(CO1, LDC) | |||
| prefetcht0 64(CO2) | |||
| prefetcht0 64(CO2, LDC) | |||
| .endm | |||
| @@ -1,62 +1,53 @@ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| /* */ | |||
| /* Redistribution and use in source and binary forms, with or */ | |||
| /* without modification, are permitted provided that the following */ | |||
| /* conditions are met: */ | |||
| /* */ | |||
| /* 1. Redistributions of source code must retain the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer. */ | |||
| /* */ | |||
| /* 2. Redistributions in binary form must reproduce the above */ | |||
| /* copyright notice, this list of conditions and the following */ | |||
| /* disclaimer in the documentation and/or other materials */ | |||
| /* provided with the distribution. */ | |||
| /* */ | |||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||
| /* */ | |||
| /* The views and conclusions contained in the software and */ | |||
| /* documentation are those of the authors and should not be */ | |||
| /* interpreted as representing official policies, either expressed */ | |||
| /* or implied, of The University of Texas at Austin. */ | |||
| /*********************************************************************/ | |||
| /********************************************************************* | |||
| * 2013/10/19 Saar | |||
| * BLASTEST : | |||
| /********************************************************************************* | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | |||
| /******************************************************************************** | |||
| * 2013/10/28 Saar | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * | |||
| * 2013/08/16 Saar | |||
| * 2013/10/28 Saar | |||
| * Parameter: | |||
| * ZGEMM_DEFAULT_UNROLL_N 2 | |||
| * ZGEMM_DEFAULT_UNROLL_M 4 | |||
| * ZGEMM_DEFAULT_P 112 | |||
| * ZGEMM_DEFAULT_Q 224 | |||
| * ZGEMM_DEFAULT_P 256 | |||
| * ZGEMM_DEFAULT_Q 128 | |||
| * A_PR1 512 | |||
| * B_PR1 512 | |||
| * | |||
| * | |||
| * Performance: | |||
| * 1 thread: 1.80 times faster than sandybridge | |||
| * 4 threads: 1.74 times faster than sandybridge | |||
| * Performance at 4608x4608x4608: | |||
| * 1 thread: 43 GFLOPS (SANDYBRIDGE: 29) (MKL: 53) | |||
| * 2 threads: 85 GFLOPS (SANDYBRIDGE: 59) (MKL: 100) | |||
| * 3 threads: 122 GFLOPS (SANDYBRIDGE: 86) (MKL: 138) | |||
| * 4 threads: 156 GFLOPS (SANDYBRIDGE: 108) (MKL: 172) | |||
| * | |||
| * Compile for FMA3: OK | |||
| * | |||
| *********************************************************************/ | |||
| ********************************************************************************/ | |||
| #define ASSEMBLER | |||
| @@ -232,8 +223,8 @@ | |||
| #endif | |||
| #define A_PR1 384 /* NOTE(review): looks like the superseded value - A_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define B_PR1 192 /* NOTE(review): looks like the superseded value - B_PR1 is defined again two lines below; confirm only one definition survives */ | |||
| #define A_PR1 512 /* agrees with "A_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for A - use sites not visible here, verify */ | |||
| #define B_PR1 512 /* agrees with "B_PR1 512" in the parameter list of the header comment; presumably a prefetch distance for B - use sites not visible here, verify */ | |||
| /***************************************************************************************************/ | |||
| .macro KERNEL4x2_SUB | |||
| @@ -335,7 +326,8 @@ | |||
| vmovups %ymm10 , (CO1, LDC) | |||
| vmovups %ymm14 , 4 * SIZE(CO1, LDC) | |||
| prefetcht0 64(CO1) | |||
| prefetcht0 64(CO1, LDC) | |||
| .endm | |||
| @@ -1223,12 +1223,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SGEMM_DEFAULT_P 768 /* same value as "SGEMM_DEFAULT_P 768" in the SGEMM kernel header comment */ | |||
| #define DGEMM_DEFAULT_P 512 | |||
| #define CGEMM_DEFAULT_P 384 /* same value as "CGEMM_DEFAULT_P 384" in the CGEMM kernel header comment */ | |||
| #define ZGEMM_DEFAULT_P 192 /* NOTE(review): looks superseded - ZGEMM_DEFAULT_P is defined again on the next line; confirm only one definition survives */ | |||
| #define ZGEMM_DEFAULT_P 256 /* same value as "ZGEMM_DEFAULT_P 256" in the ZGEMM kernel header comment */ | |||
| #define SGEMM_DEFAULT_Q 168 /* NOTE(review): looks superseded - SGEMM_DEFAULT_Q is defined again on the next line; confirm only one definition survives */ | |||
| #define SGEMM_DEFAULT_Q 384 /* same value as "SGEMM_DEFAULT_Q 384" in the SGEMM kernel header comment */ | |||
| #define DGEMM_DEFAULT_Q 256 | |||
| #define CGEMM_DEFAULT_Q 168 /* NOTE(review): looks superseded - CGEMM_DEFAULT_Q is defined again two lines below; confirm only one definition survives */ | |||
| #define ZGEMM_DEFAULT_Q 168 /* NOTE(review): looks superseded - ZGEMM_DEFAULT_Q is defined again two lines below; confirm only one definition survives */ | |||
| #define CGEMM_DEFAULT_Q 192 /* same value as "CGEMM_DEFAULT_Q 192" in the CGEMM kernel header comment */ | |||
| #define ZGEMM_DEFAULT_Q 128 /* same value as "ZGEMM_DEFAULT_Q 128" in the ZGEMM kernel header comment */ | |||
| #define SGEMM_DEFAULT_R sgemm_r /* expands to the identifier sgemm_r, which is declared outside this excerpt */ | |||
| //#define DGEMM_DEFAULT_R dgemm_r | |||