/*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* 1. Redistributions of source code must retain the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer. */ /* */ /* 2. Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials */ /* provided with the distribution. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* */ /* The views and conclusions contained in the software and */ /* documentation are those of the authors and should not be */ /* interpreted as representing official policies, either expressed */ /* or implied, of The University of Texas at Austin. */ /*********************************************************************/ #include #include "common.h" blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) { BLASLONG n, bk, i, blocking, lda; int mode; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; #ifndef COMPLEX #ifdef XDOUBLE mode = BLAS_XDOUBLE | BLAS_REAL; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_REAL; #else mode = BLAS_SINGLE | BLAS_REAL; #endif #else #ifdef XDOUBLE mode = BLAS_XDOUBLE | BLAS_COMPLEX; #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_COMPLEX; #else mode = BLAS_SINGLE | BLAS_COMPLEX; #endif #endif if (args -> nthreads == 1) { LAUUM_U_SINGLE(args, NULL, NULL, sa, sb, 0); return 0; } n = args -> n; a = (FLOAT *)args -> a; lda = args -> lda; if (range_n) n = range_n[1] - range_n[0]; if (n <= GEMM_UNROLL_N * 2) { LAUUM_U_SINGLE(args, NULL, range_n, sa, sb, 0); return 0; } newarg.lda = lda; newarg.ldb = lda; newarg.ldc = lda; newarg.alpha = alpha; newarg.beta = NULL; newarg.nthreads = args -> nthreads; blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); if (blocking > GEMM_Q) blocking = GEMM_Q; for (i = 0; i < n; i += blocking) { bk = n - i; if (bk > blocking) bk = blocking; newarg.n = i; newarg.k = bk; newarg.a = a + ( i * lda) * COMPSIZE; newarg.c = a; syrk_thread(mode | BLAS_TRANSA_N | BLAS_TRANSB_T, &newarg, NULL, NULL, (void *)HERK_UN, sa, sb, args -> nthreads); newarg.m = i; newarg.n = bk; newarg.a = a + (i + i * lda) * COMPSIZE; newarg.b = a + ( i * lda) * COMPSIZE; gemm_thread_m(mode | BLAS_TRANSA_T | BLAS_RSIDE, &newarg, NULL, NULL, (void *)TRMM_RCUN, sa, sb, args -> nthreads); newarg.m = bk; newarg.n = bk; newarg.a = a + (i + i * lda) * COMPSIZE; CNAME(&newarg, NULL, NULL, sa, sb, 0); } return 0; }