/*
 * GEMV parameter header: per-target selections of vector move mnemonics,
 * prefetch mnemonic and distance, and default unroll factors.
 */
#ifndef GEMV_PARAM_H
#define GEMV_PARAM_H

/*
 * Mnemonic overrides.
 *
 * Any macro named movsd that is already defined is removed here, so from
 * this point movsd is the plain mnemonic unless a later section of this
 * header defines it again (the OPTERON section does).
 *
 * movapd is then unconditionally (re)defined, so every movapd token the
 * preprocessor sees after this point is emitted as movaps.
 */
#ifdef movsd
#undef movsd
#endif

#undef movapd
#define movapd movaps

/*
 * ATHLON target.
 *
 * Defines ALIGNED_ACCESS and selects movaps for all five MOVUPS_* selectors,
 * so the MOVUPS_*1 / MOVUPS_A2 wrappers defined later in this header expand
 * to a single movaps.  movaps requires a 16-byte aligned memory operand.
 * Prefetching uses prefetcht0 with PREFETCHSIZE = 64 * 3.
 *
 * NOTE(review): ALIGNED_ACCESS and PREFETCHSIZE are only defined here, not
 * consumed; PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef ATHLON
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps
#define PREFETCH prefetcht0
#define PREFETCHSIZE 64 * 3
#endif

/*
 * PENTIUM4 target.
 *
 * Defines ALIGNED_ACCESS and selects movaps (16-byte aligned memory operand
 * required) for all five MOVUPS_* selectors, so the move wrappers defined
 * later in this header expand to a single movaps.
 * Prefetching uses prefetcht0 with PREFETCHSIZE = 64 * 2.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef PENTIUM4
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps
#define PREFETCH prefetcht0
#define PREFETCHSIZE 64 * 2
#endif

/*
 * CORE2 target.
 *
 * Defines ALIGNED_ACCESS and selects movaps (16-byte aligned memory operand
 * required) for all five MOVUPS_* selectors, so the move wrappers defined
 * later in this header expand to a single movaps.
 * Prefetching uses prefetcht0 with PREFETCHSIZE = 64 * 4.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef CORE2
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps
#define PREFETCH prefetcht0
#define PREFETCHSIZE 64 * 4
#endif

/*
 * PENRYN target.
 *
 * Defines ALIGNED_ACCESS and selects movaps (16-byte aligned memory operand
 * required) for all five MOVUPS_* selectors, so the move wrappers defined
 * later in this header expand to a single movaps.
 * Prefetching uses prefetcht0 with PREFETCHSIZE = 64 * 4.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef PENRYN
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps
#define PREFETCH prefetcht0
#define PREFETCHSIZE 64 * 4
#endif

/*
 * NEHALEM target.
 *
 * Selects movups for all five MOVUPS_* selectors, so the move wrappers
 * defined later in this header expand to a single movups, which places no
 * alignment requirement on its memory operand.  ALIGNED_ACCESS is not
 * defined for this target.
 * Both PREFETCH and PREFETCHW map to prefetcht0; PREFETCHSIZE = 64 * 3.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef NEHALEM
#define MOVUPS_A movups
#define MOVUPS_XL movups
#define MOVUPS_XS movups
#define MOVUPS_YL movups
#define MOVUPS_YS movups
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0
#define PREFETCHSIZE 64 * 3
#endif

/*
 * OPTERON target.
 *
 * Prefetching uses prefetch / prefetchw with PREFETCHSIZE = 64 * 1; the
 * value is the same for real and COMPLEX builds, so it is defined once.
 *
 * No MOVUPS_* selector and no ALIGNED_ACCESS are defined for this target, so
 * the MOVUPS_* wrappers later in this header take their two-instruction
 * movsd + movhps form.  Because movsd is mapped to movlps below, every movsd
 * token seen after this point -- including the ones in those wrappers -- is
 * emitted as movlps.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef OPTERON
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE 64 * 1
#define movsd movlps
#endif

/*
 * BARCELONA and SHANGHAI targets (identical settings).
 *
 * Defines ALIGNED_ACCESS and selects movaps (16-byte aligned memory operand
 * required) for all five MOVUPS_* selectors, so the move wrappers defined
 * later in this header expand to a single movaps.
 * Prefetching uses prefetch / prefetchw.  PREFETCHSIZE is 64 * 2 for real
 * builds and 64 * 4 when COMPLEX is defined.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#if defined(BARCELONA) || defined(SHANGHAI)
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps

#define PREFETCH prefetch
#define PREFETCHW prefetchw
#ifndef COMPLEX
#define PREFETCHSIZE 64 * 2
#else
#define PREFETCHSIZE 64 * 4
#endif
#endif

/*
 * NANO target.
 *
 * Defines ALIGNED_ACCESS and selects movaps (16-byte aligned memory operand
 * required) for all five MOVUPS_* selectors, so the move wrappers defined
 * later in this header expand to a single movaps.
 * Prefetching uses prefetcht0.  PREFETCHSIZE is 64 * 1 for real builds and
 * 64 * 2 when COMPLEX is defined.
 *
 * NOTE(review): PREFETCHSIZE is presumably a byte distance -- confirm at the
 * use sites in the kernels.
 */
#ifdef NANO
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps
#define MOVUPS_XS movaps
#define MOVUPS_YL movaps
#define MOVUPS_YS movaps
#define PREFETCH prefetcht0
#ifndef COMPLEX
#define PREFETCHSIZE 64 * 1
#else
#define PREFETCHSIZE 64 * 2
#endif
#endif

/*
 * Default for PREOFFSET, applied only when the includer has not already
 * defined it: half of L1_DATA_LINESIZE when that macro is available,
 * otherwise 32.
 *
 * NOTE(review): PREOFFSET is consumed outside this header; presumably it is
 * an extra byte offset added to prefetch addresses -- confirm at the use
 * sites.
 */
#ifndef PREOFFSET
#ifdef L1_DATA_LINESIZE
#define PREOFFSET (L1_DATA_LINESIZE >> 1)
#else
#define PREOFFSET 32
#endif
#endif

/* Default GEMV_UNROLL to 4 unless the includer has already defined it. */
#ifndef GEMV_UNROLL
#define GEMV_UNROLL 4
#endif

/* Default ZGEMV_UNROLL to 4 unless the includer has already defined it. */
#ifndef ZGEMV_UNROLL
#define ZGEMV_UNROLL 4
#endif

/* #define COPY_FORCE */       /* Always copy X or Y to the buffer */
/* #define NOCOPY_UNALIGNED */ /* Do not copy if X or Y is not aligned */

/*
 * 16-byte load wrappers built on the MOVUPS_A selector (AT&T operand order:
 * memory source first, register destination last).
 *
 *   MOVUPS_A1(OFF, ADDR, REGS)              loads from OFF(ADDR)
 *   MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) loads from OFF(ADDR, BASE, SCALE)
 *
 * When a target section defined MOVUPS_A, each wrapper is that single
 * instruction.  Otherwise the load is split in two: movsd fetches the
 * 8 bytes at OFF into the low half of REGS and movhps fetches the 8 bytes at
 * OFF + 8 into the high half.  The fallback expands to two statements
 * separated by ';'.
 */
#ifdef MOVUPS_A
#define MOVUPS_A1(OFF, ADDR, REGS) MOVUPS_A OFF(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) MOVUPS_A OFF(ADDR, BASE, SCALE), REGS
#else
#define MOVUPS_A1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF(ADDR, BASE, SCALE), REGS; movhps OFF + 8(ADDR, BASE, SCALE), REGS
#endif

/*
 * Half-swapped 16-byte loads (AT&T operand order).  Unlike the MOVUPS_A
 * wrappers these do not depend on any selector: movsd always fetches the
 * 8 bytes at OFF + 8 into the low half of REGS and movhps fetches the
 * 8 bytes at OFF into the high half, i.e. the two 8-byte halves arrive in
 * the opposite order from memory.  Each macro expands to two statements
 * separated by ';'.
 *
 *   MOVRPS_A1(OFF, ADDR, REGS)              reads around OFF(ADDR)
 *   MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) reads around OFF(ADDR, BASE, SCALE)
 */
#define MOVRPS_A1(OFF, ADDR, REGS) movsd OFF + 8(ADDR), REGS; movhps OFF(ADDR), REGS
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF + 8(ADDR, BASE, SCALE), REGS; movhps OFF(ADDR, BASE, SCALE), REGS

/*
 * MOVUPS_XL1(OFF, ADDR, REGS): 16-byte load from OFF(ADDR) into REGS
 * (AT&T operand order).
 *
 * Uses the single MOVUPS_XL instruction when a target section defined it;
 * otherwise splits the load into movsd (8 bytes at OFF, low half of REGS)
 * followed by movhps (8 bytes at OFF + 8, high half), two statements
 * separated by ';'.
 */
#ifdef MOVUPS_XL
#define MOVUPS_XL1(OFF, ADDR, REGS) MOVUPS_XL OFF(ADDR), REGS
#else
#define MOVUPS_XL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_XS1(OFF, ADDR, REGS): 16-byte store of REGS to OFF(ADDR)
 * (AT&T operand order: register source first, memory destination last).
 *
 * Uses the single MOVUPS_XS instruction when a target section defined it;
 * otherwise splits the store into movsd (low half of REGS to OFF) followed
 * by movhps (high half to OFF + 8), two statements separated by ';'.
 */
#ifdef MOVUPS_XS
#define MOVUPS_XS1(OFF, ADDR, REGS) MOVUPS_XS REGS, OFF(ADDR)
#else
#define MOVUPS_XS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
#endif

/*
 * MOVUPS_YL1(OFF, ADDR, REGS): 16-byte load from OFF(ADDR) into REGS
 * (AT&T operand order).
 *
 * Uses the single MOVUPS_YL instruction when a target section defined it;
 * otherwise splits the load into movsd (8 bytes at OFF, low half of REGS)
 * followed by movhps (8 bytes at OFF + 8, high half), two statements
 * separated by ';'.
 */
#ifdef MOVUPS_YL
#define MOVUPS_YL1(OFF, ADDR, REGS) MOVUPS_YL OFF(ADDR), REGS
#else
#define MOVUPS_YL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_YS1(OFF, ADDR, REGS): 16-byte store of REGS to OFF(ADDR)
 * (AT&T operand order: register source first, memory destination last).
 *
 * Uses the single MOVUPS_YS instruction when a target section defined it;
 * otherwise splits the store into movsd (low half of REGS to OFF) followed
 * by movhps (high half to OFF + 8), two statements separated by ';'.
 */
#ifdef MOVUPS_YS
#define MOVUPS_YS1(OFF, ADDR, REGS) MOVUPS_YS REGS, OFF(ADDR)
#else
#define MOVUPS_YS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
#endif

#endif /* GEMV_PARAM_H */