tahoma2d/thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/mips64/sgemm_kernel_8x4_ps.S
2016-03-24 02:47:04 +09:00

7797 lines
134 KiB
ArmAsm

#define REALNAME ASMNAME
#define ASSEMBLER
#include "common.h"
## FETCH is the instruction used to touch memory ahead of use. It is defined as
## ld, and the visible call sites all target $0, so the loaded value itself is
## not kept.
#define FETCH ld
## Size in bytes of the stack frame reserved by the prologue.
#define STACKSIZE 160
## gsLQC1 / gsSQC1 emit one hand-encoded instruction word through .word.
## Field layout of that word, as written in the two macro bodies:
##   bits 31..26  major opcode, 0x32 for gsLQC1 and 0x3A for gsSQC1
##   bits 25..21  base   - number of the integer register holding the address
##   bits 20..16  ft     - number of one floating-point register
##   bit  15      always set
##   bit  6 up    offset - not masked, so it must stay small enough not to
##                         reach bit 15
##   bit  5       always set
##   bits  4..0   fq     - number of the other floating-point register
## All four arguments are combined with integer shifts, so they have to be bare
## numbers (see the F0..F31 and R12..R17 defines), never $-prefixed names.
## Call sites annotate the register pair in the order ft, fq: the load written
## with F9, F8 is commented B1 B2, and B1 is $f8.
## NOTE(review): presumably these are the Loongson 128-bit load and store of a
## floating-point register pair, with offset counted in 16-byte units (the main
## loop uses offsets 0..7 and then advances AO by 32 * SIZE) - confirm against
## the Loongson instruction manual.
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
##### Parameter registers ####
## Integer registers that are read as kernel arguments without being set first.
##   M, N   row and column counts, consumed in blocks of 8 rows and 4 columns
##   K      inner dimension, the main loop is unrolled over 64 steps of it
##   A, B   base addresses of the two input operands
##   C      base address of the output, moved on by four columns per panel
##   LDC    stride between the four output columns, scaled by BASE_SHIFT at .L4
## $7 is not in this list, it serves as the loop counter named further down.
#define M $4
#define N $5
#define K $6
#define A $8
#define B $9
#define C $10
#define LDC $11
#### Pointer A, B, C ####
## Working pointers derived from the parameters above.
##   AO, BO      current read positions in the two inputs. The encoded quad
##               loads name them by number (R12, R13), so the register choice
##               here and those numbers have to stay in step.
##   CO1..CO4    four output column pointers, each one LDC after the previous
##   PREA, PREB  look-ahead addresses touched with FETCH, both start
##               K << BASE_SHIFT bytes past the respective input base
## $16-$19 are stored to the stack by the prologue before they are written.
#define AO $12
#define BO $13
#define CO1 $14
#define CO2 $15
#define CO3 $16
#define CO4 $17
#define PREA $18
#define PREB $19
#### Used registers ####
## Floating-point operand registers. A1..A8 are filled by quad loads through
## AO. B1, B2, B5 and B6 are filled by quad loads through BO, while B3, B4, B7
## and B8 are produced inside the loop by PLU from B1, B2, B5 and B6.
#define A1 $f0
#define A2 $f1
#define A3 $f2
#define A4 $f3
#define A5 $f4
#define A6 $f5
#define A7 $f6
#define A8 $f7
#define B1 $f8
#define B2 $f9
#define B3 $f10
#define B4 $f11
#define B5 $f12
#define B6 $f13
#define B7 $f14
#define B8 $f15
## Sixteen accumulators, zeroed at .L481 and updated by MADPS in the loop.
## In Cxy the first digit selects the A operand and the second the B operand:
## x = 1..4 pairs with A1..A4 on one step and A5..A8 on the next,
## y = 1..4 pairs with B1..B4 on one step and B5..B8 on the next.
## Note that the defines below are not in numeric order of their names.
## NOTE(review): C34, C43 and C44 live in $f29-$f31, which the visible prologue
## does not store (it stores $f24-$f28, plus $f20-$f23 when __64BIT__ is not
## defined). Confirm against the calling convention and the epilogue.
#define C11 $f16
#define C12 $f17
#define C21 $f18
#define C22 $f19
#define C31 $f20
#define C32 $f21
#define C41 $f22
#define C42 $f23
#define C13 $f24
#define C14 $f25
#define C23 $f26
#define C24 $f27
#define C33 $f28
#define C34 $f29
#define C43 $f30
#define C44 $f31
## Integer loop counters: J is set to N >> 2 at .L4, I to M >> 3 at .L48, and
## L holds the trip count of the unrolled K loop.
#define I $2
#define J $3
#define L $7
#### Alpha register ####
## ALPHA is read from $f15 without being set first, and $f15 is also B8. The
## value is stored to 152($sp) at .L4, before the K loop starts writing B8.
#define ALPHA $f15
## Bare register numbers for the hand-encoded gsLQC1 / gsSQC1 words. Those
## macros build the instruction with integer shifts, so they need plain numbers
## instead of $-prefixed register names.
## Fn is the number of floating-point register $fn.
#define F31 31
#define F30 30
#define F29 29
#define F28 28
#define F27 27
#define F26 26
#define F25 25
#define F24 24
#define F23 23
#define F22 22
#define F21 21
#define F20 20
#define F19 19
#define F18 18
#define F17 17
#define F16 16
#define F15 15
#define F14 14
#define F13 13
#define F12 12
#define F11 11
#define F10 10
#define F9 9
#define F8 8
#define F7 7
#define F6 6
#define F5 5
#define F4 4
#define F3 3
#define F2 2
#define F1 1
#define F0 0
## Rn is the number of integer register $n. Matching the pointer defines:
## R12 is AO, R13 is BO, and R14..R17 are CO1..CO4.
#define R12 12
#define R13 13
#define R14 14
#define R15 15
#define R16 16
#define R17 17
## Extra integer registers that exist only in the TRMM build of this kernel.
##   OFFSET  loaded from 160($sp) by the prologue
##   KK      running offset, set from OFFSET or from its negation
##   TEMP    scratch for address and trip-count arithmetic
## The prologue stores $23-$25 under the same TRMMKERNEL guard.
#if defined(TRMMKERNEL)
#define OFFSET $23
#define KK $24
#define TEMP $25
#endif
# .text
# .align 2
## .globl gemm
# .set nomips16
# .ent gemm
# .type gemm, @function
#gemm:
# .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0
# .mask 0x40000000,-8
# .fmask 0x00000000,0
# .set noreorder
# .set nomacro
## Entry sequence: reserve STACKSIZE bytes and store the registers listed
## below. PROLOGUE itself comes from common.h and is not visible here.
##
## Frame layout, byte offsets from the lowered $sp:
##     0 ..  48   $16-$22
##    56 ..  88   $f24-$f28
##    96 .. 112   $23-$25      TRMMKERNEL builds only
##   120 .. 144   $f20-$f23    only when __64BIT__ is not defined
##   152          ALPHA        written later, at .L4
## NOTE(review): $20-$22 are stored although nothing in the visible part names
## them, and $f29-$f31 are used as accumulators but are not stored. Confirm
## both against the rest of the file and the calling convention.
PROLOGUE
daddiu $sp,$sp,-STACKSIZE
sd $16, 0($sp)
sd $17, 8($sp)
sd $18, 16($sp)
sd $19, 24($sp)
sd $20, 32($sp)
sd $21, 40($sp)
sd $22, 48($sp)
ST $f24, 56($sp)
ST $f25, 64($sp)
ST $f26, 72($sp)
ST $f27, 80($sp)
ST $f28, 88($sp)
#if defined(TRMMKERNEL)
sd $23, 96($sp)
sd $24, 104($sp)
sd $25, 112($sp)
## Offset 160 equals STACKSIZE, so this reads the slot that $sp addressed on
## entry, before the frame was reserved.
LDARG OFFSET, 160($sp)
#endif
#ifndef __64BIT__
ST $f20,120($sp)
ST $f21,128($sp)
ST $f22,136($sp)
ST $f23,144($sp)
#endif
## .L4: one-time set-up before the work on 4-column panels.
##   J   = N >> 2, the number of full 4-column panels
##   LDC = LDC << BASE_SHIFT, the output stride scaled as noted on that line
##   KK  = negated OFFSET, in TRMM builds without LEFT
## If J is not positive, control goes to .L2 (outside this view). The ALPHA
## store comes after the branch; presumably it sits in the branch delay slot
## and therefore runs on both paths - TODO confirm that noreorder is active.
.align 4
.L4:
dsra J, N, 2 # NR=4
dsll LDC, LDC, BASE_SHIFT# LDC*SIZE
#if defined(TRMMKERNEL) && !defined(LEFT)
neg KK, OFFSET
#endif
blez J, .L2
ST ALPHA, 152($sp)
## .L48: set-up for one panel of four output columns.
##   I        = M >> 3, the number of full 8-row blocks
##   AO       = start of the first input again
##   CO1..CO4 = current output base and the next three columns, LDC apart
##   PREA     = first input base + (K << BASE_SHIFT)
##   KK       = OFFSET, in TRMM builds with LEFT
## If I is not positive, control goes to .L44 (outside this view). The update
## of the output base to CO4 + LDC comes after the branch; presumably it sits
## in the branch delay slot so the base moves on to the next panel on both
## paths - TODO confirm that noreorder is active.
.L48:
dsra I, M, 3 # MR=8
dsll PREA, K, BASE_SHIFT
move AO, A # Reset A
move CO1, C
daddu CO2, C, LDC
daddu CO3, CO2, LDC
daddu CO4, CO3, LDC
daddu PREA, A, PREA
#if defined(TRMMKERNEL) && defined(LEFT)
move KK, OFFSET
#endif
blez I, .L44
daddu C, CO4, LDC
## .L481: set-up for one tile of 8 rows by 4 columns of the output.
##   - position AO and BO (TRMM builds may first skip KK steps, see below)
##   - zero the sixteen accumulators: C11 is written from $0 and then copied
##     into the other fifteen
##   - preload B1 B2 through BO and A1..A4 through AO with the quad loads
##   - touch offsets 0 and 4 * SIZE of each of the four output columns
##   - set PREB = second input base + (K << BASE_SHIFT)
##   - form B3 and B4 from B1 and B2 with PLU
##   - compute the trip count L of the 64-step unrolled loop and, if it is not
##     positive, go to .L482 (outside this view)
## The TRMM and GEMM variants below interleave the same instructions. They
## differ in how AO, BO and L are derived and in what follows the final branch.
## NOTE(review): PLU with both sources equal presumably swaps the two
## single-precision halves of the register - the macro is defined in common.h,
## confirm there.
.align 4
.L481:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
## Skip KK steps: AO moves on by KK << (3 + BASE_SHIFT) bytes and BO starts
## KK << (2 + BASE_SHIFT) bytes into the second input.
dsll L, KK, 3 + BASE_SHIFT # kk*8mr*datasize
dsll TEMP, KK, 2 + BASE_SHIFT
daddu AO, AO, L # AO points to the data address
daddu BO, B, TEMP
#endif
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
dsll PREB, K, BASE_SHIFT
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
gsLQC1(R12, F3, F2, 1) # A3 A4
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
FETCH $0, 4 * SIZE(CO1)
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
FETCH $0, 4 * SIZE(CO2)
daddu PREB, B, PREB
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
FETCH $0, 4 * SIZE(CO3)
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
PLU B4, B2, B2
FETCH $0, 4 * SIZE(CO4)
## Step count for the TRMM tile: K - KK, KK + 8 or KK + 4, selected by LEFT and
## TRANSA. L is that count shifted right by 6.
#if (defined(LEFT) && !defined(TRANSA)) ||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK # TEMP is the length of the data part
#elif defined(LEFT)
daddiu TEMP, KK, 8
#else
daddiu TEMP, KK, 4
#endif
dsra L, TEMP, 6
## The slot after this branch is filled with NOP.
blez L, .L482
NOP
#else
# GEMM PART
move BO, B # Reset B
dsra L, K, 6 # UnRoll K=64
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
dsll PREB, K, BASE_SHIFT
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
gsLQC1(R12, F3, F2, 1) # A3 A4
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
FETCH $0, 4 * SIZE(CO1)
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
FETCH $0, 4 * SIZE(CO2)
daddu PREB, B, PREB
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
FETCH $0, 4 * SIZE(CO3)
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
PLU B4, B2, B2
## The last output touch is placed after the branch; presumably it occupies the
## branch delay slot and runs on both paths - TODO confirm noreorder is active.
blez L, .L482
FETCH $0, 4 * SIZE(CO4)
#endif
.L4810:
daddiu L, L, -1
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
bgtz L, .L4810
MADPS C44, C44, A8, B8
.align 4
## ---------------------------------------------------------------------------
## .L482: depth remainder section of the 8x4 tile for the bit of value 32.
##
## The depth count is K, or TEMP when built as TRMMKERNEL.  It is masked with
## 32; when that bit is clear the whole section is skipped to .L483.
##
## The body is eight copies of one 100-instruction group.  Each group moves
## AO forward by 32*SIZE and BO by 16*SIZE, tagged 4KR*8MR and 4KR*4NR, so
## the section covers 8 * 4 = 32 depth steps.
##
## Notes on the instructions used, as far as this section shows:
##  - gsLQC1 is the raw .word macro from the file header.  Base R12 is the
##    register number of AO and R13 that of BO; the two F arguments name the
##    FP registers that get filled.  The last argument looks like a 16-byte
##    quad-word index, since 0..7 spans the 32*SIZE step of AO for 4-byte
##    SIZE -- TODO confirm against the Loongson manual.
##  - The loads annotated B3 B4 target F13 and F12, which the file header
##    names B6 and B5.  B3, B4, B7 and B8 are never loaded in this section;
##    they are produced by PLU from B1, B2, B5 and B6.
##  - MADPS and PLU are defined outside this chunk; presumably paired-single
##    multiply-add and a pair shuffle -- verify in common.h.
##  - FETCH is ld, see the file header.  With $0 as destination the loaded
##    value is discarded, so these lines only touch memory at PREA and PREB,
##    presumably as prefetches.
##
## Pipelining contract visible here: the first MADPS lines consume A1..A4
## and B1..B4 before any load, so those must be valid on entry.  The last
## step of every group reloads A1..A4, B1, B2 from the advanced AO and BO and
## rebuilds B3, B4, so the same state is handed on to the code at .L483.
## ---------------------------------------------------------------------------
.L482:
#ifndef TRMMKERNEL
andi L, K, 32
#else
andi L, TEMP, 32
#endif
## Skip the section when the masked bit is zero.  The NOP below occupies the
## slot right after the branch, the MIPS branch delay slot.
blez L, .L483
NOP
## ---- group 1 of 8 ----
## step 0: accumulate A1..A4 against B1..B4.  The interleaved loads fetch the
## operands of step 1 into F12, F13 and A5..A8; PLU then builds B7 and B8.
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1: accumulate A5..A8 against B5..B8.  The loads refill B1, B2 and
## A1..A4 for step 2; PLU rebuilds B3 and B4.
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: same shape as step 0 with the last offsets of this group.  BO and
## AO are advanced in the middle of the step, after the loads that still
## need the old base have been issued.
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: same shape as step 1.  Its loads use offsets 0 and 1 of the
## already advanced BO and AO, preparing A1..A4 and B1..B4 for the next
## group.  PREB and PREA are advanced by 16*SIZE and 32*SIZE in this step.
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 2 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 3 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 4 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 5 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 6 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 7 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: reload for the next group; PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 8 of 8: same four-step schedule as group 1 ----
## step 0
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 1
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 2: BO and AO advance here
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B3 B4
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 3: the reloaded A1..A4 and B1..B4 are left for the code at .L483;
## PREB and PREA advance here
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
.align 4
.L483:
## K tail, 16-step slice of the 8x4 tile (MR=8, NR=4).
## Runs only when bit 4 of the K count is set (K in the plain GEMM build, TEMP
## in the TRMM build); otherwise control goes straight to .L484.
## Register roles, as given by the defines at the top of the file:
##   AO ($12) / BO ($13)  packed A / B read pointers used by the quad loads
##   A1-A4, A5-A8         two alternating sets of A operands
##   B1-B2, B5-B6         two alternating sets of loaded B operands
##   B3-B4, B7-B8         PLU results derived from the loaded set
##   C11..C44             sixteen accumulators updated by MADPS
## On entry A1-A4 and B1-B4 must already hold valid data: the first MADPS
## group consumes them before this block issues any load.
## FETCH is defined as ld and targets $0, so the loaded value is discarded;
## these look like software prefetch touches on PREA / PREB.
## The body is one 4-step group repeated four times. Each group advances AO by
## 32*SIZE, BO by 16*SIZE, PREA by 32*SIZE and PREB by 16*SIZE, and finishes
## with A1-A4 / B1-B4 reloaded for whatever code runs next.
#ifndef TRMMKERNEL
andi L, K, 16
#else
andi L, TEMP, 16
#endif
blez L, .L484
NOP
## ---- group 1 of 4 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 2 of 4 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 3 of 4 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 4 of 4 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
.align 4
.L484:
## K tail, 8-step slice of the 8x4 tile.
## Runs only when bit 3 of the K count is set (K, or TEMP in the TRMM build);
## otherwise control goes straight to .L485.
## The body is the same 4-step software-pipelined group used by the 16-step
## slice, repeated twice. It expects A1-A4 / B1-B4 preloaded on entry and
## leaves them reloaded on exit. Each group advances AO by 32*SIZE, BO by
## 16*SIZE, PREA by 32*SIZE and PREB by 16*SIZE.
## FETCH is ld into $0: the value is discarded, presumably a prefetch touch.
#ifndef TRMMKERNEL
andi L, K, 8
#else
andi L, TEMP, 8
#endif
blez L, .L485
NOP
## ---- group 1 of 2 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## ---- group 2 of 2 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
.align 4
.L485:
## K tail, 4-step slice of the 8x4 tile.
## Runs only when bit 2 of the K count is set (K, or TEMP in the TRMM build);
## otherwise control goes straight to .L486.
## One pass of the 4-step software-pipelined group: A1-A4 / B1-B4 must be
## preloaded on entry and are reloaded on exit. AO advances by 32*SIZE, BO by
## 16*SIZE, PREA by 32*SIZE and PREB by 16*SIZE.
## FETCH is ld into $0: the value is discarded, presumably a prefetch touch.
#ifndef TRMMKERNEL
andi L, K, 4
#else
andi L, TEMP, 4
#endif
blez L, .L486
NOP
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8, B5-B6
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: multiply with A5-A8 and B5-B8 while loading A1-A4, B1-B2
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 4) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 5) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
## step 3: as step 1, plus the AO/BO bump for the whole group
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 6) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 7) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 8 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 32 * SIZE # 4KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 16 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 20 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 4: as step 2; reloads come from the bumped AO/BO, prefetch pointers move
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 24 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 28 * SIZE(PREA)
daddiu PREA, PREA, 32 * SIZE
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
.align 4
.L486:
## K tail, 2-step slice of the 8x4 tile.
## Runs only when bit 1 of the K count is set (K, or TEMP in the TRMM build);
## otherwise control goes straight to .L487.
## Two multiply steps: the first uses the preloaded A1-A4 / B1-B4 and loads
## A5-A8 / B5-B6, the second uses those and reloads A1-A4 / B1-B4 from the
## already bumped AO / BO. Pointer strides are half of the 4-step group:
## AO 16*SIZE, BO 8*SIZE, PREA 16*SIZE, PREB 8*SIZE.
#ifndef TRMMKERNEL
andi L, K, 2
#else
andi L, TEMP, 2
#endif
blez L, .L487
NOP
## step 1
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C13, C13, A1, B3
daddiu BO, BO, 8 * SIZE # 2KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 16 * SIZE # 2KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
PLU B7, B5, B5
FETCH $0, 0 * SIZE(PREA)
MADPS C24, C24, A2, B4
PLU B8, B6, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
## step 2: loads below use offsets relative to the bumped AO / BO
MADPS C11, C11, A5, B5
MADPS C21, C21, A6, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C12, C12, A5, B6
MADPS C22, C22, A6, B6
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C31, C31, A7, B5
MADPS C41, C41, A8, B5
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C32, C32, A7, B6
MADPS C42, C42, A8, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C13, C13, A5, B7
MADPS C23, C23, A6, B7
daddiu PREB, PREB, 8 * SIZE
MADPS C33, C33, A7, B7
MADPS C43, C43, A8, B7
MADPS C14, C14, A5, B8
PLU B3, B1, B1
FETCH $0, 8 * SIZE(PREA)
MADPS C24, C24, A6, B8
PLU B4, B2, B2
FETCH $0, 12 * SIZE(PREA)
MADPS C34, C34, A7, B8
MADPS C44, C44, A8, B8
daddiu PREA, PREA, 16 * SIZE
.align 4
.L487:
## K tail, final single step of the 8x4 tile.
## Runs only when bit 0 of the K count is set (K, or TEMP in the TRMM build).
## Uses the preloaded A1-A4 / B1-B4 and issues no further loads of A or B.
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L480
## The ALPHA load follows the branch directly, i.e. it sits in the delay slot;
## presumably it executes on both paths (assumes noreorder mode - confirm).
## ALPHA aliases $f15, the same register as B8, which is not read below.
LD ALPHA, 152($sp)
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
MADPS C12, C12, A1, B2
MADPS C22, C22, A2, B2
MADPS C31, C31, A3, B1
MADPS C41, C41, A4, B1
MADPS C32, C32, A3, B2
MADPS C42, C42, A4, B2
MADPS C13, C13, A1, B3
daddiu BO, BO, 4 * SIZE # 1KR*4NR
MADPS C23, C23, A2, B3
daddiu AO, AO, 8 * SIZE # 1KR*8MR
MADPS C33, C33, A3, B3
MADPS C43, C43, A4, B3
MADPS C14, C14, A1, B4
MADPS C24, C24, A2, B4
MADPS C34, C34, A3, B4
MADPS C44, C44, A4, B4
.align 4
.L480: # Write Back
## Write-back for one 8x4 tile, then loop to .L481 while I > 0.
## Each accumulator is a paired-single register. Its lower half is used
## directly and CVTU pulls out the upper half (as the original comments of
## this file describe it), so every accumulator yields two scalars that land
## in two different output columns.
## Plain GEMM build: each scalar is combined with the value already stored in
## C through MADD with ALPHA (presumably old C + acc*ALPHA; the MADD macro is
## defined outside this view - confirm there).
## TRMM build: C is not read; each scalar is only scaled with MUL by ALPHA.
## CO1..CO4 are the four output column pointers; each advances by 8*SIZE.
## Slot notation in the comments below: COn[i] is element i*SIZE off COn.
#ifndef TRMMKERNEL
daddiu I, I, -1
CVTU A1, C13 # A1 = C13.upper -> CO1[1]
CVTU A2, C11 # A2 = C11.upper -> CO2[1]
CVTU A3, C23 # A3 = C23.upper -> CO1[3]
LD B1, 1 * SIZE(CO1)
CVTU A4, C21 # A4 = C21.upper -> CO2[3]
LD B2, 1 * SIZE(CO2)
CVTU A5, C33 # A5 = C33.upper -> CO1[5]
LD B3, 3 * SIZE(CO1)
CVTU A6, C31 # A6 = C31.upper -> CO2[5]
LD B4, 3 * SIZE(CO2)
CVTU A7, C43 # A7 = C43.upper -> CO1[7]
LD B5, 5 * SIZE(CO1)
CVTU A8, C41 # A8 = C41.upper -> CO2[7]
LD B6, 5 * SIZE(CO2)
MADD A1, B1, A1, ALPHA # -> CO1[1]
LD B7, 7 * SIZE(CO1)
MADD A2, B2, A2, ALPHA # -> CO2[1]
LD B1, 7 * SIZE(CO2)
MADD A3, B3, A3, ALPHA # -> CO1[3]
LD B2, 0 * SIZE(CO1)
MADD A4, B4, A4, ALPHA # -> CO2[3]
LD B3, 0 * SIZE(CO2)
MADD A5, B5, A5, ALPHA # -> CO1[5]
LD B4, 2 * SIZE(CO1)
MADD A6, B6, A6, ALPHA # -> CO2[5]
LD B5, 2 * SIZE(CO2)
MADD A7, B7, A7, ALPHA # -> CO1[7]
LD B6, 4 * SIZE(CO1)
MADD A8, B1, A8, ALPHA # -> CO2[7]
ST A1, 1 * SIZE(CO1)
MADD C11, B2, C11, ALPHA # C11.lower -> CO1[0]
LD B7, 4 * SIZE(CO2)
MADD C13, B3, C13, ALPHA # C13.lower -> CO2[0]
ST A2, 1 * SIZE(CO2)
MADD C21, B4, C21, ALPHA # C21.lower -> CO1[2]
LD A1, 6 * SIZE(CO1)
MADD C23, B5, C23, ALPHA # C23.lower -> CO2[2]
ST A3, 3 * SIZE(CO1)
MADD C31, B6, C31, ALPHA # C31.lower -> CO1[4]
LD A2, 6 * SIZE(CO2)
MADD C33, B7, C33, ALPHA # C33.lower -> CO2[4]
ST A4, 3 * SIZE(CO2)
ST A5, 5 * SIZE(CO1)
ST A6, 5 * SIZE(CO2)
ST A7, 7 * SIZE(CO1)
ST A8, 7 * SIZE(CO2)
MADD C41, A1, C41, ALPHA # C41.lower -> CO1[6]
ST C11, 0 * SIZE(CO1)
MADD C43, A2, C43, ALPHA # C43.lower -> CO2[6]
ST C13, 0 * SIZE(CO2)
ST C21, 2 * SIZE(CO1)
ST C23, 2 * SIZE(CO2)
ST C31, 4 * SIZE(CO1)
ST C33, 4 * SIZE(CO2)
ST C41, 6 * SIZE(CO1)
## Second half: columns CO3 / CO4 from C12, C14, C22, C24, C32, C34, C42, C44.
## C11, C13, C21, C23, C31, C33, C41, C43 have been stored already and are
## reused below as scratch for the old C values.
CVTU A1, C14 # A1 = C14.upper -> CO3[1]
ST C43, 6 * SIZE(CO2)
CVTU A2, C12 # A2 = C12.upper -> CO4[1]
LD B1, 1 * SIZE(CO3)
CVTU A3, C24 # A3 = C24.upper -> CO3[3]
LD B2, 1 * SIZE(CO4)
CVTU A4, C22 # A4 = C22.upper -> CO4[3]
LD B3, 3 * SIZE(CO3)
CVTU A5, C34 # A5 = C34.upper -> CO3[5]
LD B4, 3 * SIZE(CO4)
CVTU A6, C32 # A6 = C32.upper -> CO4[5]
LD B5, 5 * SIZE(CO3)
CVTU A7, C44 # A7 = C44.upper -> CO3[7]
LD B6, 5 * SIZE(CO4)
CVTU A8, C42 # A8 = C42.upper -> CO4[7]
LD B7, 7 * SIZE(CO3)
MADD A1, B1, A1, ALPHA # -> CO3[1]
LD C11, 7 * SIZE(CO4)
MADD A2, B2, A2, ALPHA # -> CO4[1]
LD C13, 0 * SIZE(CO3)
MADD A3, B3, A3, ALPHA # -> CO3[3]
LD C21, 0 * SIZE(CO4)
MADD A4, B4, A4, ALPHA # -> CO4[3]
LD C23, 2 * SIZE(CO3)
MADD A5, B5, A5, ALPHA # -> CO3[5]
LD C31, 2 * SIZE(CO4)
MADD A6, B6, A6, ALPHA # -> CO4[5]
LD C33, 4 * SIZE(CO3)
MADD A7, B7, A7, ALPHA # -> CO3[7]
LD C41, 4 * SIZE(CO4)
MADD A8, C11, A8, ALPHA # -> CO4[7]
ST A1, 1 * SIZE(CO3)
MADD C12, C13, C12, ALPHA # C12.lower -> CO3[0]
LD C43, 6 * SIZE(CO3)
MADD C14, C21, C14, ALPHA # C14.lower -> CO4[0]
ST A2, 1 * SIZE(CO4)
MADD C22, C23, C22, ALPHA # C22.lower -> CO3[2]
LD B1, 6 * SIZE(CO4)
MADD C24, C31, C24, ALPHA # C24.lower -> CO4[2]
ST A3, 3 * SIZE(CO3)
MADD C32, C33, C32, ALPHA # C32.lower -> CO3[4]
ST A4, 3 * SIZE(CO4)
MADD C34, C41, C34, ALPHA # C34.lower -> CO4[4]
ST A5, 5 * SIZE(CO3)
MADD C42, C43, C42, ALPHA # C42.lower -> CO3[6]
ST A6, 5 * SIZE(CO4)
ST A7, 7 * SIZE(CO3)
NOP
MADD C44, B1, C44, ALPHA # C44.lower -> CO4[6]
ST A8, 7 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
ST C22, 2 * SIZE(CO3)
ST C24, 2 * SIZE(CO4)
ST C32, 4 * SIZE(CO3)
ST C34, 4 * SIZE(CO4)
ST C42, 6 * SIZE(CO3)
ST C44, 6 * SIZE(CO4)
daddiu CO1, CO1, 8 * SIZE
daddiu CO2, CO2, 8 * SIZE
daddiu CO3, CO3, 8 * SIZE
bgtz I, .L481
daddiu CO4, CO4, 8 * SIZE # follows the branch directly (delay slot)
#else
## TRMM build: same register-to-slot mapping as above, scaled by ALPHA only.
daddiu I, I, -1
CVTU A1, C13 # A1 = C13.upper -> CO1[1]
CVTU A2, C11 # A2 = C11.upper -> CO2[1]
CVTU A3, C23 # A3 = C23.upper -> CO1[3]
CVTU A4, C21 # A4 = C21.upper -> CO2[3]
CVTU A5, C33 # A5 = C33.upper -> CO1[5]
CVTU A6, C31 # A6 = C31.upper -> CO2[5]
CVTU A7, C43 # A7 = C43.upper -> CO1[7]
CVTU A8, C41 # A8 = C41.upper -> CO2[7]
MUL A1, A1, ALPHA # -> CO1[1]
MUL A2, A2, ALPHA # -> CO2[1]
MUL A3, A3, ALPHA # -> CO1[3]
MUL A4, A4, ALPHA # -> CO2[3]
MUL A5, A5, ALPHA # -> CO1[5]
MUL A6, A6, ALPHA # -> CO2[5]
MUL A7, A7, ALPHA # -> CO1[7]
MUL A8, A8, ALPHA # -> CO2[7]
MUL C11, C11, ALPHA # C11.lower -> CO1[0]
ST A1, 1 * SIZE(CO1)
MUL C13, C13, ALPHA # C13.lower -> CO2[0]
ST A2, 1 * SIZE(CO2)
MUL C21, C21, ALPHA # C21.lower -> CO1[2]
ST A3, 3 * SIZE(CO1)
MUL C23, C23, ALPHA # C23.lower -> CO2[2]
ST A4, 3 * SIZE(CO2)
MUL C31, C31, ALPHA # C31.lower -> CO1[4]
ST A5, 5 * SIZE(CO1)
MUL C33, C33, ALPHA # C33.lower -> CO2[4]
ST A6, 5 * SIZE(CO2)
MUL C41, C41, ALPHA # C41.lower -> CO1[6]
ST A7, 7 * SIZE(CO1)
MUL C43, C43, ALPHA # C43.lower -> CO2[6]
ST A8, 7 * SIZE(CO2)
CVTU A1, C14 # A1 = C14.upper -> CO3[1]
ST C11, 0 * SIZE(CO1)
CVTU A2, C12 # A2 = C12.upper -> CO4[1]
ST C13, 0 * SIZE(CO2)
CVTU A3, C24 # A3 = C24.upper -> CO3[3]
ST C21, 2 * SIZE(CO1)
CVTU A4, C22 # A4 = C22.upper -> CO4[3]
ST C23, 2 * SIZE(CO2)
CVTU A5, C34 # A5 = C34.upper -> CO3[5]
ST C31, 4 * SIZE(CO1)
CVTU A6, C32 # A6 = C32.upper -> CO4[5]
ST C33, 4 * SIZE(CO2)
CVTU A7, C44 # A7 = C44.upper -> CO3[7]
ST C41, 6 * SIZE(CO1)
CVTU A8, C42 # A8 = C42.upper -> CO4[7]
ST C43, 6 * SIZE(CO2)
MUL A1, A1, ALPHA # -> CO3[1]
MUL A2, A2, ALPHA # -> CO4[1]
MUL A3, A3, ALPHA # -> CO3[3]
MUL A4, A4, ALPHA # -> CO4[3]
MUL A5, A5, ALPHA # -> CO3[5]
MUL A6, A6, ALPHA # -> CO4[5]
MUL A7, A7, ALPHA # -> CO3[7]
MUL A8, A8, ALPHA # -> CO4[7]
MUL C12, C12, ALPHA # C12.lower -> CO3[0]
ST A1, 1 * SIZE(CO3)
MUL C14, C14, ALPHA # C14.lower -> CO4[0]
ST A2, 1 * SIZE(CO4)
MUL C22, C22, ALPHA # C22.lower -> CO3[2]
ST A3, 3 * SIZE(CO3)
MUL C24, C24, ALPHA # C24.lower -> CO4[2]
ST A4, 3 * SIZE(CO4)
MUL C32, C32, ALPHA # C32.lower -> CO3[4]
ST A5, 5 * SIZE(CO3)
MUL C34, C34, ALPHA # C34.lower -> CO4[4]
ST A6, 5 * SIZE(CO4)
MUL C42, C42, ALPHA # C42.lower -> CO3[6]
ST A7, 7 * SIZE(CO3)
MUL C44, C44, ALPHA # C44.lower -> CO4[6]
ST A8, 7 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
ST C22, 2 * SIZE(CO3)
ST C24, 2 * SIZE(CO4)
ST C32, 4 * SIZE(CO3)
ST C34, 4 * SIZE(CO4)
ST C42, 6 * SIZE(CO3)
ST C44, 6 * SIZE(CO4)
daddiu CO1, CO1, 8 * SIZE
daddiu CO2, CO2, 8 * SIZE
daddiu CO3, CO3, 8 * SIZE
daddiu CO4, CO4, 8 * SIZE
#if ( defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - 8 with LEFT, K - KK - 4 otherwise. AO then advances by
## TEMP scaled by 8 elements and BO by TEMP scaled by 4 elements (both shifts
## also include BASE_SHIFT, which comes from outside this view).
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -8
#else
daddiu TEMP, TEMP, -4
#endif
dsll L, TEMP, 3 + BASE_SHIFT
dsll TEMP, TEMP, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 8
#endif
bgtz I, .L481
NOP
#endif
.align 4
.L44:
## M tail: process one 4-row strip when bit 2 of M is set, else skip to .L42.
andi I, M, 4 # MR=4
blez I, .L42
NOP
.align 4
.L441:
## Set-up for one 4x4 tile (MR=4, NR=4).
## Clears the sixteen accumulators by copying C11, points PREB at
## B + (K << BASE_SHIFT), preloads the first B pair (B1 B2) and A pair (A1 A2),
## derives B3 / B4 from them with PLU, touches the four C columns with FETCH,
## and leaves in L the number of 4-step groups for the main loop .L4410.
## PREA is used by that loop but is not initialised in this block.
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B # Reset B
#else
## Start KK steps into both packed panels: 4 elements per step for A and B.
dsll L, KK, 2 + BASE_SHIFT
dsll TEMP, KK, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
dsll PREB, K, BASE_SHIFT
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
daddu PREB, B, PREB
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
PLU B4, B2, B2
## TEMP = number of K steps this tile runs in the TRMM build.
## Both non-subtracting branches add 4 because MR and NR are both 4 here.
## daddiu is the immediate form, matching the 2x4 set-up code of this file.
#if (defined(LEFT) && !defined(TRANSA)) ||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 4
#else
daddiu TEMP, KK, 4
#endif
dsra L, TEMP, 2
blez L, .L442
NOP
#else
move BO, B # Reset B
dsra L, K, 2 # UnRoll K=4
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
dsll PREB, K, BASE_SHIFT
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
daddu PREB, B, PREB
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
blez L, .L442
PLU B4, B2, B2 # follows the branch directly (delay slot)
#endif
.L4410: #
## Main K loop of the 4x4 tile: four K steps per iteration, L iterations.
## Each step uses one A pair and one B set:
##   step 1: A1 A2 with B1-B4     step 2: A3 A4 with B5-B8
##   step 3: A5 A6 with B1-B4     step 4: A7 A8 with B5-B8
## Only C11-C14 and C21-C24 are accumulated here. B3/B4 and B7/B8 are PLU
## results of the pair loaded just before them. Per iteration AO, BO, PREA and
## PREB all advance by 16*SIZE, and A1 A2 / B1-B4 are reloaded at the end.
daddiu L, L, -1
MADPS C11, C11, A1, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C21, C21, A2, B1
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C12, C12, A1, B2
FETCH $0, 0 * SIZE(PREB)
MADPS C22, C22, A2, B2
FETCH $0, 0 * SIZE(PREA)
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C14, C14, A1, B4
MADPS C24, C24, A2, B4
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A3, B5
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C21, C21, A4, B5
gsLQC1(R12, F5, F4, 2) # A5 A6
MADPS C12, C12, A3, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C22, C22, A4, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C13, C13, A3, B7
MADPS C23, C23, A4, B7
MADPS C14, C14, A3, B8
MADPS C24, C24, A4, B8
PLU B3, B1, B1
PLU B4, B2, B2
MADPS C11, C11, A5, B1
gsLQC1(R13, F13, F12, 3) # B5 B6
MADPS C21, C21, A6, B1
gsLQC1(R12, F7, F6, 3) # A7 A8
MADPS C12, C12, A5, B2
FETCH $0, 8 * SIZE(PREB)
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C22, C22, A6, B2
FETCH $0, 8 * SIZE(PREA)
daddiu AO, AO, 16 * SIZE # 4KR*4MR
MADPS C13, C13, A5, B3
MADPS C23, C23, A6, B3
MADPS C14, C14, A5, B4
MADPS C24, C24, A6, B4
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A7, B5
gsLQC1(R13, F9, F8, 0) # B1 B2, from the bumped BO
MADPS C21, C21, A8, B5
gsLQC1(R12, F1, F0, 0) # A1 A2, from the bumped AO
MADPS C12, C12, A7, B6
FETCH $0, 12 * SIZE(PREB)
MADPS C22, C22, A8, B6
FETCH $0, 12 * SIZE(PREA)
MADPS C13, C13, A7, B7
daddiu PREA, PREA, 16 * SIZE
MADPS C23, C23, A8, B7
daddiu PREB, PREB, 16 * SIZE
MADPS C14, C14, A7, B8
MADPS C24, C24, A8, B8
PLU B3, B1, B1
bgtz L, .L4410
PLU B4, B2, B2 # follows the branch directly (delay slot)
.align 4
.L442:
## K tail, 2-step slice of the 4x4 tile.
## Runs only when bit 1 of the K count is set (K, or TEMP in the TRMM build);
## otherwise control goes straight to .L443.
## Step 1 uses the preloaded A1 A2 / B1-B4 and loads A3 A4 / B5 B6; step 2
## uses those and reloads A1 A2 / B1 B2 from the bumped AO / BO, then rebuilds
## B3 / B4 with PLU for the code that follows.
## AO, BO, PREA and PREB each advance by 8*SIZE, i.e. 2 K steps x 4 elements.
#ifndef TRMMKERNEL
andi L, K, 2
#else
andi L, TEMP, 2
#endif
blez L, .L443
NOP
MADPS C11, C11, A1, B1
gsLQC1(R13, F13, F12, 1) # B5 B6
MADPS C21, C21, A2, B1
gsLQC1(R12, F3, F2, 1) # A3 A4
MADPS C12, C12, A1, B2
FETCH $0, 0 * SIZE(PREB)
daddiu BO, BO, 8 * SIZE # 2KR*4NR
MADPS C22, C22, A2, B2
FETCH $0, 0 * SIZE(PREA)
daddiu AO, AO, 8 * SIZE # 2KR*4MR
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C14, C14, A1, B4
MADPS C24, C24, A2, B4
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A3, B5
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C21, C21, A4, B5
gsLQC1(R12, F1, F0, 0) # A1 A2
MADPS C12, C12, A3, B6
FETCH $0, 4 * SIZE(PREB)
MADPS C22, C22, A4, B6
FETCH $0, 4 * SIZE(PREA)
MADPS C13, C13, A3, B7
## Prefetch pointers move in elements like every other bump in this file;
## the stride therefore has to be scaled by SIZE, not given as a raw byte count.
daddiu PREB, PREB, 8 * SIZE
MADPS C23, C23, A4, B7
daddiu PREA, PREA, 8 * SIZE
MADPS C14, C14, A3, B8
MADPS C24, C24, A4, B8
PLU B3, B1, B1
PLU B4, B2, B2
.align 4
.L443:
## K tail, final single step of the 4x4 tile.
## Runs only when bit 0 of the K count is set (K, or TEMP in the TRMM build).
## Uses the preloaded A1 A2 / B1-B4 and issues no further loads of A or B.
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L440
## The ALPHA load follows the branch directly, i.e. it sits in the delay slot;
## presumably it executes on both paths (assumes noreorder mode - confirm).
LD ALPHA, 152($sp)
MADPS C11, C11, A1, B1
MADPS C21, C21, A2, B1
MADPS C12, C12, A1, B2
daddiu BO, BO, 4 * SIZE # 1KR*4NR
MADPS C22, C22, A2, B2
daddiu AO, AO, 4 * SIZE # 1KR*4MR
MADPS C13, C13, A1, B3
MADPS C23, C23, A2, B3
MADPS C14, C14, A1, B4
MADPS C24, C24, A2, B4
.align 4
.L440:
## Write-back for the 4x4 tile (no loop: at most one 4-row strip exists).
## Same scheme as the 8x4 write-back: the lower half of each paired-single
## accumulator is used directly, CVTU extracts the upper half, and the two
## scalars go to two different output columns.
## Plain GEMM build: MADD combines each scalar with the value read from C and
## ALPHA (presumably old C + acc*ALPHA; macro defined outside this view).
## TRMM build: C is not read, each scalar is only scaled with MUL by ALPHA.
## Slot notation in the comments below: COn[i] is element i*SIZE off COn.
#ifndef TRMMKERNEL
CVTU A1, C13 # A1 = C13.upper -> CO1[1]
LD B1, 1 * SIZE(CO1)
CVTU A2, C11 # A2 = C11.upper -> CO2[1]
LD B2, 1 * SIZE(CO2)
CVTU A3, C23 # A3 = C23.upper -> CO1[3]
LD B3, 3 * SIZE(CO1)
CVTU A4, C21 # A4 = C21.upper -> CO2[3]
LD B4, 3 * SIZE(CO2)
MADD A1, B1, A1, ALPHA # -> CO1[1]
LD B5, 0 * SIZE(CO1)
MADD A2, B2, A2, ALPHA # -> CO2[1]
LD B6, 0 * SIZE(CO2)
MADD A3, B3, A3, ALPHA # -> CO1[3]
LD B7, 2 * SIZE(CO1)
MADD A4, B4, A4, ALPHA # -> CO2[3]
LD B1, 2 * SIZE(CO2)
MADD C11, B5, C11, ALPHA # C11.lower -> CO1[0]
ST A1, 1 * SIZE(CO1)
MADD C13, B6, C13, ALPHA # C13.lower -> CO2[0]
ST A2, 1 * SIZE(CO2)
MADD C21, B7, C21, ALPHA # C21.lower -> CO1[2]
ST A3, 3 * SIZE(CO1)
MADD C23, B1, C23, ALPHA # C23.lower -> CO2[2]
ST A4, 3 * SIZE(CO2)
ST C11, 0 * SIZE(CO1)
ST C13, 0 * SIZE(CO2)
ST C21, 2 * SIZE(CO1)
ST C23, 2 * SIZE(CO2)
CVTU A1, C14 # A1 = C14.upper -> CO3[1]
LD B1, 1 * SIZE(CO3)
CVTU A2, C12 # A2 = C12.upper -> CO4[1]
LD B2, 1 * SIZE(CO4)
CVTU A3, C24 # A3 = C24.upper -> CO3[3]
LD B3, 3 * SIZE(CO3)
CVTU A4, C22 # A4 = C22.upper -> CO4[3]
LD B4, 3 * SIZE(CO4)
MADD A1, B1, A1, ALPHA # -> CO3[1]
LD A5, 0 * SIZE(CO3)
MADD A2, B2, A2, ALPHA # -> CO4[1]
LD A6, 0 * SIZE(CO4)
MADD A3, B3, A3, ALPHA # -> CO3[3]
LD A7, 2 * SIZE(CO3)
MADD A4, B4, A4, ALPHA # -> CO4[3]
LD A8, 2 * SIZE(CO4)
MADD C12, A5, C12, ALPHA # C12.lower -> CO3[0]
ST A1, 1 * SIZE(CO3)
MADD C14, A6, C14, ALPHA # C14.lower -> CO4[0]
ST A2, 1 * SIZE(CO4)
MADD C22, A7, C22, ALPHA # C22.lower -> CO3[2]
ST A3, 3 * SIZE(CO3)
MADD C24, A8, C24, ALPHA # C24.lower -> CO4[2]
ST A4, 3 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
ST C22, 2 * SIZE(CO3)
ST C24, 2 * SIZE(CO4)
daddiu CO1, CO1, 4 * SIZE
daddiu CO2, CO2, 4 * SIZE
daddiu CO3, CO3, 4 * SIZE
daddiu CO4, CO4, 4 * SIZE
#else
CVTU A1, C13 # A1 = C13.upper -> CO1[1]
CVTU A2, C11 # A2 = C11.upper -> CO2[1]
CVTU A3, C23 # A3 = C23.upper -> CO1[3]
CVTU A4, C21 # A4 = C21.upper -> CO2[3]
MUL A1, A1, ALPHA # -> CO1[1]
MUL A2, A2, ALPHA # -> CO2[1]
MUL A3, A3, ALPHA # -> CO1[3]
MUL A4, A4, ALPHA # -> CO2[3]
MUL C11, C11, ALPHA # C11.lower -> CO1[0]
ST A1, 1 * SIZE(CO1)
MUL C13, C13, ALPHA # C13.lower -> CO2[0]
ST A2, 1 * SIZE(CO2)
MUL C21, C21, ALPHA # C21.lower -> CO1[2]
ST A3, 3 * SIZE(CO1)
MUL C23, C23, ALPHA # C23.lower -> CO2[2]
ST A4, 3 * SIZE(CO2)
CVTU A5, C14 # A5 = C14.upper -> CO3[1]
ST C11, 0 * SIZE(CO1)
CVTU A6, C12 # A6 = C12.upper -> CO4[1]
ST C13, 0 * SIZE(CO2)
CVTU A7, C24 # A7 = C24.upper -> CO3[3]
ST C21, 2 * SIZE(CO1)
CVTU A8, C22 # A8 = C22.upper -> CO4[3]
ST C23, 2 * SIZE(CO2)
MUL A5, A5, ALPHA # -> CO3[1]
MUL A6, A6, ALPHA # -> CO4[1]
MUL A7, A7, ALPHA # -> CO3[3]
MUL A8, A8, ALPHA # -> CO4[3]
MUL C12, C12, ALPHA # C12.lower -> CO3[0]
ST A5, 1 * SIZE(CO3)
MUL C14, C14, ALPHA # C14.lower -> CO4[0]
ST A6, 1 * SIZE(CO4)
MUL C22, C22, ALPHA # C22.lower -> CO3[2]
ST A7, 3 * SIZE(CO3)
MUL C24, C24, ALPHA # C24.lower -> CO4[2]
ST A8, 3 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
ST C22, 2 * SIZE(CO3)
ST C24, 2 * SIZE(CO4)
daddiu CO1, CO1, 4 * SIZE
daddiu CO2, CO2, 4 * SIZE
daddiu CO3, CO3, 4 * SIZE
daddiu CO4, CO4, 4 * SIZE
#if ( defined(LEFT) && defined(TRANSA))||\
(!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - 4 on both branches (MR and NR are both 4 for this tile).
## AO and BO then each advance by TEMP scaled by 4 elements; the shift also
## includes BASE_SHIFT, which comes from outside this view.
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -4
#else
daddiu TEMP, TEMP, -4
#endif
dsll L, TEMP, 2 + BASE_SHIFT
dsll TEMP, TEMP, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 4
#endif
#endif
.align 4
.L42:
## M tail: process one 2-row strip when bit 1 of M is set, else skip to .L41.
andi I, M, 2
blez I, .L41
NOP
.align 4
.L421:
## Set-up for one 2x4 tile (MR=2, NR=4).
## Clears all sixteen accumulators by copying C11 (the 2x4 loop that follows
## only accumulates into C11-C14), preloads B1 B2 and A1 A2, derives B3 / B4
## with PLU, touches the four C columns with FETCH, and leaves in L the number
## of 4-step groups for the main loop .L4210.
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
## Start KK steps into both packed panels: 2 elements per step for A,
## 4 elements per step for B.
dsll L, KK, 1 + BASE_SHIFT
dsll TEMP, KK, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
PLU B4, B2, B2
## TEMP = number of K steps this tile runs in the TRMM build:
## K - KK, or KK + 2 with LEFT (MR), or KK + 4 otherwise (NR).
#if (defined(LEFT) && !defined(TRANSA)) ||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 2
#else
daddiu TEMP, KK, 4
#endif
dsra L, TEMP, 2
blez L, .L422
NOP
#else
move BO, B # Reset B
dsra L, K, 2 # UnRoll K=4
MTC $0, C11 # CLEAR RESULTS REGISTERS
MOV C12, C11
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
gsLQC1(R13, F9, F8, 0) # B1 B2
MOV C41, C11
MOV C42, C11
gsLQC1(R12, F1, F0, 0) # A1 A2
MOV C13, C11
MOV C14, C11
MOV C23, C11
FETCH $0, 0 * SIZE(CO1)
MOV C24, C11
MOV C33, C11
FETCH $0, 0 * SIZE(CO2)
MOV C34, C11
MOV C43, C11
FETCH $0, 0 * SIZE(CO3)
MOV C44, C11
PLU B3, B1, B1
FETCH $0, 0 * SIZE(CO4)
blez L, .L422
PLU B4, B2, B2 # follows the branch directly (delay slot)
#endif
.L4210:
daddiu L, L, -1
MADPS C11, C11, A1, B1
MADPS C12, C12, A1, B2
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C13, C13, A1, B3
MADPS C14, C14, A1, B4
gsLQC1(R12, F3, F2, 1) # B1 B2
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A2, B5
MADPS C12, C12, A2, B6
daddiu AO, AO, 8 * SIZE # 4KR*2MR
gsLQC1(R13, F9, F8, 2) # B1 B2
MADPS C13, C13, A2, B7
MADPS C14, C14, A2, B8
PLU B3, B1, B1
PLU B4, B2, B2
MADPS C11, C11, A3, B1
gsLQC1(R12, F1, F0, 0) # B3 B4
MADPS C12, C12, A3, B2
gsLQC1(R13, F13, F12, 3) # B3 B4
daddiu BO, BO, 16 * SIZE # 4KR*4NR
MADPS C13, C13, A3, B3
MADPS C14, C14, A3, B4
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A4, B5
MADPS C12, C12, A4, B6
gsLQC1(R13, F9, F8, 0) # B3 B4
MADPS C13, C13, A4, B7
MADPS C14, C14, A4, B8
PLU B3, B1, B1
bgtz L, .L4210
PLU B4, B2, B2
.align 4
.L422:
#ifndef TRMMKERNEL
andi L, K, 2
#else
andi L, TEMP, 2
#endif
blez L, .L423
NOP
daddiu AO, AO, 4 * SIZE # 2KR*2MR
MADPS C11, C11, A1, B1
MADPS C12, C12, A1, B2
gsLQC1(R13, F13, F12, 1) # B3 B4
MADPS C13, C13, A1, B3
MADPS C14, C14, A1, B4
daddiu BO, BO, 8 * SIZE # 2KR*2MR
PLU B7, B5, B5
PLU B8, B6, B6
MADPS C11, C11, A2, B5
MADPS C12, C12, A2, B6
gsLQC1(R13, F9, F8, 0) # B1 B2
MADPS C13, C13, A2, B7
MADPS C14, C14, A2, B8
gsLQC1(R12, F1, F0, 0)
PLU B3, B1, B1
PLU B4, B2, B2
.L423:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L420
LD ALPHA, 152($sp)
MADPS C11, C11, A1, B1
MADPS C12, C12, A1, B2
daddiu BO, BO, 4 * SIZE # 2KR*4NR
daddiu AO, AO, 2 * SIZE # 2KR*4MR
MADPS C13, C13, A1, B3
MADPS C14, C14, A1, B4
.align 4
.L420:
#ifndef TRMMKERNEL
CVTU A1, C13 # A1=C13.upper=c12
LD B1, 1 * SIZE(CO1)
CVTU A2, C11 # A2=C11.upper=c22
LD B2, 1 * SIZE(CO2)
MADD A1, B1, A1, ALPHA # c12
LD B5, 0 * SIZE(CO1)
MADD A2, B2, A2, ALPHA # c22
LD B6, 0 * SIZE(CO2)
MADD C11, B5, C11, ALPHA # c12
ST A1, 1 * SIZE(CO1)
MADD C13, B6, C13, ALPHA # c22
ST A2, 1 * SIZE(CO2)
ST C11, 0 * SIZE(CO1)
ST C13, 0 * SIZE(CO2)
CVTU A1, C14 # B1=C12.upper=c42
LD B1, 1 * SIZE(CO3)
CVTU A2, C12 # B2=C14.upper=c32
LD B2, 1 * SIZE(CO4)
MADD A1, B1, A1, ALPHA # c31
LD A5, 0 * SIZE(CO3)
MADD A2, B2, A2, ALPHA
LD A6, 0 * SIZE(CO4)
MADD C12, A5, C12, ALPHA
ST A1, 1 * SIZE(CO3)
MADD C14, A6, C14, ALPHA
ST A2, 1 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
daddiu CO1, CO1, 2 * SIZE
daddiu CO2, CO2, 2 * SIZE
daddiu CO3, CO3, 2 * SIZE
daddiu CO4, CO4, 2 * SIZE
#else
CVTU A1, C13 # A1=C13.upper=c12
CVTU A2, C11 # A2=C11.upper=c22
MUL A1, A1, ALPHA # c12
MUL A2, A2, ALPHA # c22
MUL C11, C11, ALPHA # c12
MUL C13, C13, ALPHA # c22
CVTU A3, C14 # B1=C12.upper=c42
CVTU A4, C12 # B2=C14.upper=c32
MUL A3, A3, ALPHA # c31
ST A1, 1 * SIZE(CO1)
MUL A4, A4, ALPHA
ST A2, 1 * SIZE(CO2)
MUL C12, C12, ALPHA
ST C11, 0 * SIZE(CO1)
MUL C14, C14, ALPHA
ST C13, 0 * SIZE(CO2)
ST A3, 1 * SIZE(CO3)
ST A4, 1 * SIZE(CO4)
ST C12, 0 * SIZE(CO3)
ST C14, 0 * SIZE(CO4)
daddiu CO1, CO1, 2 * SIZE
daddiu CO2, CO2, 2 * SIZE
daddiu CO3, CO3, 2 * SIZE
daddiu CO4, CO4, 2 * SIZE
#if ( defined(LEFT) && defined(TRANSA))||\
(!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -2
#else
daddiu TEMP, TEMP, -4
#endif
dsll L, TEMP, 1 + BASE_SHIFT
dsll TEMP, TEMP, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 2
#endif
#endif
.align 4
## ----------------------------------------------------------------------
## .L41 -- M & 1 remainder for the current 4-column panel (1 x 4 tile).
## Branches to .L40 when bit 0 of M is clear.  Scalar MADD accumulates
## into C11..C14, one accumulator per column pointer CO1..CO4.
## MADD d, r, s, t is used throughout as d = r + s * t.
## ----------------------------------------------------------------------
.L41:
andi I, M, 1
blez I, .L40
NOP
.align 4
.L411:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (1 value per k-step in A, 4 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, BASE_SHIFT
dsll TEMP, KK, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD B1, 0 * SIZE(BO)
MOV C21, C11
MOV C22, C11
LD A1, 0 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B2, 1 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B3, 2 * SIZE(BO)
MOV C13, C11
MOV C14, C11
LD B4, 3 * SIZE(BO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 4 loop iterations.
#if (defined(LEFT) && !defined(TRANSA))||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 1
#else
daddiu TEMP, KK, 4
#endif
dsra L, TEMP, 2
blez L, .L412
NOP # explicit delay slot, so the branch does not depend on the first loop instruction
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 2 # UnRoll K=4
MTC $0, C11 # clear result registers
MOV C12, C11
LD B1, 0 * SIZE(BO)
MOV C21, C11
MOV C22, C11
LD A1, 0 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B2, 1 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B3, 2 * SIZE(BO)
MOV C13, C11
MOV C14, C11
LD B4, 3 * SIZE(BO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
blez L, .L412
MOV C44, C11 # delay slot
#endif
## Main loop: 4 k-steps per iteration (AO advances 4 values, BO 16).
## B1..B4 and B5..B8 alternate as the 4 column values of each k-step.
.L4110:
daddiu L, L, -1
LD A2, 1 * SIZE(AO)
MADD C11, C11, A1, B1
LD B5, 4 * SIZE(BO)
MADD C12, C12, A1, B2
LD B6, 5 * SIZE(BO)
MADD C13, C13, A1, B3
LD B7, 6 * SIZE(BO)
MADD C14, C14, A1, B4
LD B8, 7 * SIZE(BO)
LD A3, 2 * SIZE(AO)
NOP
MADD C11, C11, A2, B5
LD B1, 8 * SIZE(BO)
MADD C12, C12, A2, B6
LD B2, 9 * SIZE(BO)
MADD C13, C13, A2, B7
LD B3, 10 * SIZE(BO)
MADD C14, C14, A2, B8
LD B4, 11 * SIZE(BO)
LD A4, 3 * SIZE(AO)
daddiu AO, AO, 4 * SIZE
MADD C11, C11, A3, B1
LD B5, 12 * SIZE(BO)
MADD C12, C12, A3, B2
LD B6, 13 * SIZE(BO)
MADD C13, C13, A3, B3
LD B7, 14 * SIZE(BO)
MADD C14, C14, A3, B4
LD B8, 15 * SIZE(BO)
LD A1, 0 * SIZE(AO) # first A value of the next pass
daddiu BO, BO, 16 * SIZE
MADD C11, C11, A4, B5
LD B1, 0 * SIZE(BO) # B values of the next pass
MADD C12, C12, A4, B6
LD B2, 1 * SIZE(BO)
MADD C13, C13, A4, B7
LD B3, 2 * SIZE(BO)
MADD C14, C14, A4, B8
bgtz L, .L4110
LD B4, 3 * SIZE(BO) # delay slot
## k remainder: two k-steps when bit 1 of the k count is set.
.L412:
#ifndef TRMMKERNEL
andi L, K, 2
#else
andi L, TEMP, 2
#endif
blez L, .L413
NOP
LD A2, 1 * SIZE(AO)
daddiu AO, AO, 2 * SIZE
MADD C11, C11, A1, B1
LD B5, 4 * SIZE(BO)
MADD C12, C12, A1, B2
LD B6, 5 * SIZE(BO)
MADD C13, C13, A1, B3
LD B7, 6 * SIZE(BO)
MADD C14, C14, A1, B4
LD B8, 7 * SIZE(BO)
LD A1, 0 * SIZE(AO)
daddiu BO, BO, 8 * SIZE
MADD C11, C11, A2, B5
LD B1, 0 * SIZE(BO)
MADD C12, C12, A2, B6
LD B2, 1 * SIZE(BO)
MADD C13, C13, A2, B7
LD B3, 2 * SIZE(BO)
MADD C14, C14, A2, B8
LD B4, 3 * SIZE(BO)
## k remainder: one last k-step when bit 0 is set.  ALPHA shares $f15
## with B8, so it is loaded from 152($sp) only here, in the delay slot
## that runs on both paths, after the last use of B8.
.L413:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L410
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C12, C12, A1, B2
daddiu AO, AO, 1 * SIZE
MADD C13, C13, A1, B3
MADD C14, C14, A1, B4
daddiu BO, BO, 4 * SIZE
.align 4
## Write back one value to each of the four column pointers.
.L410:
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A5, 0 * SIZE(CO1)
LD A6, 0 * SIZE(CO2)
LD A7, 0 * SIZE(CO3)
LD A8, 0 * SIZE(CO4)
MADD A5, A5, C11, ALPHA
MADD A6, A6, C12, ALPHA
MADD A7, A7, C13, ALPHA
MADD A8, A8, C14, ALPHA
ST A5, 0 * SIZE(CO1)
ST A6, 0 * SIZE(CO2)
ST A7, 0 * SIZE(CO3)
ST A8, 0 * SIZE(CO4)
daddiu CO1, CO1, 1 * SIZE
daddiu CO2, CO2, 1 * SIZE
daddiu CO3, CO3, 1 * SIZE
daddiu CO4, CO4, 1 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A5, C11, ALPHA
MUL A6, C12, ALPHA
MUL A7, C13, ALPHA
MUL A8, C14, ALPHA
ST A5, 0 * SIZE(CO1)
ST A6, 0 * SIZE(CO2)
ST A7, 0 * SIZE(CO3)
ST A8, 0 * SIZE(CO4)
daddiu CO1, CO1, 1 * SIZE
daddiu CO2, CO2, 1 * SIZE
daddiu CO3, CO3, 1 * SIZE
daddiu CO4, CO4, 1 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA))||\
(!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -1
#else
daddiu TEMP, TEMP, -4
#endif
dsll L, TEMP, BASE_SHIFT
dsll TEMP, TEMP, 2 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 1
#endif
#endif
.align 4
## .L40 -- end of one 4-column panel: commit the consumed B position,
## count the panel, and loop back to .L48 while panels remain.
.L40:
move B, BO # next panel starts where this one ended
daddiu J, J, -1
#if defined(TRMMKERNEL) && !defined(LEFT)
daddiu KK, KK, 4 # non-LEFT TRMM: KK grows by the panel width
#endif
bgtz J, .L48
NOP
.align 4
## .L2 -- N & 2 remainder: one 2-column panel, skipped (to .L1) when
## bit 1 of N is clear.
.L2:
andi J, N, 2
blez J, .L1
NOP
## .L28 -- panel setup: column pointers CO1/CO2, A reset, and the count
## of 8-row tiles in I.  C is moved past both columns in the delay slot.
.L28:
#if defined(TRMMKERNEL) && defined(LEFT)
move KK, OFFSET
#endif
move CO1, C
daddu CO2, C, LDC
move AO, A # Reset A
dsra I, M, 3 # MR=8
blez I, .L24
daddu C, CO2, LDC # delay slot
.align 4
## ----------------------------------------------------------------------
## .L281 -- 8 x 2 tile of the 2-column panel, repeated I = M / 8 times.
## Scalar MADD accumulators:
##   column 0 (CO1): C11 C21 C31 C41 C13 C23 C33 C43  (rows 0..7)
##   column 1 (CO2): C12 C22 C32 C42 C14 C24 C34 C44  (rows 0..7)
## MADD d, r, s, t is used throughout as d = r + s * t.
## ----------------------------------------------------------------------
.L281:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (8 values per k-step in A, 2 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, 3 + BASE_SHIFT
dsll TEMP, KK, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
LD A1, 0 * SIZE(AO)
MOV C12, C11
LD A2, 1 * SIZE(AO)
MOV C21, C11
LD A3, 2 * SIZE(AO)
MOV C22, C11
LD A4, 3 * SIZE(AO)
MOV C31, C11
LD A5, 4 * SIZE(AO)
MOV C32, C11
LD A6, 5 * SIZE(AO)
MOV C41, C11
LD B1, 0 * SIZE(BO)
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C13, C11
LD A7, 6 * SIZE(AO)
MOV C14, C11
LD A8, 7 * SIZE(AO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 8
#else
daddiu TEMP, KK, 2
#endif
dsra L, TEMP, 1
blez L, .L282
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
LD A1, 0 * SIZE(AO)
MOV C12, C11
LD A2, 1 * SIZE(AO)
MOV C21, C11
LD A3, 2 * SIZE(AO)
MOV C22, C11
LD A4, 3 * SIZE(AO)
MOV C31, C11
LD A5, 4 * SIZE(AO)
MOV C32, C11
LD A6, 5 * SIZE(AO)
MOV C41, C11
LD B1, 0 * SIZE(BO)
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C13, C11
LD A7, 6 * SIZE(AO)
MOV C14, C11
LD A8, 7 * SIZE(AO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
blez L, .L282
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 16 values, BO 4).
## First k-step uses A1..A8 with B1/B2.  For the second k-step, rows
## 0..3 are loaded into B5..B8 and rows 4..7 into A1..A4, used with
## B3/B4.  The tail of the loop reloads A1..A8, B1, B2 for the next pass.
.L2810:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD B5, 8 * SIZE(AO)
MADD C21, C21, A2, B1
LD B6, 9 * SIZE(AO)
MADD C31, C31, A3, B1
LD B7, 10 * SIZE(AO)
MADD C41, C41, A4, B1
LD B8, 11 * SIZE(AO)
MADD C12, C12, A1, B2
MADD C22, C22, A2, B2
LD B3, 2 * SIZE(BO)
MADD C32, C32, A3, B2
MADD C42, C42, A4, B2
LD B4, 3 * SIZE(BO)
daddiu BO, BO, 4 * SIZE
MADD C13, C13, A5, B1
MADD C23, C23, A6, B1
LD A1, 12 * SIZE(AO)
MADD C33, C33, A7, B1
MADD C43, C43, A8, B1
LD A2, 13 * SIZE(AO)
MADD C14, C14, A5, B2
MADD C24, C24, A6, B2
LD A3, 14 * SIZE(AO)
MADD C34, C34, A7, B2
MADD C44, C44, A8, B2
LD A4, 15 * SIZE(AO)
daddiu AO, AO, 16 * SIZE
MADD C11, C11, B5, B3
LD A5, 4 * SIZE(AO)
MADD C21, C21, B6, B3
LD A6, 5 * SIZE(AO)
MADD C13, C13, A1, B3
MADD C23, C23, A2, B3
LD A7, 6 * SIZE(AO)
MADD C33, C33, A3, B3
MADD C43, C43, A4, B3
LD A8, 7 * SIZE(AO)
MADD C14, C14, A1, B4
MADD C24, C24, A2, B4
LD B1, 0 * SIZE(BO)
MADD C34, C34, A3, B4
MADD C44, C44, A4, B4
LD B2, 1 * SIZE(BO)
MADD C31, C31, B7, B3
MADD C41, C41, B8, B3
LD A1, 0 * SIZE(AO)
MADD C12, C12, B5, B4
LD A2, 1 * SIZE(AO)
MADD C22, C22, B6, B4
LD A3, 2 * SIZE(AO)
LD A4, 3 * SIZE(AO)
MADD C32, C32, B7, B4
bgtz L, .L2810
MADD C42, C42, B8, B4 # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA shares $f15 with B8, so it is loaded from 152($sp) only here,
## in the delay slot that runs on both paths.
.L282:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L280
LD ALPHA, 152($sp)
MADD C13, C13, A5, B1
MADD C23, C23, A6, B1
MADD C33, C33, A7, B1
MADD C43, C43, A8, B1
MADD C14, C14, A5, B2
MADD C24, C24, A6, B2
MADD C34, C34, A7, B2
MADD C44, C44, A8, B2
daddiu AO, AO, 8 * SIZE
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
MADD C31, C31, A3, B1
MADD C41, C41, A4, B1
MADD C12, C12, A1, B2
MADD C22, C22, A2, B2
MADD C32, C32, A3, B2
MADD C42, C42, A4, B2
daddiu BO, BO, 2 * SIZE
.align 4
.L280: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
## B8 cannot hold the last CO2 value (it is ALPHA), so C11 is reused
## for it once its own result has been folded into A1.
daddiu I, I, -1
LD A1, 0 * SIZE(CO1)
LD A2, 1 * SIZE(CO1)
LD A3, 2 * SIZE(CO1)
LD A4, 3 * SIZE(CO1)
LD A5, 4 * SIZE(CO1)
LD A6, 5 * SIZE(CO1)
LD A7, 6 * SIZE(CO1)
LD A8, 7 * SIZE(CO1)
MADD A1, A1, C11, ALPHA
LD B1, 0 * SIZE(CO2)
MADD A2, A2, C21, ALPHA
LD B2, 1 * SIZE(CO2)
MADD A3, A3, C31, ALPHA
LD B3, 2 * SIZE(CO2)
MADD A4, A4, C41, ALPHA
LD B4, 3 * SIZE(CO2)
MADD A5, A5, C13, ALPHA
LD B5, 4 * SIZE(CO2)
MADD A6, A6, C23, ALPHA
LD B6, 5 * SIZE(CO2)
MADD A7, A7, C33, ALPHA
LD B7, 6 * SIZE(CO2)
MADD A8, A8, C43, ALPHA
LD C11, 7 * SIZE(CO2)
MADD B1, B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
MADD B2, B2, C22, ALPHA
ST A2, 1 * SIZE(CO1)
MADD B3, B3, C32, ALPHA
ST A3, 2 * SIZE(CO1)
MADD B4, B4, C42, ALPHA
ST A4, 3 * SIZE(CO1)
MADD B5, B5, C14, ALPHA
ST A5, 4 * SIZE(CO1)
MADD B6, B6, C24, ALPHA
ST A6, 5 * SIZE(CO1)
MADD B7, B7, C34, ALPHA
ST A7, 6 * SIZE(CO1)
MADD C11, C11, C44, ALPHA
ST A8, 7 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
ST B3, 2 * SIZE(CO2)
ST B4, 3 * SIZE(CO2)
ST B5, 4 * SIZE(CO2)
ST B6, 5 * SIZE(CO2)
ST B7, 6 * SIZE(CO2)
ST C11, 7 * SIZE(CO2)
daddiu CO1, CO1, 8 * SIZE
bgtz I, .L281
daddiu CO2, CO2, 8 * SIZE # delay slot
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
daddiu I, I, -1
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
MUL A3, C31, ALPHA
MUL A4, C41, ALPHA
MUL A5, C13, ALPHA
MUL A6, C23, ALPHA
MUL A7, C33, ALPHA
MUL A8, C43, ALPHA
MUL B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
MUL B2, C22, ALPHA
ST A2, 1 * SIZE(CO1)
MUL B3, C32, ALPHA
ST A3, 2 * SIZE(CO1)
MUL B4, C42, ALPHA
ST A4, 3 * SIZE(CO1)
MUL B5, C14, ALPHA
ST A5, 4 * SIZE(CO1)
MUL B6, C24, ALPHA
ST A6, 5 * SIZE(CO1)
MUL B7, C34, ALPHA
ST A7, 6 * SIZE(CO1)
MUL C11, C44, ALPHA
ST A8, 7 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
ST B3, 2 * SIZE(CO2)
ST B4, 3 * SIZE(CO2)
ST B5, 4 * SIZE(CO2)
ST B6, 5 * SIZE(CO2)
ST B7, 6 * SIZE(CO2)
ST C11, 7 * SIZE(CO2)
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) ||(!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -8
#else
daddiu TEMP, TEMP, -2
#endif
dsll L, TEMP, 3 + BASE_SHIFT
dsll TEMP, TEMP, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 8
#endif
daddiu CO1, CO1, 8 * SIZE
bgtz I, .L281
daddiu CO2, CO2, 8 * SIZE # delay slot
#endif
.align 4
## ----------------------------------------------------------------------
## .L24 -- M & 4 remainder of the 2-column panel (4 x 2 tile).
## Branches to .L22 when bit 2 of M is clear.  Accumulators:
##   column 0 (CO1): C11 C21 C31 C41   column 1 (CO2): C12 C22 C32 C42
## ----------------------------------------------------------------------
.L24:
andi I, M, 4 # MR=4
blez I, .L22
NOP
.align 4
.L241:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (4 values per k-step in A, 2 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, 2 + BASE_SHIFT
dsll TEMP, KK, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD A3, 2 * SIZE(AO)
MOV C41, C11
MOV C42, C11
LD A4, 3 * SIZE(AO)
MOV C13, C11
MOV C14, C11
LD B1, 0 * SIZE(BO)
MOV C23, C11
MOV C24, C11
LD B2, 1 * SIZE(BO)
MOV C33, C11
MOV C34, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 4
#else
daddiu TEMP, KK, 2
#endif
dsra L, TEMP, 1
blez L, .L242
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD A3, 2 * SIZE(AO)
MOV C41, C11
MOV C42, C11
LD A4, 3 * SIZE(AO)
MOV C13, C11
MOV C14, C11
LD B1, 0 * SIZE(BO)
MOV C23, C11
MOV C24, C11
LD B2, 1 * SIZE(BO)
MOV C33, C11
MOV C34, C11
MOV C43, C11
blez L, .L242
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 8 values, BO 4).
## First k-step: A1..A4 with B1/B2.  Second: A5..A8 with B3/B4.
.L2410:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD A5, 4 * SIZE(AO)
MADD C21, C21, A2, B1
LD B3, 2 * SIZE(BO)
MADD C31, C31, A3, B1
LD B4, 3 * SIZE(BO)
MADD C41, C41, A4, B1
LD A6, 5 * SIZE(AO)
daddiu BO, BO, 4 * SIZE
MADD C12, C12, A1, B2
LD A7, 6 * SIZE(AO)
MADD C22, C22, A2, B2
LD A8, 7 * SIZE(AO)
daddiu AO, AO, 8 * SIZE
MADD C32, C32, A3, B2
MADD C42, C42, A4, B2
MADD C11, C11, A5, B3
LD A1, 0 * SIZE(AO)
MADD C21, C21, A6, B3
LD B1, 0 * SIZE(BO)
MADD C31, C31, A7, B3
LD B2, 1 * SIZE(BO)
MADD C41, C41, A8, B3
LD A2, 1 * SIZE(AO)
MADD C12, C12, A5, B4
LD A3, 2 * SIZE(AO)
MADD C22, C22, A6, B4
LD A4, 3 * SIZE(AO)
MADD C32, C32, A7, B4
bgtz L, .L2410
MADD C42, C42, A8, B4 # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L242:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L240
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
MADD C31, C31, A3, B1
MADD C41, C41, A4, B1
MADD C12, C12, A1, B2
MADD C22, C22, A2, B2
MADD C32, C32, A3, B2
MADD C42, C42, A4, B2
daddiu AO, AO, 4 * SIZE
daddiu BO, BO, 2 * SIZE
.align 4
.L240: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(CO1)
LD A2, 1 * SIZE(CO1)
LD A3, 2 * SIZE(CO1)
LD A4, 3 * SIZE(CO1)
MADD A1, A1, C11, ALPHA
LD B1, 0 * SIZE(CO2)
MADD A2, A2, C21, ALPHA
LD B2, 1 * SIZE(CO2)
MADD A3, A3, C31, ALPHA
LD B3, 2 * SIZE(CO2)
MADD A4, A4, C41, ALPHA
LD B4, 3 * SIZE(CO2)
MADD B1, B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
MADD B2, B2, C22, ALPHA
ST A2, 1 * SIZE(CO1)
MADD B3, B3, C32, ALPHA
ST A3, 2 * SIZE(CO1)
MADD B4, B4, C42, ALPHA
ST A4, 3 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
ST B3, 2 * SIZE(CO2)
ST B4, 3 * SIZE(CO2)
daddiu CO1, CO1, 4 * SIZE
daddiu CO2, CO2, 4 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
MUL A3, C31, ALPHA
MUL A4, C41, ALPHA
MUL B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
MUL B2, C22, ALPHA
ST A2, 1 * SIZE(CO1)
MUL B3, C32, ALPHA
ST A3, 2 * SIZE(CO1)
MUL B4, C42, ALPHA
ST A4, 3 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
ST B3, 2 * SIZE(CO2)
ST B4, 3 * SIZE(CO2)
daddiu CO1, CO1, 4 * SIZE
daddiu CO2, CO2, 4 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -4
#else
daddiu TEMP, TEMP, -2
#endif
dsll L, TEMP, 2 + BASE_SHIFT
dsll TEMP, TEMP, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 4
#endif
#endif
.align 4
## ----------------------------------------------------------------------
## .L22 -- M & 2 remainder of the 2-column panel (2 x 2 tile).
## Branches to .L21 when bit 1 of M is clear.  Accumulators:
##   column 0 (CO1): C11 C21   column 1 (CO2): C12 C22
## MADD d, r, s, t is used throughout as d = r + s * t.
## ----------------------------------------------------------------------
.L22:
andi I, M, 2
blez I, .L21
NOP
.align 4
.L221:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (2 values per k-step in A and in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, 1 + BASE_SHIFT
dsll TEMP, KK, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 2
#else
daddiu TEMP, KK, 2
#endif
dsra L, TEMP, 1
blez L, .L222
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C43, C11
blez L, .L222
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO and BO both advance 4 values).
## First k-step: A1/A2 with B1/B2.  Second: A3/A4 with B3/B4.
.L2210:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD A3, 2 * SIZE(AO)
MADD C21, C21, A2, B1
LD B3, 2 * SIZE(BO)
MADD C12, C12, A1, B2
LD A4, 3 * SIZE(AO)
daddiu AO, AO, 4 * SIZE
MADD C22, C22, A2, B2
LD B4, 3 * SIZE(BO)
daddiu BO, BO, 4 * SIZE
MADD C11, C11, A3, B3
LD A1, 0 * SIZE(AO)
MADD C21, C21, A4, B3
LD B1, 0 * SIZE(BO)
MADD C12, C12, A3, B4
LD B2, 1 * SIZE(BO)
MADD C22, C22, A4, B4
bgtz L, .L2210
LD A2, 1 * SIZE(AO) # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L222:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L220
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
MADD C12, C12, A1, B2
MADD C22, C22, A2, B2
daddiu AO, AO, 2 * SIZE
daddiu BO, BO, 2 * SIZE
.align 4
.L220: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(CO1)
LD A2, 1 * SIZE(CO1)
MADD A1, A1, C11, ALPHA
LD B1, 0 * SIZE(CO2)
MADD A2, A2, C21, ALPHA
LD B2, 1 * SIZE(CO2)
MADD B1, B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
MADD B2, B2, C22, ALPHA
ST A2, 1 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
daddiu CO1, CO1, 2 * SIZE
daddiu CO2, CO2, 2 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
MUL B1, C12, ALPHA
MUL B2, C22, ALPHA
ST A1, 0 * SIZE(CO1)
ST A2, 1 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
ST B2, 1 * SIZE(CO2)
daddiu CO1, CO1, 2 * SIZE
daddiu CO2, CO2, 2 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -2
#else
daddiu TEMP, TEMP, -2
#endif
dsll L, TEMP, 1 + BASE_SHIFT
dsll TEMP, TEMP, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 2 # immediate form, matching the other tiles
#endif
#endif
.align 4
## ----------------------------------------------------------------------
## .L21 -- M & 1 remainder of the 2-column panel (1 x 2 tile).
## Branches to .L20 when bit 0 of M is clear.  Accumulators:
##   C11 for CO1, C12 for CO2.
## ----------------------------------------------------------------------
.L21:
andi I, M, 1
blez I, .L20
NOP
.align 4
.L211:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (1 value per k-step in A, 2 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B # Reset B
#else
dsll L, KK, BASE_SHIFT
dsll TEMP, KK, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 1
#else
daddiu TEMP, KK, 2
#endif
dsra L, TEMP, 1
blez L, .L212
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
LD B2, 1 * SIZE(BO)
MOV C43, C11
blez L, .L212
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 2 values, BO 4).
## First k-step: A1 with B1/B2.  Second: A2 with B3/B4.
.L2110:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD A2, 1 * SIZE(AO)
MADD C12, C12, A1, B2
LD B3, 2 * SIZE(BO)
LD B4, 3 * SIZE(BO)
daddiu AO, AO, 2 * SIZE
daddiu BO, BO, 4 * SIZE
MADD C11, C11, A2, B3
LD A1, 0 * SIZE(AO)
MADD C12, C12, A2, B4
LD B1, 0 * SIZE(BO)
bgtz L, .L2110
LD B2, 1 * SIZE(BO) # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L212:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L210
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C12, C12, A1, B2
daddiu AO, AO, 1 * SIZE
daddiu BO, BO, 2 * SIZE
.align 4
.L210: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(CO1)
MADD A1, A1, C11, ALPHA
LD B1, 0 * SIZE(CO2)
MADD B1, B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
daddiu CO1, CO1, 1 * SIZE
daddiu CO2, CO2, 1 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A1, C11, ALPHA
MUL B1, C12, ALPHA
ST A1, 0 * SIZE(CO1)
ST B1, 0 * SIZE(CO2)
daddiu CO1, CO1, 1 * SIZE
daddiu CO2, CO2, 1 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -1
#else
daddiu TEMP, TEMP, -2
#endif
dsll L, TEMP, BASE_SHIFT
dsll TEMP, TEMP, 1 + BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 1
#endif
#endif
.align 4
## .L20 -- end of the 2-column panel: commit the consumed B position and,
## for non-LEFT TRMM, grow KK by the panel width.
.L20:
move B, BO
#if defined(TRMMKERNEL) && !defined(LEFT)
daddiu KK, KK, 2
#endif
.align 4
## .L1 -- N & 1 remainder: one single-column panel, or straight to the
## epilogue at .L999 when bit 0 of N is clear.
.L1:
andi J, N, 1
blez J, .L999
NOP
## .L18 -- panel setup: A reset and the count of 8-row tiles in I.
## This panel stores through C directly instead of a CO pointer.
.L18:
#if defined(TRMMKERNEL) && defined(LEFT)
move KK, OFFSET
#endif
move AO, A # Reset A
dsra I, M, 3 # MR=8
blez I, .L14
NOP
.align 4
## ----------------------------------------------------------------------
## .L181 -- 8 x 1 tile of the single-column panel, repeated I = M / 8
## times.  Accumulators for rows 0..7, in store order:
##   C11 C21 C31 C41 C13 C23 C33 C43
## Results are stored through C, which advances by 8 values per tile.
## ----------------------------------------------------------------------
.L181:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (8 values per k-step in A, 1 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B # Reset B
#else
dsll L, KK, 3 + BASE_SHIFT
dsll TEMP, KK, BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
LD A1, 0 * SIZE(AO)
MOV C12, C11
LD A2, 1 * SIZE(AO)
MOV C21, C11
LD A3, 2 * SIZE(AO)
MOV C22, C11
LD A4, 3 * SIZE(AO)
MOV C31, C11
LD A5, 4 * SIZE(AO)
MOV C32, C11
LD A6, 5 * SIZE(AO)
MOV C41, C11
LD B1, 0 * SIZE(BO)
MOV C42, C11
LD A7, 6 * SIZE(AO)
MOV C13, C11
LD A8, 7 * SIZE(AO)
MOV C14, C11
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 8
#else
daddiu TEMP, KK, 1
#endif
dsra L, TEMP, 1
blez L, .L182
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
LD A1, 0 * SIZE(AO)
MOV C12, C11
LD A2, 1 * SIZE(AO)
MOV C21, C11
LD A3, 2 * SIZE(AO)
MOV C22, C11
LD A4, 3 * SIZE(AO)
MOV C31, C11
LD A5, 4 * SIZE(AO)
MOV C32, C11
LD A6, 5 * SIZE(AO)
MOV C41, C11
LD B1, 0 * SIZE(BO)
MOV C42, C11
LD A7, 6 * SIZE(AO)
MOV C13, C11
LD A8, 7 * SIZE(AO)
MOV C14, C11
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
blez L, .L182
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 16 values, BO 2).
## First k-step: A1..A8 with B1.  For the second k-step, rows 0..3 are
## loaded into B5..B8 and rows 4..7 into A1..A4, all used with B2.
.L1810:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD B5, 8 * SIZE(AO)
MADD C21, C21, A2, B1
LD B6, 9 * SIZE(AO)
MADD C31, C31, A3, B1
LD B7, 10 * SIZE(AO)
MADD C41, C41, A4, B1
LD B8, 11 * SIZE(AO)
MADD C13, C13, A5, B1
LD B2, 1 * SIZE(BO)
daddiu BO, BO, 2 * SIZE
MADD C23, C23, A6, B1
LD A1, 12 * SIZE(AO)
MADD C33, C33, A7, B1
LD A2, 13 * SIZE(AO)
MADD C43, C43, A8, B1
LD A3, 14 * SIZE(AO)
LD A4, 15 * SIZE(AO)
daddiu AO, AO, 16 * SIZE
MADD C11, C11, B5, B2
LD A5, 4 * SIZE(AO)
MADD C21, C21, B6, B2
LD A6, 5 * SIZE(AO)
MADD C13, C13, A1, B2
LD A7, 6 * SIZE(AO)
MADD C23, C23, A2, B2
LD A8, 7 * SIZE(AO)
MADD C33, C33, A3, B2
LD B1, 0 * SIZE(BO)
MADD C43, C43, A4, B2
LD A1, 0 * SIZE(AO)
MADD C31, C31, B7, B2
LD A2, 1 * SIZE(AO)
MADD C41, C41, B8, B2
LD A3, 2 * SIZE(AO)
bgtz L, .L1810
LD A4, 3 * SIZE(AO) # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA shares $f15 with B8, so it is loaded from 152($sp) only here,
## in the delay slot that runs on both paths.
.L182:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L180
LD ALPHA, 152($sp)
MADD C13, C13, A5, B1
MADD C23, C23, A6, B1
MADD C33, C33, A7, B1
MADD C43, C43, A8, B1
daddiu AO, AO, 8 * SIZE
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
MADD C31, C31, A3, B1
MADD C41, C41, A4, B1
daddiu BO, BO, 1 * SIZE
.align 4
.L180: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
daddiu I, I, -1
LD A1, 0 * SIZE(C)
LD A2, 1 * SIZE(C)
LD A3, 2 * SIZE(C)
LD A4, 3 * SIZE(C)
LD A5, 4 * SIZE(C)
LD A6, 5 * SIZE(C)
LD A7, 6 * SIZE(C)
LD A8, 7 * SIZE(C)
MADD A1, A1, C11, ALPHA
MADD A2, A2, C21, ALPHA
MADD A3, A3, C31, ALPHA
MADD A4, A4, C41, ALPHA
MADD A5, A5, C13, ALPHA
MADD A6, A6, C23, ALPHA
MADD A7, A7, C33, ALPHA
MADD A8, A8, C43, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
ST A3, 2 * SIZE(C)
ST A4, 3 * SIZE(C)
ST A5, 4 * SIZE(C)
ST A6, 5 * SIZE(C)
ST A7, 6 * SIZE(C)
ST A8, 7 * SIZE(C)
daddiu C, C, 8 * SIZE
bgtz I, .L181
NOP
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
daddiu I, I, -1
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
MUL A3, C31, ALPHA
MUL A4, C41, ALPHA
MUL A5, C13, ALPHA
MUL A6, C23, ALPHA
MUL A7, C33, ALPHA
MUL A8, C43, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
ST A3, 2 * SIZE(C)
ST A4, 3 * SIZE(C)
ST A5, 4 * SIZE(C)
ST A6, 5 * SIZE(C)
ST A7, 6 * SIZE(C)
ST A8, 7 * SIZE(C)
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -8
#else
daddiu TEMP, TEMP, -1
#endif
dsll L, TEMP, 3 + BASE_SHIFT
dsll TEMP, TEMP, BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 8
#endif
daddiu C, C, 8 * SIZE
bgtz I, .L181
NOP
#endif
.align 4
## ----------------------------------------------------------------------
## .L14 -- M & 4 remainder of the single-column panel (4 x 1 tile).
## Branches to .L12 when bit 2 of M is clear.  Accumulators for rows
## 0..3: C11 C21 C31 C41, stored through C.
## ----------------------------------------------------------------------
.L14:
andi I, M, 4 # MR=4
blez I, .L12
NOP
.align 4
.L141:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (4 values per k-step in A, 1 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, 2 + BASE_SHIFT
dsll TEMP, KK, BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD A3, 2 * SIZE(AO)
MOV C41, C11
MOV C42, C11
LD A4, 3 * SIZE(AO)
MOV C13, C11
MOV C14, C11
LD B1, 0 * SIZE(BO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 4
#else
daddiu TEMP, KK, 1
#endif
dsra L, TEMP, 1
blez L, .L142
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD A3, 2 * SIZE(AO)
MOV C41, C11
MOV C42, C11
LD A4, 3 * SIZE(AO)
MOV C13, C11
MOV C14, C11
LD B1, 0 * SIZE(BO)
MOV C23, C11
MOV C24, C11
MOV C33, C11
MOV C34, C11
MOV C43, C11
blez L, .L142
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 8 values, BO 2).
## First k-step: A1..A4 with B1.  Second: A5..A8 with B3.
.L1410:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD A5, 4 * SIZE(AO)
MADD C21, C21, A2, B1
LD B3, 1 * SIZE(BO)
MADD C31, C31, A3, B1
LD A6, 5 * SIZE(AO)
daddiu BO, BO, 2 * SIZE
MADD C41, C41, A4, B1
LD A7, 6 * SIZE(AO)
LD A8, 7 * SIZE(AO)
daddiu AO, AO, 8 * SIZE
MADD C11, C11, A5, B3
LD A1, 0 * SIZE(AO)
MADD C21, C21, A6, B3
LD B1, 0 * SIZE(BO)
MADD C31, C31, A7, B3
LD A2, 1 * SIZE(AO)
MADD C41, C41, A8, B3
LD A3, 2 * SIZE(AO)
bgtz L, .L1410
LD A4, 3 * SIZE(AO) # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L142:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L140
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
MADD C31, C31, A3, B1
MADD C41, C41, A4, B1
daddiu AO, AO, 4 * SIZE
daddiu BO, BO, 1 * SIZE
.align 4
.L140: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(C)
LD A2, 1 * SIZE(C)
LD A3, 2 * SIZE(C)
LD A4, 3 * SIZE(C)
MADD A1, A1, C11, ALPHA
MADD A2, A2, C21, ALPHA
MADD A3, A3, C31, ALPHA
MADD A4, A4, C41, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
ST A3, 2 * SIZE(C)
ST A4, 3 * SIZE(C)
daddiu C, C, 4 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
MUL A3, C31, ALPHA
MUL A4, C41, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
ST A3, 2 * SIZE(C)
ST A4, 3 * SIZE(C)
daddiu C, C, 4 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -4
#else
daddiu TEMP, TEMP, -1
#endif
dsll L, TEMP, 2 + BASE_SHIFT
dsll TEMP, TEMP, BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 4
#endif
#endif
.align 4
## ----------------------------------------------------------------------
## .L12 -- M & 2 remainder of the single-column panel (2 x 1 tile).
## Branches to .L11 when bit 1 of M is clear.  Accumulators for rows
## 0..1: C11 C21, stored through C.
## ----------------------------------------------------------------------
.L12:
andi I, M, 2
blez I, .L11
NOP
.align 4
.L121:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (2 values per k-step in A, 1 per k-step in B).
#if (defined(LEFT) && defined(TRANSA)) ||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B # Reset B
#else
dsll L, KK, 1 + BASE_SHIFT
dsll TEMP, KK, BASE_SHIFT
daddu AO, AO, L
daddu BO, B, TEMP
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
MOV C43, C11
MOV C44, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA)) ||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 2
#else
daddiu TEMP, KK, 1
#endif
dsra L, TEMP, 1
blez L, .L122
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD A2, 1 * SIZE(AO)
MOV C31, C11
MOV C32, C11
LD B1, 0 * SIZE(BO)
MOV C41, C11
MOV C42, C11
MOV C43, C11
blez L, .L122
MOV C44, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO advances 4 values, BO 2).
## First k-step: A1/A2 with B1.  Second: A3/A4 with B3.
.L1210:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD B3, 1 * SIZE(BO)
MADD C21, C21, A2, B1
daddiu BO, BO, 2 * SIZE
LD A3, 2 * SIZE(AO)
LD A4, 3 * SIZE(AO)
daddiu AO, AO, 4 * SIZE
MADD C11, C11, A3, B3
LD B1, 0 * SIZE(BO)
MADD C21, C21, A4, B3
LD A1, 0 * SIZE(AO)
bgtz L, .L1210
LD A2, 1 * SIZE(AO) # delay slot
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L122:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L120
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
MADD C21, C21, A2, B1
daddiu AO, AO, 2 * SIZE
daddiu BO, BO, 1 * SIZE
.align 4
.L120: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(C)
LD A2, 1 * SIZE(C)
MADD A1, A1, C11, ALPHA
MADD A2, A2, C21, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
daddiu C, C, 2 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).
MUL A1, C11, ALPHA
MUL A2, C21, ALPHA
ST A1, 0 * SIZE(C)
ST A2, 1 * SIZE(C)
daddiu C, C, 2 * SIZE
## Advance AO/BO over the k-steps this tile did not consume
## (K - KK minus the tile dimension), then bump KK when LEFT.
#if ( defined(LEFT) && defined(TRANSA))||\
(!defined(LEFT) && !defined(TRANSA))
dsubu TEMP, K, KK
#ifdef LEFT
daddiu TEMP, TEMP, -2
#else
daddiu TEMP, TEMP, -1
#endif
dsll L, TEMP, 1 + BASE_SHIFT
dsll TEMP, TEMP, BASE_SHIFT
daddu AO, AO, L
daddu BO, BO, TEMP
#endif
#ifdef LEFT
daddiu KK, KK, 2
#endif
#endif
.align 4
## ----------------------------------------------------------------------
## .L11 -- M & 1 remainder of the single-column panel (1 x 1 tile).
## Branches to .L10 when bit 0 of M is clear.  A single accumulator C11
## collects the dot product and is stored through C.
## ----------------------------------------------------------------------
.L11:
andi I, M, 1
blez I, .L10
NOP
.align 4
.L111:
#if defined(TRMMKERNEL)
## TRMM setup: either restart B at the panel base, or skip KK k-steps
## in both operands (1 value per k-step in each, so one offset L).
#if (defined(LEFT) && defined(TRANSA))||\
(!defined(LEFT) && !defined(TRANSA))
move BO, B
#else
dsll L, KK, BASE_SHIFT
daddu AO, AO, L
daddu BO, B, L
#endif
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD B1, 0 * SIZE(BO)
MOV C31, C11
MOV C32, C11
## TEMP = number of k-steps for this tile, L = TEMP / 2 loop iterations.
#if (defined(LEFT) && !defined(TRANSA))||\
(!defined(LEFT) && defined(TRANSA))
dsubu TEMP, K, KK
#elif defined(LEFT)
daddiu TEMP, KK, 1
#else
daddiu TEMP, KK, 1
#endif
dsra L, TEMP, 1
blez L, .L112
NOP
#else
## GEMM setup: same register clearing and preloads, full K range.
move BO, B # Reset B
dsra L, K, 1 # unroll K by 2
MTC $0, C11 # clear result registers
MOV C12, C11
LD A1, 0 * SIZE(AO)
MOV C21, C11
MOV C22, C11
LD B1, 0 * SIZE(BO)
MOV C31, C11
blez L, .L112
MOV C32, C11 # delay slot
#endif
.align 4
## Main loop: 2 k-steps per iteration (AO and BO both advance 2 values).
.L1110:
daddiu L, L, -1
MADD C11, C11, A1, B1
LD A2, 1 * SIZE(AO)
LD B2, 1 * SIZE(BO)
daddiu AO, AO, 2 * SIZE
daddiu BO, BO, 2 * SIZE
MADD C11, C11, A2, B2
LD A1, 0 * SIZE(AO)
LD B1, 0 * SIZE(BO)
bgtz L, .L1110
NOP
.align 4
## k remainder: one last k-step when bit 0 of the k count is set.
## ALPHA is loaded from 152($sp) in the delay slot on both paths.
.L112:
#ifndef TRMMKERNEL
andi L, K, 1
#else
andi L, TEMP, 1
#endif
blez L, .L110
LD ALPHA, 152($sp)
MADD C11, C11, A1, B1
daddiu AO, AO, 1 * SIZE
daddiu BO, BO, 1 * SIZE
.align 4
.L110: # Write Back
#ifndef TRMMKERNEL
## GEMM: stored value = loaded C value + accumulator * ALPHA.
LD A1, 0 * SIZE(C)
MADD A1, A1, C11, ALPHA
ST A1, 0 * SIZE(C)
daddiu C, C, 1 * SIZE
#else
## TRMM: stored value = accumulator * ALPHA (C is not read).  This is
## the last tile of the panel, so no AO/BO/KK adjustment follows.
MUL A1, C11, ALPHA
ST A1, 0 * SIZE(C)
daddiu C, C, 1 * SIZE
#endif
.align 4
## .L10 -- end of the single-column panel: commit the consumed B position
## and fall through into the epilogue.
.L10:
move B, BO
NOP
## ----------------------------------------------------------------------
## .L999 -- epilogue: reload saved registers from the stack frame,
## release the STACKSIZE-byte frame and return through $31.
## The slot offsets presumably mirror the stores of the prologue, which
## is outside this part of the file -- confirm before changing either.
## ----------------------------------------------------------------------
.L999:
## Integer registers $16..$22 (CO3, CO4, PREA, PREB are among them).
ld $16, 0($sp)
ld $17, 8($sp)
ld $18, 16($sp)
ld $19, 24($sp)
ld $20, 32($sp)
ld $21, 40($sp)
ld $22, 48($sp)
## Floating-point registers $f24..$f28.
## NOTE(review): C34, C43 and C44 are $f29, $f30 and $f31 and are
## written by the tiles above, but they are not reloaded here -- check
## against the prologue and the target ABI.
LD $f24, 56($sp)
LD $f25, 64($sp)
LD $f26, 72($sp)
LD $f27, 80($sp)
LD $f28, 88($sp)
#if defined(TRMMKERNEL)
## TRMM only: $23, $24, $25 back OFFSET, KK and TEMP.
ld $23, 96($sp)
ld $24, 104($sp)
ld $25, 112($sp)
#endif
#ifndef __64BIT__
LD $f20,120($sp)
LD $f21,128($sp)
LD $f22,136($sp)
LD $f23,144($sp)
#endif
daddiu $sp,$sp,STACKSIZE
j $31
nop
EPILOGUE
# .set macro
# .set reorder
# .end gemm
# .size gemm, .-gemm
# .ident "GCC: (Debian 4.4.6-6) 4.4.6"