// Solver options: bound loop unrolling to 6 iterations and use 3-bit inputs.
pragma options "--bnd-unroll-amnt 6 --bnd-inbits 3 ";

/*
 * Reference specification: transpose of a 4x4 matrix stored row-major in a
 * flat 16-element array.
 *
 * m       : input matrix; element (row, col) lives at index 4*row + col.
 * returns : a new array t with t[4*x + y] == m[4*y + x] for all x, y in 0..3.
 *
 * The result is built in a separate array on purpose: assigning into m while
 * still reading from it overwrites one triangle before it has been copied and
 * yields a symmetric matrix instead of the transpose.
 */
int[16] transpose(int[16] m){
    int[16] t = 0;
    int x, y;
    for(x = 0; x < 4; x++)
        for(y = 0; y < 4; y++)
            t[4*x+y] = m[4*y+x];
    return t;
}

/*
 * Four-lane shuffle with the operand layout of the x86 SSE SHUFPS
 * instruction.
 *
 * xmm1    : source of result lanes 0 and 1.
 * xmm2    : source of result lanes 2 and 3.
 * imm8    : four 2-bit selectors; bits [2k, 2k+1] pick the source lane used
 *           for result lane k.
 * returns : the shuffled 4-element vector.
 */
generator int[4] shufps(int[4] xmm1, int[4] xmm2, bit[8] imm8){ /* automatically rewritten */
    int[4] ret;
    ret[0] = xmm1[(int)imm8[0::2]];
    ret[1] = xmm1[(int)imm8[2::2]];
    ret[2] = xmm2[(int)imm8[4::2]];
    ret[3] = xmm2[(int)imm8[6::2]];
    return ret;
}

/*
 * Sketch of a shuffle-only transpose; it must be equivalent to transpose().
 *
 * Every ?? is a hole left for the synthesizer: the destination offset, both
 * source offsets and the shuffle selector of each shufps call are unknown.
 * The computation has two stages of four shuffles each: m -> s, then s -> t.
 */
int[16] sse_transpose(int[16] m) implements transpose {
    int[16] s = 0;
    int[16] t = 0;

    // Stage 1: build the intermediate s from 4-element windows of the input.
    s[??::4] = shufps(m[??::4], m[??::4], ??);
    s[??::4] = shufps(m[??::4], m[??::4], ??);
    s[??::4] = shufps(m[??::4], m[??::4], ??);
    s[??::4] = shufps(m[??::4], m[??::4], ??);

    // Stage 2: build the result t from 4-element windows of s.
    t[??::4] = shufps(s[??::4], s[??::4], ??);
    t[??::4] = shufps(s[??::4], s[??::4], ??);
    t[??::4] = shufps(s[??::4], s[??::4], ??);
    t[??::4] = shufps(s[??::4], s[??::4], ??);

    return t;
}