2 synth_sse_accurate: SSE-optimized synth (MPEG-compliant 16-bit output version)
4 copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Taihei Monma
/* MMX register used as the running clip accumulator in this file: it is
   cleared with pxor at function entry, updated with paddw after each group
   of stored samples, and its 16-bit lanes are folded together at the end.
   The prototype comment says the routine returns the number of clipped
   samples; the instructions that produce the per-group value added into
   this register are not visible in this excerpt. */
18 #define MMREG_CLIP %mm7
21 int synth_1to1_sse_accurate_asm(real *window, real *b0, short *samples, int bo1);
22 return value: number of clipped samples
/* IEEE-754 single-precision bit patterns written as signed 32-bit integers:
     1191181824 == 0x46FFFE00 ==  32767.0f
     -956301312 == 0xC7000000 == -32768.0f
   NOTE(review): these look like the contents of the maxmin_s16 table that
   the cmpnleps/cmpltps instructions further down read at offsets +0 and +16
   (upper and lower bound for 16-bit output); the label, the alignment
   directive and any repeated entries are not visible in this excerpt, so
   confirm against the full file.  Because that table is used as a memory
   operand of non-move SSE instructions it must be 16-byte aligned. */
32 .long 1191181824 /* 32767.0 */
36 .long -956301312 /* -32768.0 */
/*
   int synth_1to1_sse_accurate_asm(real *window, real *b0, short *samples, int bo1);
   Returns the number of clipped samples (per the prototype comment above).

   NOTE(review): this listing is an excerpt.  The prologue, the definitions
   of the WINDOW and SAMPLES operand macros, the multiply/add arithmetic,
   the float-to-integer conversion, any loop labels and the epilogue are not
   visible here, so the comments below describe only the instructions that
   are shown.  The use of %ebp-relative argument access and MMX/SSE
   registers indicates 32-bit x86 code.
*/
42 .globl ASM_NAME(synth_1to1_sse_accurate_asm)
43 ASM_NAME(synth_1to1_sse_accurate_asm):
/* Clear the clip accumulator (MMREG_CLIP is %mm7). */
49 pxor MMREG_CLIP, MMREG_CLIP
/* Load the dword at 16(%ebp) into SAMPLES.  With a conventional %ebp frame
   this is the third argument (short *samples) -- frame setup is not
   visible here, TODO confirm. */
53 movl 16(%ebp), SAMPLES
/* Advance the window pointer by 64 bytes before the first group. */
57 leal 64(WINDOW), WINDOW
/* Unaligned 16-byte loads: four vectors at WINDOW+0..48 and four more at
   WINDOW+128..176. */
64 movups (WINDOW), %xmm0
65 movups 16(WINDOW), %xmm1
66 movups 32(WINDOW), %xmm2
67 movups 48(WINDOW), %xmm3
68 movups 128(WINDOW), %xmm4
69 movups 144(WINDOW), %xmm5
70 movups 160(WINDOW), %xmm6
71 movups 176(WINDOW), %xmm7
/* Step the window pointer forward by 256 bytes. */
89 leal 256(WINDOW), WINDOW
/* Same offsets again relative to the advanced pointer.  Here the +128/+144
   vectors go to %xmm6/%xmm7, and %xmm1/%xmm3 are reloaded with the
   +160/+176 vectors a little later (the instructions in between are not
   shown). */
92 movups (WINDOW), %xmm0
93 movups 16(WINDOW), %xmm1
94 movups 32(WINDOW), %xmm2
95 movups 48(WINDOW), %xmm3
96 movups 128(WINDOW), %xmm6
97 movups 144(WINDOW), %xmm7
107 movups 160(WINDOW), %xmm1
108 movups 176(WINDOW), %xmm3
/* Step the window pointer forward by another 256 bytes. */
117 leal 256(WINDOW), WINDOW
/* Interleave the low halves (unpcklps) and high halves (unpckhps) of
   register pairs.  NOTE(review): presumably part of a transpose used to sum
   the partial results across lanes -- the surrounding moves/adds are not
   visible, confirm. */
122 unpcklps %xmm5, %xmm4
123 unpcklps %xmm7, %xmm6
124 unpckhps %xmm5, %xmm0
125 unpckhps %xmm7, %xmm1
/* Read what is already in the output buffer: selector 0xdd picks source
   words 1,3,1,3, i.e. the odd-indexed 16-bit words of each 8-byte group.
   NOTE(review): presumably the other channel of an interleaved stereo
   buffer that must be preserved -- confirm with the caller. */
138 pshufw $0xdd, (SAMPLES), %mm2
139 pshufw $0xdd, 8(SAMPLES), %mm3
/* Build per-lane all-ones masks: %xmm1 lanes that are NOT <= the vector at
   maxmin_s16, and %xmm2 lanes that are < the vector at maxmin_s16+16.
   Assuming those vectors hold 32767.0 and -32768.0 respectively, these are
   the lanes that overflow the signed 16-bit range. */
140 cmpnleps ASM_NAME(maxmin_s16), %xmm1
141 cmpltps ASM_NAME(maxmin_s16)+16, %xmm2
/* Store 8 bytes of output at SAMPLES+8 (the store to SAMPLES+0 is not
   shown in this excerpt). */
150 movq %mm1, 8(SAMPLES)
/* Add %mm0 word-wise into the clip accumulator; how %mm0 is derived from
   the compare masks is not visible here. */
163 paddw %mm0, MMREG_CLIP
/* Advance the output pointer by 16 bytes. */
165 leal 16(SAMPLES), SAMPLES
/* Second occurrence of the same visible load / interleave / clip-mask /
   store sequence.  Any differences from the first occurrence are in lines
   that are not part of this excerpt. */
173 movups (WINDOW), %xmm0
174 movups 16(WINDOW), %xmm1
175 movups 32(WINDOW), %xmm2
176 movups 48(WINDOW), %xmm3
177 movups 128(WINDOW), %xmm4
178 movups 144(WINDOW), %xmm5
179 movups 160(WINDOW), %xmm6
180 movups 176(WINDOW), %xmm7
198 leal 256(WINDOW), WINDOW
201 movups (WINDOW), %xmm0
202 movups 16(WINDOW), %xmm1
203 movups 32(WINDOW), %xmm2
204 movups 48(WINDOW), %xmm3
205 movups 128(WINDOW), %xmm6
206 movups 144(WINDOW), %xmm7
216 movups 160(WINDOW), %xmm1
217 movups 176(WINDOW), %xmm3
226 leal 256(WINDOW), WINDOW
231 unpcklps %xmm5, %xmm4
232 unpcklps %xmm7, %xmm6
233 unpckhps %xmm5, %xmm0
234 unpckhps %xmm7, %xmm1
247 pshufw $0xdd, (SAMPLES), %mm2
248 pshufw $0xdd, 8(SAMPLES), %mm3
249 cmpnleps ASM_NAME(maxmin_s16), %xmm1
250 cmpltps ASM_NAME(maxmin_s16)+16, %xmm2
259 movq %mm1, 8(SAMPLES)
272 paddw %mm0, MMREG_CLIP
274 leal 16(SAMPLES), SAMPLES
/* Begin folding the four 16-bit lanes of the clip accumulator:
   0xee copies words 2,3 over words 0,1, so after the paddw the low two
   words of %mm0 hold w0+w2 and w1+w3; 0x55 then broadcasts word 1 into
   %mm1.  The final add and the move into the return register lie beyond
   the end of this excerpt. */
278 pshufw $0xee, MMREG_CLIP, %mm0
279 paddw MMREG_CLIP, %mm0
280 pshufw $0x55, %mm0, %mm1