Sync with trunk (r48414)
[reactos.git] / lib / 3rdparty / libmpg123 / optimize.c
1 /*
2 optimize: get a grip on the different optimizations
3
4 copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
8 Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9 */
10
11 #include "mpg123lib_intern.h" /* includes optimize.h */
12 #include "debug.h"
13
14 /* Must match the enum optdec! */
15
16 /*
17 It SUCKS having to define these names that way, but compile-time initialization of string arrays is a bitch.
18 GCC doesn't see constant stuff when it's wiggling in front of it!
19 Anyhow: Have a script for that:
20 names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64"
21 for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done
22 echo -n "static const char* decname[] =
23 {
24 \"auto\"
25 "
26 for i in $names; do echo -n ", dn_${i/-/_}"; done
27 echo "
28 , \"nodec\"
29 };"
30 */
/* Printable decoder names; the dn_* macros are also used by the decoder lists in this file. */
#define dn_generic        "generic"
#define dn_generic_dither "generic_dither"
#define dn_i386           "i386"
#define dn_i486           "i486"
#define dn_i586           "i586"
#define dn_i586_dither    "i586_dither"
#define dn_MMX            "MMX"
#define dn_3DNow          "3DNow"
#define dn_3DNowExt       "3DNowExt"
#define dn_AltiVec        "AltiVec"
#define dn_SSE            "SSE"
#define dn_x86_64         "x86-64"
#define dn_ARM            "ARM"

/* Name table looked up with enum optdec values: "auto" comes first, "nodec" closes the list. */
static const char* decname[] =
{
	"auto",
	dn_generic,
	dn_generic_dither,
	dn_i386,
	dn_i486,
	dn_i586,
	dn_i586_dither,
	dn_MMX,
	dn_3DNow,
	dn_3DNowExt,
	dn_AltiVec,
	dn_SSE,
	dn_x86_64,
	dn_ARM,
	"nodec"
};
50
#if defined(OPT_X86) && defined(OPT_MULTI)
/* Multi-decoder x86 build: use the real CPU flag structure. */
#include "getcpuflags.h"
struct cpuflags cpu_flags;
#else
/*
	No runtime decision in this build: cpu_flags is a dummy and every feature
	test answers yes, so the very same synth selection code can be used with
	a result that does not depend on the machine.
*/
char cpu_flags;
#define cpu_i586(s)     1
#define cpu_fpu(s)      1
#define cpu_mmx(s)      1
#define cpu_3dnow(s)    1
#define cpu_3dnowext(s) 1
#define cpu_sse(s)      1
#define cpu_sse2(s)     1
#define cpu_sse3(s)     1
#endif
67
/*
	Helper macros that assemble one row of a synth function table.
	Each IFxx() macro emits its argument only if that output format is
	compiled in; IF8 and IFREAL append the separating comma, IF32 (the last
	slot) does not.
*/

#ifdef NO_8BIT
#define IF8(synth)
#else
#define IF8(synth) synth,
#endif

#ifdef NO_REAL
#define IFREAL(synth)
#else
#define IFREAL(synth) synth,
#endif

#ifdef NO_32BIT
#define IF32(synth)
#else
#define IF32(synth) synth
#endif

/* One table row: 16bit entry first (unless disabled), then 8bit, real, 32bit. */
#ifdef NO_16BIT
#define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
#define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
93
94 /* The call of left and right plain synth, wrapped.
95 This may be replaced by a direct stereo optimized synth. */
96 int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
97 {
98 int clip;
99 clip = (fr->synth)(bandPtr_l, 0, fr, 0);
100 clip += (fr->synth)(bandPtr_r, 1, fr, 1);
101 return clip;
102 }
103
104 const struct synth_s synth_base =
105 {
106 { /* plain */
107 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
108 # ifndef NO_DOWNSAMPLE
109 ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
110 ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
111 # endif
112 # ifndef NO_NTOM
113 ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
114 # endif
115 },
116 { /* stereo, by default only wrappers over plain synth */
117 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
118 # ifndef NO_DOWNSAMPLE
119 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
120 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
121 # endif
122 # ifndef NO_NTOM
123 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
124 # endif
125 },
126 { /* mono2stereo */
127 OUT_SYNTHS(synth_1to1_mono2stereo, synth_1to1_8bit_mono2stereo, synth_1to1_real_mono2stereo, synth_1to1_s32_mono2stereo)
128 # ifndef NO_DOWNSAMPLE
129 ,OUT_SYNTHS(synth_2to1_mono2stereo, synth_2to1_8bit_mono2stereo, synth_2to1_real_mono2stereo, synth_2to1_s32_mono2stereo)
130 ,OUT_SYNTHS(synth_4to1_mono2stereo, synth_4to1_8bit_mono2stereo, synth_4to1_real_mono2stereo, synth_4to1_s32_mono2stereo)
131 # endif
132 # ifndef NO_NTOM
133 ,OUT_SYNTHS(synth_ntom_mono2stereo, synth_ntom_8bit_mono2stereo, synth_ntom_real_mono2stereo, synth_ntom_s32_mono2stereo)
134 # endif
135 },
136 { /* mono*/
137 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
138 # ifndef NO_DOWNSAMPLE
139 ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
140 ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
141 # endif
142 # ifndef NO_NTOM
143 ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
144 #endif
145 }
146 };
147
#ifdef OPT_X86
/*
	Plain synth table for i386: dedicated _i386 variants for 1to1, 2to1 and
	4to1, while the ntom row reuses the generic ntom functions.
*/
const func_synth plain_i386[r_limit][f_limit] =
{
	OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386)
#ifndef NO_DOWNSAMPLE
	,OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386)
	,OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386)
#endif
#ifndef NO_NTOM
	,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
#endif
};
#endif
162
163
164 enum optdec defdec(void){ return defopt; }
165
166 enum optcla decclass(const enum optdec type)
167 {
168 return (type == mmx || type == sse || type == dreidnowext || type == x86_64 ) ? mmxsse : normal;
169 }
170
171
172 static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])
173 {
174 enum synth_resample ri;
175 enum synth_format fi;
176 for(ri=0; ri<r_limit; ++ri)
177 for(fi=0; fi<f_limit; ++fi)
178 if(synth == synths[ri][fi])
179 return TRUE;
180
181 return FALSE;
182 }
183
184 /* Determine what kind of decoder is actually active
185 This depends on runtime choices which may cause fallback to i386 or generic code. */
186 static int find_dectype(mpg123_handle *fr)
187 {
188 enum optdec type = nodec;
189 /* Direct and indirect usage, 1to1 stereo decoding.
190 Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
191 func_synth basic_synth = fr->synth;
192 #ifndef NO_8BIT
193 #ifndef NO_16BIT
194 if(basic_synth == synth_1to1_8bit_wrap)
195 basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
196 #endif
197 #endif
198
199 if(FALSE) ; /* Just to initialize the else if ladder. */
200 #ifndef NO_16BIT
201 #ifdef OPT_3DNOWEXT
202 else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext;
203 #endif
204 #ifdef OPT_SSE
205 else if(basic_synth == synth_1to1_sse) type = sse;
206 #endif
207 #ifdef OPT_3DNOW
208 else if(basic_synth == synth_1to1_3dnow) type = dreidnow;
209 #endif
210 #ifdef OPT_MMX
211 else if(basic_synth == synth_1to1_mmx) type = mmx;
212 #endif
213 #ifdef OPT_I586_DITHER
214 else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
215 #endif
216 #ifdef OPT_I586
217 else if(basic_synth == synth_1to1_i586) type = ifuenf;
218 #endif
219 #ifdef OPT_ALTIVEC
220 else if(basic_synth == synth_1to1_altivec) type = altivec;
221 #endif
222 #ifdef OPT_X86_64
223 else if(basic_synth == synth_1to1_x86_64) type = x86_64;
224 #endif
225 #ifdef OPT_ARM
226 else if(basic_synth == synth_1to1_arm) type = arm;
227 #endif
228 #ifdef OPT_GENERIC_DITHER
229 else if(basic_synth == synth_1to1_dither) type = generic_dither;
230 #endif
231 #ifdef OPT_DITHER /* either i586 or generic! */
232 #ifndef NO_DOWNSAMPLE
233 else if
234 (
235 basic_synth == synth_2to1_dither
236 || basic_synth == synth_4to1_dither
237 ) type = generic_dither;
238 #endif
239 #endif
240 #endif /* 16bit */
241
242 #ifndef NO_REAL
243 #ifdef OPT_SSE
244 else if(basic_synth == synth_1to1_real_sse) type = sse;
245 #endif
246 #ifdef OPT_X86_64
247 else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
248 #endif
249 #ifdef OPT_ALTIVEC
250 else if(basic_synth == synth_1to1_real_altivec) type = altivec;
251 #endif
252
253 #endif /* real */
254
255 #ifndef NO_32BIT
256 #ifdef OPT_SSE
257 else if(basic_synth == synth_1to1_s32_sse) type = sse;
258 #endif
259 #ifdef OPT_X86_64
260 else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
261 #endif
262 #ifdef OPT_ALTIVEC
263 else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
264 #endif
265 #endif /* 32bit */
266
267 #ifdef OPT_X86
268 else if(find_synth(basic_synth, plain_i386))
269 type = idrei;
270 #endif
271
272 else if(find_synth(basic_synth, synth_base.plain))
273 type = generic;
274
275
276
277 #ifdef OPT_I486
278 /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
279 otherwise we have i386 active... but still, the distinction doesn't matter*/
280 type = ivier;
281 #endif
282
283 if(type != nodec)
284 {
285 fr->cpu_opts.type = type;
286 fr->cpu_opts.class = decclass(type);
287
288 debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
289 return MPG123_OK;
290 }
291 else
292 {
293 if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
294
295 fr->err = MPG123_BAD_DECODER_SETUP;
296 return MPG123_ERR;
297 }
298 }
299
300 /* set synth functions for current frame, optimizations handled by opt_* macros */
301 int set_synth_functions(mpg123_handle *fr)
302 {
303 enum synth_resample resample = r_none;
304 enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
305
306 /* Select the basic output format, different from 16bit: 8bit, real. */
307 if(FALSE){}
308 #ifndef NO_16BIT
309 else if(fr->af.encoding & MPG123_ENC_16)
310 basic_format = f_16;
311 #endif
312 #ifndef NO_8BIT
313 else if(fr->af.encoding & MPG123_ENC_8)
314 basic_format = f_8;
315 #endif
316 #ifndef NO_REAL
317 else if(fr->af.encoding & MPG123_ENC_FLOAT)
318 basic_format = f_real;
319 #endif
320 #ifndef NO_32BIT
321 else if(fr->af.encoding & MPG123_ENC_32)
322 basic_format = f_32;
323 #endif
324
325 /* Make sure the chosen format is compiled into this lib. */
326 if(basic_format == f_none)
327 {
328 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
329
330 return -1;
331 }
332
333 /* Be explicit about downsampling variant. */
334 switch(fr->down_sample)
335 {
336 case 0: resample = r_1to1; break;
337 #ifndef NO_DOWNSAMPLE
338 case 1: resample = r_2to1; break;
339 case 2: resample = r_4to1; break;
340 #endif
341 #ifndef NO_NTOM
342 case 3: resample = r_ntom; break;
343 #endif
344 }
345
346 if(resample == r_none)
347 {
348 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
349
350 return -1;
351 }
352
353 debug2("selecting synth: resample=%i format=%i", resample, basic_format);
354 /* Finally selecting the synth functions for stereo / mono. */
355 fr->synth = fr->synths.plain[resample][basic_format];
356 fr->synth_stereo = fr->synths.stereo[resample][basic_format];
357 fr->synth_mono = fr->af.channels==2
358 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
359 : fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */
360
361 if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
362 {
363 fr->err = MPG123_BAD_DECODER_SETUP;
364 return MPG123_ERR;
365 }
366
367 if(frame_buffers(fr) != 0)
368 {
369 fr->err = MPG123_NO_BUFFERS;
370 if(NOQUIET) error("Failed to set up decoder buffers!");
371
372 return MPG123_ERR;
373 }
374
375 #ifndef NO_8BIT
376 if(basic_format == f_8)
377 {
378 if(make_conv16to8_table(fr) != 0)
379 {
380 if(NOQUIET) error("Failed to set up conv16to8 table!");
381 /* it's a bit more work to get proper error propagation up */
382 return -1;
383 }
384 }
385 #endif
386
387 #ifdef OPT_MMXORSSE
388 /* Special treatment for MMX, SSE and 3DNowExt stuff.
389 The real-decoding SSE for x86-64 uses normal tables! */
390 if(fr->cpu_opts.class == mmxsse
391 # ifndef NO_REAL
392 && basic_format != f_real
393 # endif
394 # ifndef NO_32BIT
395 && basic_format != f_32
396 # endif
397 # ifdef ACCURATE_ROUNDING
398 && fr->cpu_opts.type != sse
399 && fr->cpu_opts.type != x86_64
400 # endif
401 )
402 {
403 #ifndef NO_LAYER3
404 init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
405 #endif
406 #ifndef NO_LAYER12
407 init_layer12_stuff(fr, init_layer12_table_mmx);
408 #endif
409 fr->make_decode_tables = make_decode_tables_mmx;
410 }
411 else
412 #endif
413 {
414 #ifndef NO_LAYER3
415 init_layer3_stuff(fr, init_layer3_gainpow2);
416 #endif
417 #ifndef NO_LAYER12
418 init_layer12_stuff(fr, init_layer12_table);
419 #endif
420 fr->make_decode_tables = make_decode_tables;
421 }
422
423 /* We allocated the table buffers just now, so (re)create the tables. */
424 fr->make_decode_tables(fr);
425
426 return 0;
427 }
428
429 int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
430 {
431 const char* chosen = ""; /* the chosen decoder opt as string */
432 enum optdec want_dec = nodec;
433 int done = 0;
434 int auto_choose = 0;
435 #ifdef OPT_DITHER
436 int dithered = FALSE; /* If some dithered decoder is chosen. */
437 #endif
438
439 want_dec = dectype(cpu);
440 auto_choose = want_dec == autodec;
441 /* Fill whole array of synth functions with generic code first. */
442 fr->synths = synth_base;
443
444 #ifndef OPT_MULTI
445 {
446 if(!auto_choose && want_dec != defopt)
447 {
448 if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
449 }
450 auto_choose = TRUE; /* There will be only one choice anyway. */
451 }
452 #endif
453
454 fr->cpu_opts.type = nodec;
455 /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
456 #ifdef OPT_X86
457
458 #ifdef OPT_MULTI
459 #ifndef NO_LAYER3
460 #if (defined OPT_3DNOW || defined OPT_3DNOWEXT)
461 fr->cpu_opts.the_dct36 = dct36;
462 #endif
463 #endif
464 #endif
465
466 if(cpu_i586(cpu_flags))
467 {
468 # ifdef OPT_MULTI
469 debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
470 # endif
471 #ifdef OPT_SSE
472 if( !done && (auto_choose || want_dec == sse)
473 && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
474 {
475 chosen = "SSE";
476 fr->cpu_opts.type = sse;
477 # ifndef NO_16BIT
478 fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
479 # ifdef ACCURATE_ROUNDING
480 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
481 # endif
482 # endif
483 # ifndef NO_REAL
484 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
485 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
486 # endif
487 # ifndef NO_32BIT
488 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
489 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
490 # endif
491 done = 1;
492 }
493 #endif
494 # ifdef OPT_3DNOWEXT
495 if( !done && (auto_choose || want_dec == dreidnowext )
496 && cpu_3dnow(cpu_flags)
497 && cpu_3dnowext(cpu_flags)
498 && cpu_mmx(cpu_flags) )
499 {
500 chosen = "3DNowExt";
501 fr->cpu_opts.type = dreidnowext;
502 #ifdef OPT_MULTI
503 # ifndef NO_LAYER3
504 fr->cpu_opts.the_dct36 = dct36_3dnowext;
505 # endif
506 #endif
507 # ifndef NO_16BIT
508 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
509 # endif
510 done = 1;
511 }
512 #endif
513 #ifdef OPT_3DNOW
514 if( !done && (auto_choose || want_dec == dreidnow)
515 && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
516 {
517 chosen = "3DNow";
518 fr->cpu_opts.type = dreidnow;
519 #ifdef OPT_MULTI
520 # ifndef NO_LAYER3
521 fr->cpu_opts.the_dct36 = dct36_3dnow;
522 # endif
523 #endif
524 # ifndef NO_16BIT
525 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
526 # endif
527 done = 1;
528 }
529 #endif
530 #ifdef OPT_MMX
531 if( !done && (auto_choose || want_dec == mmx)
532 && cpu_mmx(cpu_flags) )
533 {
534 chosen = "MMX";
535 fr->cpu_opts.type = mmx;
536 # ifndef NO_16BIT
537 fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
538 # endif
539 done = 1;
540 }
541 #endif
542 #ifdef OPT_I586
543 if(!done && (auto_choose || want_dec == ifuenf))
544 {
545 chosen = "i586/pentium";
546 fr->cpu_opts.type = ifuenf;
547 # ifndef NO_16BIT
548 fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
549 # endif
550 done = 1;
551 }
552 #endif
553 #ifdef OPT_I586_DITHER
554 if(!done && (auto_choose || want_dec == ifuenf_dither))
555 {
556 chosen = "dithered i586/pentium";
557 fr->cpu_opts.type = ifuenf_dither;
558 dithered = TRUE;
559 # ifndef NO_16BIT
560 fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
561 # ifndef NO_DOWNSAMPLE
562 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
563 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
564 # endif
565 # endif
566 done = 1;
567 }
568 #endif
569 }
570 #ifdef OPT_I486
571 /* That won't cooperate in multi opt mode - forcing i486 in layer3.c
572 But still... here it is... maybe for real use in future. */
573 if(!done && (auto_choose || want_dec == ivier))
574 {
575 chosen = "i486";
576 fr->cpu_opts.type = ivier;
577 done = 1;
578 }
579 #endif
580 #ifdef OPT_I386
581 if(!done && (auto_choose || want_dec == idrei))
582 {
583 chosen = "i386";
584 fr->cpu_opts.type = idrei;
585 done = 1;
586 }
587 #endif
588
589 if(done)
590 {
591 /*
592 We have chosen some x86 decoder... fillup some i386 stuff.
593 There is an open question about using dithered synth_1to1 for 8bit wrappers.
594 For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...
595 */
596 enum synth_resample ri;
597 enum synth_format fi;
598 # ifndef NO_8BIT
599 # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
600 if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
601 {
602 fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
603 fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
604 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo;
605 }
606 # endif
607 # endif
608 for(ri=0; ri<r_limit; ++ri)
609 for(fi=0; fi<f_limit; ++fi)
610 {
611 if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
612 fr->synths.plain[ri][fi] = plain_i386[ri][fi];
613 }
614 }
615
616 #endif /* OPT_X86 */
617
618 #ifdef OPT_X86_64
619 if(!done && (auto_choose || want_dec == x86_64))
620 {
621 chosen = "x86-64 (SSE)";
622 fr->cpu_opts.type = x86_64;
623 # ifndef NO_16BIT
624 fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
625 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
626 # endif
627 # ifndef NO_REAL
628 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
629 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
630 # endif
631 # ifndef NO_32BIT
632 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
633 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
634 # endif
635 done = 1;
636 }
637 #endif
638
639 #ifdef OPT_GENERIC_DITHER
640 if(!done && (auto_choose || want_dec == generic_dither))
641 {
642 chosen = "dithered generic";
643 fr->cpu_opts.type = generic_dither;
644 dithered = TRUE;
645 # ifndef NO_16BIT
646 fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
647 # ifndef NO_DOWNSAMPLE
648 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
649 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
650 # endif
651 # endif
652 done = 1;
653 }
654 #endif
655
656 # ifdef OPT_ALTIVEC
657 if(!done && (auto_choose || want_dec == altivec))
658 {
659 chosen = "AltiVec";
660 fr->cpu_opts.type = altivec;
661 # ifndef NO_16BIT
662 fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
663 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
664 # endif
665 # ifndef NO_REAL
666 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
667 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_altivec;
668 # endif
669 # ifndef NO_32BIT
670 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
671 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
672 # endif
673 done = 1;
674 }
675 # endif
676
677 # ifdef OPT_ARM
678 if(!done && (auto_choose || want_dec == arm))
679 {
680 chosen = "ARM";
681 fr->cpu_opts.type = arm;
682 # ifndef NO_16BIT
683 fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
684 # endif
685 done = 1;
686 }
687 # endif
688
689 # ifdef OPT_GENERIC
690 if(!done && (auto_choose || want_dec == generic))
691 {
692 chosen = "generic";
693 fr->cpu_opts.type = generic;
694 done = 1;
695 }
696 # endif
697
698 fr->cpu_opts.class = decclass(fr->cpu_opts.type);
699
700 # ifndef NO_8BIT
701 # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
702 /* Last chance to use some optimized routine via generic wrappers (for 8bit). */
703 if( fr->cpu_opts.type != ifuenf_dither
704 && fr->cpu_opts.type != generic_dither
705 && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
706 {
707 fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
708 fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
709 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo;
710 }
711 # endif
712 # endif
713
714 #ifdef OPT_DITHER
715 if(done && dithered)
716 {
717 /* run-time dither noise table generation */
718 if(!frame_dither_init(fr))
719 {
720 if(NOQUIET) error("Dither noise setup failed!");
721 return 0;
722 }
723 }
724 #endif
725
726 if(done)
727 {
728 if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
729 return 1;
730 }
731 else
732 {
733 if(NOQUIET) error("Could not set optimization!");
734 return 0;
735 }
736 }
737
738 enum optdec dectype(const char* decoder)
739 {
740 enum optdec dt;
741 if( (decoder == NULL)
742 || (decoder[0] == 0) )
743 return autodec;
744
745 for(dt=autodec; dt<nodec; ++dt)
746 if(!strcasecmp(decoder, decname[dt])) return dt;
747
748 return nodec; /* If we found nothing... */
749 }
750
#ifdef OPT_MULTI

/*
	List of decoders supported on the running machine.
	It starts out with one NULL slot per decoder option that is compiled in,
	plus a final NULL; check_decoders() writes the names in at runtime.
*/
static const char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_SSE
	NULL,
#endif
#ifdef OPT_3DNOWEXT
	NULL,
#endif
#ifdef OPT_3DNOW
	NULL,
#endif
#ifdef OPT_MMX
	NULL,
#endif
#ifdef OPT_I586
	NULL,
#endif
#ifdef OPT_I586_DITHER
	NULL,
#endif
#ifdef OPT_I486
	NULL,
#endif
#ifdef OPT_I386
	NULL,
#endif
#ifdef OPT_ALTIVEC
	NULL,
#endif
#ifdef OPT_X86_64
	NULL,
#endif
#ifdef OPT_ARM
	NULL,
#endif
#ifdef OPT_GENERIC_FLOAT
	NULL,
#endif
#ifdef OPT_GENERIC
	NULL,
#endif
#ifdef OPT_GENERIC_DITHER
	NULL,
#endif
	NULL
};
#endif
801
/* NULL-terminated list with the names of all decoders compiled into this build. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_SSE,
#endif
#ifdef OPT_3DNOWEXT
	dn_3DNowExt,
#endif
#ifdef OPT_3DNOW
	dn_3DNow,
#endif
#ifdef OPT_MMX
	dn_MMX,
#endif
#ifdef OPT_I586
	dn_i586,
#endif
#ifdef OPT_I586_DITHER
	dn_i586_dither,
#endif
#ifdef OPT_I486
	dn_i486,
#endif
#ifdef OPT_I386
	dn_i386,
#endif
#ifdef OPT_ALTIVEC
	dn_AltiVec,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_ARM,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL
};
845
/*
	Fill the list of supported decoders for the running machine.
	Without OPT_MULTI this does nothing, since only the full list (a single
	entry) is used then. Otherwise the CPU flags are fetched on x86 and the
	name of each compiled-in decoder that passes its test is appended to
	mpg123_supported_decoder_list in order.
*/
void check_decoders(void )
{
#ifndef OPT_MULTI
	/* Non-multi mode: nothing to detect. */
	return;
#else
	const char **slot = mpg123_supported_decoder_list;
#ifdef OPT_X86
	getcpuflags(&cpu_flags);
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#ifdef OPT_SSE
		if(cpu_sse(cpu_flags)) *slot++ = decname[sse];
#endif
#ifdef OPT_3DNOWEXT
		if(cpu_3dnowext(cpu_flags)) *slot++ = decname[dreidnowext];
#endif
#ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags)) *slot++ = decname[dreidnow];
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) *slot++ = decname[mmx];
#endif
#ifdef OPT_I586
		*slot++ = decname[ifuenf];
#endif
#ifdef OPT_I586_DITHER
		*slot++ = decname[ifuenf_dither];
#endif
	}
#endif
	/* An i486 build is simply assumed to run on an i486 CPU. */
#ifdef OPT_I486
	*slot++ = decname[ivier];
#endif
#ifdef OPT_ALTIVEC
	*slot++ = decname[altivec];
#endif
	/* Every supported x86 can do i386, and any CPU can do generic. */
#ifdef OPT_I386
	*slot++ = decname[idrei];
#endif
#ifdef OPT_X86_64
	*slot++ = decname[x86_64];
#endif
#ifdef OPT_ARM
	*slot++ = decname[arm];
#endif
#ifdef OPT_GENERIC
	*slot++ = decname[generic];
#endif
#ifdef OPT_GENERIC_DITHER
	*slot++ = decname[generic_dither];
#endif
#endif /* ndef OPT_MULTI */
}
904
905 const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
906 {
907 if(mh == NULL) return NULL;
908
909 return decname[mh->cpu_opts.type];
910 }
911
912 const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; }
913 const char attribute_align_arg **mpg123_supported_decoders(void)
914 {
915 #ifdef OPT_MULTI
916 return mpg123_supported_decoder_list;
917 #else
918 return mpg123_decoder_list;
919 #endif
920 }