1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58   NOTE: on GCC versions newer than 4.1.0, use the __sync_* built-ins for
59   barriers and atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 void* memmove(void* dest, const void* source, size_t num);
74 __INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
75 {
76 return memmove(dest, source, num);
77 }
78
79
80 /*** Stack frame juggling ***/
81 #define _ReturnAddress() (__builtin_return_address(0))
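/* NOTE: _AddressOfReturnAddress assumes a conventional stack frame, where
   __builtin_frame_address(0) points at the saved frame pointer and the return
   address sits in the slot directly above it; it will not behave correctly
   when the frame pointer is omitted */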
82 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
83 /* TODO: __getcallerseflags but how??? */
84
85 /* Maybe the same for x86? */
86 #ifdef __x86_64__
87 #define _alloca(s) __builtin_alloca(s)
88 #endif
89
90 /*** Memory barriers ***/
91
92 __INTRIN_INLINE void _ReadWriteBarrier(void)
93 {
94 __asm__ __volatile__("" : : : "memory");
95 }
96
97 /* GCC only supports full barriers */
98 #define _ReadBarrier _ReadWriteBarrier
99 #define _WriteBarrier _ReadWriteBarrier
100
101 __INTRIN_INLINE void _mm_mfence(void)
102 {
103 __asm__ __volatile__("mfence" : : : "memory");
104 }
105
106 __INTRIN_INLINE void _mm_lfence(void)
107 {
108 _ReadBarrier();
109 __asm__ __volatile__("lfence");
110 _ReadBarrier();
111 }
112
113 __INTRIN_INLINE void _mm_sfence(void)
114 {
115 _WriteBarrier();
116 __asm__ __volatile__("sfence");
117 _WriteBarrier();
118 }
119
120 #ifdef __x86_64__
121 __INTRIN_INLINE void __faststorefence(void)
122 {
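/* a locked read-modify-write of a dummy stack slot acts as a full memory
   barrier (earlier stores are ordered before later loads) and is typically
   cheaper than an mfence, which is the point of __faststorefence */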
123 long local;
124     __asm__ __volatile__("lock; orl $0, %0;" : "+m"(local));
125 }
126 #endif
127
128
129 /*** Atomic operations ***/
130
131 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
132
133 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
134 {
135 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
136 }
137
138 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
139 {
140 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
141 }
142
143 #ifndef __clang__
144
145 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
146 {
147 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
148 }
149
150 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
151 {
152 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
153 }
154
155 #endif
156
157 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
158 {
159 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
160 __sync_synchronize();
161 return __sync_lock_test_and_set(Target, Value);
162 }
163
164 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
165 {
166 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
167 __sync_synchronize();
168 return __sync_lock_test_and_set(Target, Value);
169 }
170
171 #ifndef __clang__
172
173 __INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
174 {
175 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
176 __sync_synchronize();
177 return __sync_lock_test_and_set(Target, Value);
178 }
179
180 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
181 {
182 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
183 __sync_synchronize();
184 return (void *)__sync_lock_test_and_set(Target, Value);
185 }
186
187 #endif
188
189 #if defined(__x86_64__)
190 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, long long Value)
191 {
192 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
193 __sync_synchronize();
194 return __sync_lock_test_and_set(Target, Value);
195 }
196 #endif
197
198 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
199 {
200 return __sync_fetch_and_add(Addend, Value);
201 }
202
203 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
204 {
205 return __sync_fetch_and_add(Addend, Value);
206 }
207
208 #ifndef __clang__
209 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
210 {
211 return __sync_fetch_and_add(Addend, Value);
212 }
213 #endif
214
215 #if defined(__x86_64__)
216 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, long long Value)
217 {
218 return __sync_fetch_and_add(Addend, Value);
219 }
220 #endif
221
222 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
223 {
224 return __sync_fetch_and_and(value, mask);
225 }
226
227 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
228 {
229 return __sync_fetch_and_and(value, mask);
230 }
231
232 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
233 {
234 return __sync_fetch_and_and(value, mask);
235 }
236
237 #if defined(__x86_64__)
238 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * value, long long mask)
239 {
240 return __sync_fetch_and_and(value, mask);
241 }
242 #endif
243
244 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
245 {
246 return __sync_fetch_and_or(value, mask);
247 }
248
249 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
250 {
251 return __sync_fetch_and_or(value, mask);
252 }
253
254 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
255 {
256 return __sync_fetch_and_or(value, mask);
257 }
258
259 #if defined(__x86_64__)
260 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * value, long long mask)
261 {
262 return __sync_fetch_and_or(value, mask);
263 }
264 #endif
265
266 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
267 {
268 return __sync_fetch_and_xor(value, mask);
269 }
270
271 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
272 {
273 return __sync_fetch_and_xor(value, mask);
274 }
275
276 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
277 {
278 return __sync_fetch_and_xor(value, mask);
279 }
280
281 #if defined(__x86_64__)
282 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * value, long long mask)
283 {
284 return __sync_fetch_and_xor(value, mask);
285 }
286 #endif
287
288 #ifndef __clang__
289 __INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
290 {
291 return __sync_sub_and_fetch(lpAddend, 1);
292 }
293
294 __INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
295 {
296 return __sync_add_and_fetch(lpAddend, 1);
297 }
298 #endif
299
300 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
301 {
302 return __sync_sub_and_fetch(lpAddend, 1);
303 }
304
305 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
306 {
307 return __sync_add_and_fetch(lpAddend, 1);
308 }
309
310 #if defined(__x86_64__)
311 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
312 {
313 return __sync_sub_and_fetch(lpAddend, 1);
314 }
315
316 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
317 {
318 return __sync_add_and_fetch(lpAddend, 1);
319 }
320 #endif
321
322 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
323
324 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
325 {
326 char retval = Comperand;
327 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
328 return retval;
329 }
330
331 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
332 {
333 short retval = Comperand;
334 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
335 return retval;
336 }
337
338 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
339 {
340 long retval = Comperand;
341 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
342 return retval;
343 }
344
345 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
346 {
347 void * retval = (void *)Comperand;
348 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
349 return retval;
350 }
351
352 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
353 {
354 char retval = Value;
355 	__asm__("xchgb %[retval], %[Target]" : [retval] "+q" (retval) : [Target] "m" (*Target) : "memory");
356 return retval;
357 }
358
359 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
360 {
361 short retval = Value;
362 __asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
363 return retval;
364 }
365
366 __INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
367 {
368 long retval = Value;
369 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
370 return retval;
371 }
372
373 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
374 {
375 void * retval = Value;
376 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
377 return retval;
378 }
379
380 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
381 {
382 char retval = Value;
383 	__asm__("lock; xaddb %[retval], %[Addend]" : [retval] "+q" (retval) : [Addend] "m" (*Addend) : "memory");
384 return retval;
385 }
386
387 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
388 {
389 short retval = Value;
390 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
391 return retval;
392 }
393
394 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
395 {
396 long retval = Value;
397 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
398 return retval;
399 }
400
401 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
402 {
403 char x;
404 char y;
405
406 y = *value;
407
408 do
409 {
410 x = y;
411 y = _InterlockedCompareExchange8(value, x & mask, x);
412 }
413 while(y != x);
414
415 return y;
416 }
417
418 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
419 {
420 short x;
421 short y;
422
423 y = *value;
424
425 do
426 {
427 x = y;
428 y = _InterlockedCompareExchange16(value, x & mask, x);
429 }
430 while(y != x);
431
432 return y;
433 }
434
435 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
436 {
437 long x;
438 long y;
439
440 y = *value;
441
442 do
443 {
444 x = y;
445 y = _InterlockedCompareExchange(value, x & mask, x);
446 }
447 while(y != x);
448
449 return y;
450 }
451
452 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
453 {
454 char x;
455 char y;
456
457 y = *value;
458
459 do
460 {
461 x = y;
462 y = _InterlockedCompareExchange8(value, x | mask, x);
463 }
464 while(y != x);
465
466 return y;
467 }
468
469 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
470 {
471 short x;
472 short y;
473
474 y = *value;
475
476 do
477 {
478 x = y;
479 y = _InterlockedCompareExchange16(value, x | mask, x);
480 }
481 while(y != x);
482
483 return y;
484 }
485
486 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
487 {
488 long x;
489 long y;
490
491 y = *value;
492
493 do
494 {
495 x = y;
496 y = _InterlockedCompareExchange(value, x | mask, x);
497 }
498 while(y != x);
499
500 return y;
501 }
502
503 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
504 {
505 char x;
506 char y;
507
508 y = *value;
509
510 do
511 {
512 x = y;
513 y = _InterlockedCompareExchange8(value, x ^ mask, x);
514 }
515 while(y != x);
516
517 return y;
518 }
519
520 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
521 {
522 short x;
523 short y;
524
525 y = *value;
526
527 do
528 {
529 x = y;
530 y = _InterlockedCompareExchange16(value, x ^ mask, x);
531 }
532 while(y != x);
533
534 return y;
535 }
536
537 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
538 {
539 long x;
540 long y;
541
542 y = *value;
543
544 do
545 {
546 x = y;
547 y = _InterlockedCompareExchange(value, x ^ mask, x);
548 }
549 while(y != x);
550
551 return y;
552 }
553
554 __INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
555 {
556 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
557 }
558
559 __INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
560 {
561 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
562 }
563
564 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
565 {
566 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
567 }
568
569 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
570 {
571 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
572 }
573
574 #if defined(__x86_64__)
575 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
576 {
577 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
578 }
579
580 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
581 {
582 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
583 }
584 #endif
585
586 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
587
588 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
589
590 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
591 {
592 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
593 }
594
595 #else
596
597 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
598 {
599 long long retval = Comperand;
600
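	/* cmpxchg8b compares EDX:EAX against the destination and, if they are
	   equal, stores ECX:EBX into it; the "A" constraint binds retval to
	   EDX:EAX, while the low and high halves of Exchange are passed in EBX
	   and ECX below */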
601 __asm__
602 (
603 "lock; cmpxchg8b %[Destination]" :
604 [retval] "+A" (retval) :
605 [Destination] "m" (*Destination),
606 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
607 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
608 "memory"
609 );
610
611 return retval;
612 }
613
614 #endif
615
616 #ifdef __i386__
617 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, long Value)
618 {
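	/* NOTE: the locked addl updates the low dword; if it carried (so the jae
	   is not taken), the locked adcl then folds the carry into the high dword.
	   The two halves are not written as one atomic 64-bit update, which is the
	   same trade-off the Visual C++ intrinsic makes for statistic counters */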
619 __asm__
620 (
621 "lock; addl %[Value], %[Lo32];"
622 "jae LABEL%=;"
623 "lock; adcl $0, %[Hi32];"
624 "LABEL%=:;" :
625 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
626 [Value] "ir" (Value) :
627 "memory"
628 );
629
630 return Value;
631 }
632 #endif /* __i386__ */
633
634 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, long b)
635 {
636 unsigned char retval;
637 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
638 return retval;
639 }
640
641 #if defined(__x86_64__)
642 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, long long b)
643 {
644 unsigned char retval;
645 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
646 return retval;
647 }
648 #endif
649
650 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, long b)
651 {
652 unsigned char retval;
653 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
654 return retval;
655 }
656
657 #if defined(__x86_64__)
658 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, long long b)
659 {
660 unsigned char retval;
661 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
662 return retval;
663 }
664 #endif
665
666 /*** String operations ***/
667
668 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
669 __INTRIN_INLINE void __stosb(unsigned char * Dest, unsigned char Data, size_t Count)
670 {
671 __asm__ __volatile__
672 (
673 "rep; stosb" :
674 [Dest] "=D" (Dest), [Count] "=c" (Count) :
675 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
676 );
677 }
678
679 __INTRIN_INLINE void __stosw(unsigned short * Dest, unsigned short Data, size_t Count)
680 {
681 __asm__ __volatile__
682 (
683 "rep; stosw" :
684 [Dest] "=D" (Dest), [Count] "=c" (Count) :
685 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
686 );
687 }
688
689 __INTRIN_INLINE void __stosd(unsigned long * Dest, unsigned long Data, size_t Count)
690 {
691 __asm__ __volatile__
692 (
693 "rep; stosl" :
694 [Dest] "=D" (Dest), [Count] "=c" (Count) :
695 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
696 );
697 }
698
699 #ifdef __x86_64__
700 __INTRIN_INLINE void __stosq(unsigned long long * Dest, unsigned long long Data, size_t Count)
701 {
702 __asm__ __volatile__
703 (
704 "rep; stosq" :
705 [Dest] "=D" (Dest), [Count] "=c" (Count) :
706 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
707 );
708 }
709 #endif
710
711 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
712 {
713 __asm__ __volatile__
714 (
715 "rep; movsb" :
716 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
717 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
718 );
719 }
720
721 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
722 {
723 __asm__ __volatile__
724 (
725 "rep; movsw" :
726 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
727 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
728 );
729 }
730
731 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
732 {
733 __asm__ __volatile__
734 (
735 "rep; movsd" :
736 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
737 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
738 );
739 }
740
741 #ifdef __x86_64__
742 __INTRIN_INLINE void __movsq(unsigned long long * Destination, const unsigned long long * Source, size_t Count)
743 {
744 __asm__ __volatile__
745 (
746 "rep; movsq" :
747 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
748 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
749 );
750 }
751 #endif
752
753 #if defined(__x86_64__)
754
755 /*** GS segment addressing ***/
756
757 __INTRIN_INLINE void __writegsbyte(unsigned long Offset, unsigned char Data)
758 {
759 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
760 }
761
762 __INTRIN_INLINE void __writegsword(unsigned long Offset, unsigned short Data)
763 {
764 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
765 }
766
767 __INTRIN_INLINE void __writegsdword(unsigned long Offset, unsigned long Data)
768 {
769 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
770 }
771
772 __INTRIN_INLINE void __writegsqword(unsigned long Offset, unsigned long long Data)
773 {
774 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
775 }
776
777 __INTRIN_INLINE unsigned char __readgsbyte(unsigned long Offset)
778 {
779 unsigned char value;
780 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
781 return value;
782 }
783
784 __INTRIN_INLINE unsigned short __readgsword(unsigned long Offset)
785 {
786 unsigned short value;
787 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
788 return value;
789 }
790
791 __INTRIN_INLINE unsigned long __readgsdword(unsigned long Offset)
792 {
793 unsigned long value;
794 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
795 return value;
796 }
797
798 __INTRIN_INLINE unsigned long long __readgsqword(unsigned long Offset)
799 {
800 unsigned long long value;
801 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
802 return value;
803 }
804
805 __INTRIN_INLINE void __incgsbyte(unsigned long Offset)
806 {
807 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
808 }
809
810 __INTRIN_INLINE void __incgsword(unsigned long Offset)
811 {
812 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
813 }
814
815 __INTRIN_INLINE void __incgsdword(unsigned long Offset)
816 {
817 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
818 }
819
820 __INTRIN_INLINE void __incgsqword(unsigned long Offset)
821 {
822 __asm__ __volatile__("incq %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
823 }
824
825 __INTRIN_INLINE void __addgsbyte(unsigned long Offset, unsigned char Data)
826 {
827 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
828 }
829
830 __INTRIN_INLINE void __addgsword(unsigned long Offset, unsigned short Data)
831 {
832 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
833 }
834
835 __INTRIN_INLINE void __addgsdword(unsigned long Offset, unsigned int Data)
836 {
837 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
838 }
839
840 __INTRIN_INLINE void __addgsqword(unsigned long Offset, unsigned long long Data)
841 {
842 	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
843 }
844
845 #else /* defined(__x86_64__) */
846
847 /*** FS segment addressing ***/
848
849 __INTRIN_INLINE void __writefsbyte(unsigned long Offset, unsigned char Data)
850 {
851 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
852 }
853
854 __INTRIN_INLINE void __writefsword(unsigned long Offset, unsigned short Data)
855 {
856 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
857 }
858
859 __INTRIN_INLINE void __writefsdword(unsigned long Offset, unsigned long Data)
860 {
861 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
862 }
863
864 __INTRIN_INLINE unsigned char __readfsbyte(unsigned long Offset)
865 {
866 unsigned char value;
867 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
868 return value;
869 }
870
871 __INTRIN_INLINE unsigned short __readfsword(unsigned long Offset)
872 {
873 unsigned short value;
874 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
875 return value;
876 }
877
878 __INTRIN_INLINE unsigned long __readfsdword(unsigned long Offset)
879 {
880 unsigned long value;
881 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
882 return value;
883 }
884
885 __INTRIN_INLINE void __incfsbyte(unsigned long Offset)
886 {
887 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
888 }
889
890 __INTRIN_INLINE void __incfsword(unsigned long Offset)
891 {
892 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
893 }
894
895 __INTRIN_INLINE void __incfsdword(unsigned long Offset)
896 {
897 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
898 }
899
900 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
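/* (with a non-constant Offset the Data argument is ignored and the truncated
   Offset value itself is added to the fs-relative location, as coded below) */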
901 __INTRIN_INLINE void __addfsbyte(unsigned long Offset, unsigned char Data)
902 {
903 if(!__builtin_constant_p(Offset))
904 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
905 else
906 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
907 }
908
909 __INTRIN_INLINE void __addfsword(unsigned long Offset, unsigned short Data)
910 {
911 if(!__builtin_constant_p(Offset))
912 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
913 else
914 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
915 }
916
917 __INTRIN_INLINE void __addfsdword(unsigned long Offset, unsigned long Data)
918 {
919 if(!__builtin_constant_p(Offset))
920 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
921 else
922 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
923 }
924
925 #endif /* defined(__x86_64__) */
926
927
928 /*** Bit manipulation ***/
929
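/* NOTE: as with the Visual C++ intrinsics, when Mask is zero the return value
   is 0 and *Index is left with whatever bsf/bsr produced, i.e. undefined */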
930 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask)
931 {
932 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
933 return Mask ? 1 : 0;
934 }
935
936 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask)
937 {
938 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
939 return Mask ? 1 : 0;
940 }
941
942 /* NOTE: again, the bizarre implementation follows Visual C++ */
943 __INTRIN_INLINE unsigned char _bittest(const long * a, long b)
944 {
945 unsigned char retval;
946
947 if(__builtin_constant_p(b))
948 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
949 else
950 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
951
952 return retval;
953 }
954
955 #ifdef __x86_64__
956 __INTRIN_INLINE unsigned char _BitScanForward64(unsigned long * Index, unsigned long long Mask)
957 {
958 	__asm__("bsfq %[Mask], %q[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
959 return Mask ? 1 : 0;
960 }
961
962 __INTRIN_INLINE unsigned char _BitScanReverse64(unsigned long * Index, unsigned long long Mask)
963 {
964 	__asm__("bsrq %[Mask], %q[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
965 return Mask ? 1 : 0;
966 }
967
968 __INTRIN_INLINE unsigned char _bittest64(const long long * a, long long b)
969 {
970 unsigned char retval;
971
972 if(__builtin_constant_p(b))
973 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
974 else
975 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
976
977 return retval;
978 }
979 #endif
980
981 __INTRIN_INLINE unsigned char _bittestandcomplement(long * a, long b)
982 {
983 unsigned char retval;
984
985 if(__builtin_constant_p(b))
986 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
987 else
988 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
989
990 return retval;
991 }
992
993 __INTRIN_INLINE unsigned char _bittestandreset(long * a, long b)
994 {
995 unsigned char retval;
996
997 if(__builtin_constant_p(b))
998 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
999 else
1000 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1001
1002 return retval;
1003 }
1004
1005 __INTRIN_INLINE unsigned char _bittestandset(long * a, long b)
1006 {
1007 unsigned char retval;
1008
1009 if(__builtin_constant_p(b))
1010 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1011 else
1012 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1013
1014 return retval;
1015 }
1016
1017 #ifdef __x86_64__
1018
1019 __INTRIN_INLINE unsigned char _bittestandset64(long long * a, long long b)
1020 {
1021 unsigned char retval;
1022
1023 if(__builtin_constant_p(b))
1024 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1025 else
1026 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1027
1028 return retval;
1029 }
1030
1031 __INTRIN_INLINE unsigned char _bittestandreset64(long long * a, long long b)
1032 {
1033 unsigned char retval;
1034
1035 if(__builtin_constant_p(b))
1036 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1037 else
1038 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1039
1040 return retval;
1041 }
1042
1043 __INTRIN_INLINE unsigned char _bittestandcomplement64(long long * a, long long b)
1044 {
1045 unsigned char retval;
1046
1047 if(__builtin_constant_p(b))
1048 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1049 else
1050 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1051
1052 return retval;
1053 }
1054
1055 #endif
1056
1057 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
1058 {
1059 unsigned char retval;
1060 	__asm__("rolb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1061 return retval;
1062 }
1063
1064 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
1065 {
1066 unsigned short retval;
1067 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1068 return retval;
1069 }
1070
1071 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
1072 {
1073 unsigned int retval;
1074 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1075 return retval;
1076 }
1077
1078 #ifdef __x86_64__
1079 __INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
1080 {
1081 unsigned long long retval;
1082 	__asm__("rolq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1083 return retval;
1084 }
1085 #else
1086 __INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
1087 {
1088 /* FIXME: this is probably not optimal */
1089 return (value << shift) | (value >> (64 - shift));
1090 }
1091 #endif
1092
1093 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
1094 {
1095 unsigned int retval;
1096 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1097 return retval;
1098 }
1099
1100 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
1101 {
1102 unsigned char retval;
1103 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1104 return retval;
1105 }
1106
1107 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
1108 {
1109 unsigned short retval;
1110 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1111 return retval;
1112 }
1113
1114 #ifdef __x86_64__
1115 __INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
1116 {
1117 unsigned long long retval;
1118 	__asm__("rorq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1119 return retval;
1120 }
1121 #else
1122 __INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
1123 {
1124 /* FIXME: this is probably not optimal */
1125 return (value >> shift) | (value << (64 - shift));
1126 }
1127 #endif
1128
1129 __INTRIN_INLINE unsigned long __cdecl _lrotl(unsigned long value, int shift)
1130 {
1131 unsigned long retval;
1132 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1133 return retval;
1134 }
1135
1136 __INTRIN_INLINE unsigned long __cdecl _lrotr(unsigned long value, int shift)
1137 {
1138 unsigned long retval;
1139 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1140 return retval;
1141 }
1142
1143 /*
1144 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1145 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1146 can pass 64-bit operands around - passing the two 32 bit parts separately
1147 just confuses it. Also we declare Bit as an int and then truncate it to
1148 match Visual C++ behavior
1149 */
1150 __INTRIN_INLINE unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
1151 {
1152 unsigned long long retval = Mask;
1153
1154 __asm__
1155 (
1156 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1157 "+A" (retval) :
1158 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1159 );
1160
1161 return retval;
1162 }
1163
1164 __INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
1165 {
1166 long long retval = Mask;
1167
1168 __asm__
1169 (
1170 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1171 "+A" (retval) :
1172 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1173 );
1174
1175 return retval;
1176 }
1177
1178 __INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
1179 {
1180 unsigned long long retval = Mask;
1181
1182 __asm__
1183 (
1184 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1185 "+A" (retval) :
1186 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1187 );
1188
1189 return retval;
1190 }
1191
1192 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
1193 {
1194 unsigned short retval;
1195 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1196 return retval;
1197 }
1198
1199 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
1200 {
1201 unsigned long retval;
1202 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1203 return retval;
1204 }
1205
1206 #ifdef __x86_64__
1207 __INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
1208 {
1209 unsigned long long retval;
1210 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1211 return retval;
1212 }
1213 #else
1214 __INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
1215 {
1216 union {
1217 unsigned long long int64part;
1218 struct {
1219 unsigned long lowpart;
1220 unsigned long hipart;
1221 };
1222 } retval;
1223 retval.int64part = value;
1224 __asm__("bswapl %[lowpart]\n"
1225 "bswapl %[hipart]\n"
1226 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1227 return retval.int64part;
1228 }
1229 #endif
1230
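/* NOTE: these fallbacks use the GCC built-ins rather than the lzcnt/popcnt
   instructions: __builtin_clz is undefined for a zero input (hardware lzcnt
   returns the operand width), and because the argument is promoted to
   unsigned int, __lzcnt16 counts 16 extra leading zero bits */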
1231 __INTRIN_INLINE unsigned int __lzcnt(unsigned int value)
1232 {
1233 return __builtin_clz(value);
1234 }
1235
1236 __INTRIN_INLINE unsigned short __lzcnt16(unsigned short value)
1237 {
1238 return __builtin_clz(value);
1239 }
1240
1241 __INTRIN_INLINE unsigned int __popcnt(unsigned int value)
1242 {
1243 return __builtin_popcount(value);
1244 }
1245
1246 __INTRIN_INLINE unsigned short __popcnt16(unsigned short value)
1247 {
1248 return __builtin_popcount(value);
1249 }
1250
1251 #ifdef __x86_64__
1252 __INTRIN_INLINE unsigned long long __lzcnt64(unsigned long long value)
1253 {
1254 return __builtin_clzll(value);
1255 }
1256
1257 __INTRIN_INLINE unsigned long long __popcnt64(unsigned long long value)
1258 {
1259 return __builtin_popcountll(value);
1260 }
1261 #endif
1262
1263 /*** 64-bit math ***/
1264
1265 __INTRIN_INLINE long long __emul(int a, int b)
1266 {
1267 long long retval;
1268 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1269 return retval;
1270 }
1271
1272 __INTRIN_INLINE unsigned long long __emulu(unsigned int a, unsigned int b)
1273 {
1274 unsigned long long retval;
1275 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1276 return retval;
1277 }
1278
1279 __INTRIN_INLINE long long __cdecl _abs64(long long value)
1280 {
1281 return (value >= 0) ? value : -value;
1282 }
1283
1284 #ifdef __x86_64__
1285
1286 __INTRIN_INLINE long long __mulh(long long a, long long b)
1287 {
1288 long long retval;
1289 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1290 return retval;
1291 }
1292
1293 __INTRIN_INLINE unsigned long long __umulh(unsigned long long a, unsigned long long b)
1294 {
1295 unsigned long long retval;
1296 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1297 return retval;
1298 }
1299
1300 #endif
1301
1302 /*** Port I/O ***/
1303
1304 __INTRIN_INLINE unsigned char __inbyte(unsigned short Port)
1305 {
1306 unsigned char byte;
1307 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1308 return byte;
1309 }
1310
1311 __INTRIN_INLINE unsigned short __inword(unsigned short Port)
1312 {
1313 unsigned short word;
1314 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1315 return word;
1316 }
1317
1318 __INTRIN_INLINE unsigned long __indword(unsigned short Port)
1319 {
1320 unsigned long dword;
1321 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1322 return dword;
1323 }
1324
1325 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1326 {
1327 __asm__ __volatile__
1328 (
1329 "rep; insb" :
1330 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1331 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1332 "memory"
1333 );
1334 }
1335
1336 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1337 {
1338 __asm__ __volatile__
1339 (
1340 "rep; insw" :
1341 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1342 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1343 "memory"
1344 );
1345 }
1346
1347 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1348 {
1349 __asm__ __volatile__
1350 (
1351 "rep; insl" :
1352 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1353 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1354 "memory"
1355 );
1356 }
1357
1358 __INTRIN_INLINE void __outbyte(unsigned short Port, unsigned char Data)
1359 {
1360 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1361 }
1362
1363 __INTRIN_INLINE void __outword(unsigned short Port, unsigned short Data)
1364 {
1365 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1366 }
1367
1368 __INTRIN_INLINE void __outdword(unsigned short Port, unsigned long Data)
1369 {
1370 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1371 }
1372
1373 __INTRIN_INLINE void __outbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1374 {
1375 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1376 }
1377
1378 __INTRIN_INLINE void __outwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1379 {
1380 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1381 }
1382
1383 __INTRIN_INLINE void __outdwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1384 {
1385 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1386 }
1387
1388 __INTRIN_INLINE int _inp(unsigned short Port)
1389 {
1390 return __inbyte(Port);
1391 }
1392
1393 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1394 {
1395 return __inword(Port);
1396 }
1397
1398 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1399 {
1400 return __indword(Port);
1401 }
1402
1403 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1404 {
1405 __outbyte(Port, (unsigned char)databyte);
1406 return databyte;
1407 }
1408
1409 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1410 {
1411 __outword(Port, dataword);
1412 return dataword;
1413 }
1414
1415 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1416 {
1417 __outdword(Port, dataword);
1418 return dataword;
1419 }
1420
1421
1422 /*** System information ***/
1423
1424 __INTRIN_INLINE void __cpuid(int CPUInfo[4], int InfoType)
1425 {
1426 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1427 }
1428
1429 __INTRIN_INLINE void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue)
1430 {
1431 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType), "c" (ECXValue));
1432 }
1433
1434 __INTRIN_INLINE unsigned long long __rdtsc(void)
1435 {
1436 #ifdef __x86_64__
1437 unsigned long long low, high;
1438 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1439 return low | (high << 32);
1440 #else
1441 unsigned long long retval;
1442 __asm__ __volatile__("rdtsc" : "=A"(retval));
1443 return retval;
1444 #endif
1445 }
1446
1447 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1448 {
1449 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1450 }
1451
1452 __INTRIN_INLINE uintptr_t __readeflags(void)
1453 {
1454 uintptr_t retval;
1455 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1456 return retval;
1457 }
1458
1459 /*** Interrupts ***/
1460
1461 #ifdef __clang__
1462 #define __debugbreak() __asm__("int $3")
1463 #else
1464 __INTRIN_INLINE void __debugbreak(void)
1465 {
1466 __asm__("int $3");
1467 }
1468 #endif
1469
1470 __INTRIN_INLINE void __ud2(void)
1471 {
1472 __asm__("ud2");
1473 }
1474
1475 __INTRIN_INLINE void __int2c(void)
1476 {
1477 __asm__("int $0x2c");
1478 }
1479
1480 __INTRIN_INLINE void _disable(void)
1481 {
1482 __asm__("cli" : : : "memory");
1483 }
1484
1485 __INTRIN_INLINE void _enable(void)
1486 {
1487 __asm__("sti" : : : "memory");
1488 }
1489
1490 __INTRIN_INLINE void __halt(void)
1491 {
1492 __asm__("hlt" : : : "memory");
1493 }
1494
1495 __declspec(noreturn)
1496 __INTRIN_INLINE void __fastfail(unsigned int Code)
1497 {
1498 __asm__("int $0x29" : : "c"(Code) : "memory");
1499 __builtin_unreachable();
1500 }
1501
1502 /*** Protected memory management ***/
1503
1504 #ifdef __x86_64__
1505
1506 __INTRIN_INLINE void __writecr0(unsigned long long Data)
1507 {
1508 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1509 }
1510
1511 __INTRIN_INLINE void __writecr3(unsigned long long Data)
1512 {
1513 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1514 }
1515
1516 __INTRIN_INLINE void __writecr4(unsigned long long Data)
1517 {
1518 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1519 }
1520
1521 __INTRIN_INLINE void __writecr8(unsigned long long Data)
1522 {
1523 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1524 }
1525
1526 __INTRIN_INLINE unsigned long long __readcr0(void)
1527 {
1528 unsigned long long value;
1529 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1530 return value;
1531 }
1532
1533 __INTRIN_INLINE unsigned long long __readcr2(void)
1534 {
1535 unsigned long long value;
1536 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1537 return value;
1538 }
1539
1540 __INTRIN_INLINE unsigned long long __readcr3(void)
1541 {
1542 unsigned long long value;
1543 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1544 return value;
1545 }
1546
1547 __INTRIN_INLINE unsigned long long __readcr4(void)
1548 {
1549 unsigned long long value;
1550 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1551 return value;
1552 }
1553
1554 __INTRIN_INLINE unsigned long long __readcr8(void)
1555 {
1556 unsigned long long value;
1557 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1558 return value;
1559 }
1560
1561 #else /* __x86_64__ */
1562
1563 __INTRIN_INLINE void __writecr0(unsigned int Data)
1564 {
1565 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1566 }
1567
1568 __INTRIN_INLINE void __writecr3(unsigned int Data)
1569 {
1570 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1571 }
1572
1573 __INTRIN_INLINE void __writecr4(unsigned int Data)
1574 {
1575 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1576 }
1577
1578 __INTRIN_INLINE void __writecr8(unsigned int Data)
1579 {
1580 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1581 }
1582
1583 __INTRIN_INLINE unsigned long __readcr0(void)
1584 {
1585 unsigned long value;
1586 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1587 return value;
1588 }
1589
1590 __INTRIN_INLINE unsigned long __readcr2(void)
1591 {
1592 unsigned long value;
1593 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1594 return value;
1595 }
1596
1597 __INTRIN_INLINE unsigned long __readcr3(void)
1598 {
1599 unsigned long value;
1600 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1601 return value;
1602 }
1603
1604 __INTRIN_INLINE unsigned long __readcr4(void)
1605 {
1606 unsigned long value;
1607 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1608 return value;
1609 }
1610
1611 __INTRIN_INLINE unsigned long __readcr8(void)
1612 {
1613 unsigned long value;
1614 __asm__ __volatile__("mov %%cr8, %[value]" : [value] "=r" (value));
1615 return value;
1616 }
1617
1618 #endif /* __x86_64__ */
1619
1620 #ifdef __x86_64__
1621
1622 __INTRIN_INLINE unsigned long long __readdr(unsigned int reg)
1623 {
1624 unsigned long long value;
1625 switch (reg)
1626 {
1627 case 0:
1628 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1629 break;
1630 case 1:
1631 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1632 break;
1633 case 2:
1634 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1635 break;
1636 case 3:
1637 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1638 break;
1639 case 4:
1640 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1641 break;
1642 case 5:
1643 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1644 break;
1645 case 6:
1646 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1647 break;
1648 case 7:
1649 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1650 break;
1651 }
1652 return value;
1653 }
1654
1655 __INTRIN_INLINE void __writedr(unsigned reg, unsigned long long value)
1656 {
1657 switch (reg)
1658 {
1659 case 0:
1660 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1661 break;
1662 case 1:
1663 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1664 break;
1665 case 2:
1666 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1667 break;
1668 case 3:
1669 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1670 break;
1671 case 4:
1672 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1673 break;
1674 case 5:
1675 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1676 break;
1677 case 6:
1678 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1679 break;
1680 case 7:
1681 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1682 break;
1683 }
1684 }
1685
1686 #else /* __x86_64__ */
1687
1688 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1689 {
1690 unsigned int value;
1691 switch (reg)
1692 {
1693 case 0:
1694 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1695 break;
1696 case 1:
1697 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1698 break;
1699 case 2:
1700 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1701 break;
1702 case 3:
1703 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1704 break;
1705 case 4:
1706 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1707 break;
1708 case 5:
1709 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1710 break;
1711 case 6:
1712 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1713 break;
1714 case 7:
1715 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1716 break;
1717 }
1718 return value;
1719 }
1720
1721 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1722 {
1723 switch (reg)
1724 {
1725 case 0:
1726 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1727 break;
1728 case 1:
1729 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1730 break;
1731 case 2:
1732 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1733 break;
1734 case 3:
1735 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1736 break;
1737 case 4:
1738 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1739 break;
1740 case 5:
1741 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1742 break;
1743 case 6:
1744 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1745 break;
1746 case 7:
1747 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1748 break;
1749 }
1750 }
1751
1752 #endif /* __x86_64__ */
1753
1754 __INTRIN_INLINE void __invlpg(void *Address)
1755 {
1756 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
1757 }
1758
1759
1760 /*** System operations ***/
1761
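/* NOTE: rdmsr/wrmsr move the 64-bit value through EDX:EAX; the "A" constraint
   only describes that register pair on 32-bit targets, hence the separate
   x86_64 paths below */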
1762 __INTRIN_INLINE unsigned long long __readmsr(unsigned long reg)
1763 {
1764 #ifdef __x86_64__
1765 unsigned long low, high;
1766 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1767 return ((unsigned long long)high << 32) | low;
1768 #else
1769 unsigned long long retval;
1770 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1771 return retval;
1772 #endif
1773 }
1774
1775 __INTRIN_INLINE void __writemsr(unsigned long Register, unsigned long long Value)
1776 {
1777 #ifdef __x86_64__
1778 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1779 #else
1780 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1781 #endif
1782 }
1783
1784 __INTRIN_INLINE unsigned long long __readpmc(unsigned long counter)
1785 {
1786 unsigned long long retval;
1787 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1788 return retval;
1789 }
1790
1791 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1792 __INTRIN_INLINE unsigned long __segmentlimit(unsigned long a)
1793 {
1794 unsigned long retval;
1795 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1796 return retval;
1797 }
1798
1799 __INTRIN_INLINE void __wbinvd(void)
1800 {
1801 __asm__ __volatile__("wbinvd" : : : "memory");
1802 }
1803
1804 __INTRIN_INLINE void __lidt(void *Source)
1805 {
1806 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1807 }
1808
1809 __INTRIN_INLINE void __sidt(void *Destination)
1810 {
1811 	__asm__ __volatile__("sidt %0" : "=m"(*(short*)Destination) : : "memory");
1812 }
1813
1814 /*** Misc operations ***/
1815
1816 __INTRIN_INLINE void _mm_pause(void)
1817 {
1818 __asm__ __volatile__("pause" : : : "memory");
1819 }
1820
1821 __INTRIN_INLINE void __nop(void)
1822 {
1823 __asm__ __volatile__("nop");
1824 }
1825
1826 #ifdef __cplusplus
1827 }
1828 #endif
1829
1830 #endif /* KJK_INTRIN_X86_H_ */
1831
1832 /* EOF */