/*
    Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
    Microsoft Visual C++ functions. Originally developed for the ReactOS
    (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
    projects.

    Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
*/
#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_

/*
    FIXME: review all "memory" clobbers, add/remove to match Visual C++
    behavior: some "obvious" memory barriers are not present in the Visual C++
    implementation - e.g. __stosX; on the other hand, some memory barriers that
    *are* present could have been missed

    NOTE: this is a *compatibility* header. Some functions may look wrong at
    first, but they're only "as wrong" as they would be on Visual C++. Our
    priority is compatibility

    NOTE: unlike most people who write inline asm for GCC, I didn't pull the
    constraints and the uses of __volatile__ out of my... hat. Do not touch
    them. I hate cargo cult programming

    NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
    barriers aren't there in Visual C++ (e.g. __stosX)

    NOTE: review all intrinsics with a return value, add/remove __volatile__
    where necessary. If an intrinsic whose value is ignored generates a no-op
    under Visual C++, __volatile__ must be omitted; if it always generates code
    (for example, if it has side effects), __volatile__ must be specified. GCC
    will only optimize out non-volatile asm blocks with outputs, so input-only
    blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
    and follow Visual C++ behavior

    NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
    atomic operations. Test the version like this:

    #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

    Pay attention to the type of barrier. Make it match with what Visual C++
    would use in the same case
*/
/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
#define _alloca(s) __builtin_alloca(s)
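
/*
    Illustrative example (not part of the original header): _ReturnAddress()
    can be used to record who called an allocator or logging routine. The
    TraceAllocation function below is hypothetical.

    void MyAlloc(size_t size)
    {
        TraceAllocation(_ReturnAddress(), size);
    }
*/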

/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
#define _ReadWriteBarrier() __sync_synchronize()
#else
__INTRIN_INLINE void _MemoryBarrier(void)
{
    __asm__ __volatile__("" : : : "memory");
}
#define _ReadWriteBarrier() _MemoryBarrier()
#endif

/* BUGBUG: GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, const char Exchange, const char Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, const short Exchange, const short Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, const long Exchange, const long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * const Exchange, void * const Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * Target, const long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, const long long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
#endif

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * const Value)
{
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * Addend, const short Value)
{
    return __sync_fetch_and_add(Addend, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, const long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, const long long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}
#endif

__INTRIN_INLINE char _InterlockedAnd8(volatile char * value, const char mask)
{
    return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * value, const short mask)
{
    return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * value, const long mask)
{
    return __sync_fetch_and_and(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long _InterlockedAnd64(volatile long long * value, const long long mask)
{
    return __sync_fetch_and_and(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedOr8(volatile char * value, const char mask)
{
    return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * value, const short mask)
{
    return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE long _InterlockedOr(volatile long * value, const long mask)
{
    return __sync_fetch_and_or(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long _InterlockedOr64(volatile long long * value, const long long mask)
{
    return __sync_fetch_and_or(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedXor8(volatile char * value, const char mask)
{
    return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * value, const short mask)
{
    return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE long _InterlockedXor(volatile long * value, const long mask)
{
    return __sync_fetch_and_xor(value, mask);
}

#else

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, const char Exchange, const char Comperand)
{
    char retval = Comperand;
    __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, const short Exchange, const short Comperand)
{
    short retval = Comperand;
    __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, const long Exchange, const long Comperand)
{
    long retval = Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * const Exchange, void * const Comperand)
{
    void * retval = (void *)Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * Target, const long Value)
{
    long retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * const Value)
{
    void * retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * Addend, const short Value)
{
    short retval = Value;
    __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, const long Value)
{
    long retval = Value;
    __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}

__INTRIN_INLINE char _InterlockedAnd8(volatile char * value, const char mask)
{
    char x;
    char y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x & mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * value, const short mask)
{
    short x;
    short y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x & mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * value, const long mask)
{
    long x;
    long y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x & mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE char _InterlockedOr8(volatile char * value, const char mask)
{
    char x;
    char y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x | mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * value, const short mask)
{
    short x;
    short y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x | mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE long _InterlockedOr(volatile long * value, const long mask)
{
    long x;
    long y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x | mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE char _InterlockedXor8(volatile char * value, const char mask)
{
    char x;
    char y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x ^ mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * value, const short mask)
{
    short x;
    short y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x ^ mask, x);
    }
    while(y != x);

    return y;
}

__INTRIN_INLINE long _InterlockedXor(volatile long * value, const long mask)
{
    long x;
    long y;

    y = *value;

    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x ^ mask, x);
    }
    while(y != x);

    return y;
}

#endif

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, const long long Exchange, const long long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, const long long Exchange, const long long Comperand)
{
    long long retval = Comperand;

    __asm__
    (
        "lock; cmpxchg8b %[Destination]" :
        [retval] "+A" (retval) :
        [Destination] "m" (*Destination),
        "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
        "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
        "memory"
    );

    return retval;
}

#endif

__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, const long Value)
{
    __asm__
    (
        "lock; add %[Value], %[Lo32];"
        "lock; adc $0, %[Hi32];" :
        [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
        [Value] "ir" (Value) :
        "memory"
    );

    return Value;
}

__INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
{
    return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
{
    return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
{
    return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
{
    return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif
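
/*
    Illustrative example (not part of the original header): reference
    counting with _InterlockedIncrement/_InterlockedDecrement, which return
    the *new* value. The object type and destroy function are hypothetical.

    void example_release(EXAMPLE_OBJECT * Object)
    {
        if(_InterlockedDecrement(&Object->RefCount) == 0)
            ExampleDestroy(Object);
    }
*/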

__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif

__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif

/*** String operations ***/
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosb" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosw" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosl" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsb" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsw" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsl" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}
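
/*
    Illustrative example (not part of the original header): __stosd as a
    dword-granular memset, e.g. filling a buffer with a pattern.

    unsigned long example_buffer[1024];
    __stosd(example_buffer, 0xDEADBEEF, 1024);
*/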

#if defined(_M_AMD64)

/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
    __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
    __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
    unsigned char value;
    __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
    unsigned short value;
    __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
    unsigned long value;
    __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
    unsigned __int64 value;
    __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
    __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
    __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
    __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
    __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
    __asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

#endif /* _M_AMD64 */

/*** FS segment addressing ***/

__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
    __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
    unsigned char value;
    __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
    unsigned short value;
    __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
    unsigned long value;
    __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
    __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
    __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
    __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/*** Bit manipulation ***/

__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
    __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
    return Mask ? 1 : 0;
}

__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
    __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
    return Mask ? 1 : 0;
}

/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
    else
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
    unsigned char retval;
    __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
    unsigned short retval;
    __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
    unsigned long retval;
    __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
    unsigned long retval;
    __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
    unsigned char retval;
    __asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
    unsigned short retval;
    __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}
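
/*
    Illustrative example (not part of the original header): _rotl rotates the
    32-bit value left, carrying the shifted-out bits back in on the right.

    unsigned int r = _rotl(0x12345678, 8);    r == 0x34567812
*/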

/*
    NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
    constraint (edx:eax) for the Mask argument, because it's the only way GCC
    can pass 64-bit operands around - passing the two 32 bit parts separately
    just confuses it. Also we declare Bit as an int and then truncate it to
    match Visual C++ behavior
*/
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
    unsigned long long retval = Mask;

    __asm__
    (
        "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}

__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
{
    unsigned long long retval = Mask;

    __asm__
    (
        "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
    unsigned long long retval = Mask;

    __asm__
    (
        "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}

__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
    unsigned short retval;
    __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
    return retval;
}

__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
    unsigned long retval;
    __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
    unsigned __int64 retval;
    __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
    return retval;
}
#else
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
    union
    {
        unsigned __int64 int64part;
        struct
        {
            unsigned long lowpart;
            unsigned long hipart;
        };
    } retval;

    retval.int64part = value;
    __asm__("bswapl %[lowpart]\n"
            "bswapl %[hipart]\n"
            : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
    return retval.int64part;
}
#endif
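
/*
    Illustrative example (not part of the original header): the _byteswap_*
    intrinsics reverse byte order, e.g. for network/host conversions.

    unsigned long be = _byteswap_ulong(0x12345678);    be == 0x78563412
    unsigned short hs = _byteswap_ushort(0xABCD);      hs == 0xCDAB
*/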

/*** 64-bit math ***/

__INTRIN_INLINE long long __emul(const int a, const int b)
{
    long long retval;
    __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
    unsigned long long retval;
    __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

#if defined(_M_AMD64)
static __inline__ __attribute__((always_inline)) __int64 __mulh(__int64 a, __int64 b)
{
    __int64 retval;
    __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

static __inline__ __attribute__((always_inline)) unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
    unsigned __int64 retval;
    __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}
#endif
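
/*
    Illustrative example (not part of the original header): __emul widens the
    product, so it never overflows the 32-bit operands.

    long long area = __emul(100000, 100000);    10000000000, too big for 32 bits
*/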

__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
    unsigned char byte;
    __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
    return byte;
}

__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
    unsigned short word;
    __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
    return word;
}

__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
    unsigned long dword;
    __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
    return dword;
}

__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insb" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insw" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insl" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
    __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
    __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
    __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

/*** System information ***/

__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
    __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
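
/*
    Illustrative example (not part of the original header): leaf 0 of CPUID
    returns the vendor string in EBX, EDX, ECX (in that order).

    int info[4];
    char vendor[13];
    __cpuid(info, 0);
    *(int *)(vendor + 0) = info[1];
    *(int *)(vendor + 4) = info[3];
    *(int *)(vendor + 8) = info[2];
    vendor[12] = '\0';    e.g. "GenuineIntel" or "AuthenticAMD"
*/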

__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#if defined(_M_AMD64)
    unsigned long long low, high;
    __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
    return low | (high << 32);
#else
    unsigned long long retval;
    __asm__ __volatile__("rdtsc" : "=A"(retval));
    return retval;
#endif
}

__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
    __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
}

__INTRIN_INLINE uintptr_t __readeflags(void)
{
    uintptr_t retval;
    __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
    return retval;
}

/*** Interrupts ***/

__INTRIN_INLINE void __debugbreak(void)
{
    __asm__("int $3");
}

__INTRIN_INLINE void __int2c(void)
{
    __asm__("int $0x2c");
}

__INTRIN_INLINE void _disable(void)
{
    __asm__("cli");
}

__INTRIN_INLINE void _enable(void)
{
    __asm__("sti");
}

/*** Protected memory management ***/

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

#if defined(_M_AMD64)
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
    return value;
}
#else
__INTRIN_INLINE unsigned long __readcr0(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}
#endif

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
    unsigned __int64 value;

    switch(reg)
    {
        case 0: __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value)); break;
        case 1: __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value)); break;
        case 2: __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value)); break;
        case 3: __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value)); break;
        case 4: __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value)); break;
        case 5: __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value)); break;
        case 6: __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value)); break;
        case 7: __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value)); break;
    }

    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
    switch(reg)
    {
        case 0: __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory"); break;
        case 1: __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory"); break;
        case 2: __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory"); break;
        case 3: __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory"); break;
        case 4: __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory"); break;
        case 5: __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory"); break;
        case 6: __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory"); break;
        case 7: __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory"); break;
    }
}
#else
__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
    unsigned int value;

    switch(reg)
    {
        case 0: __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value)); break;
        case 1: __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value)); break;
        case 2: __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value)); break;
        case 3: __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value)); break;
        case 4: __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value)); break;
        case 5: __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value)); break;
        case 6: __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value)); break;
        case 7: __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value)); break;
    }

    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
    switch(reg)
    {
        case 0: __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory"); break;
        case 1: __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory"); break;
        case 2: __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory"); break;
        case 3: __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory"); break;
        case 4: __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory"); break;
        case 5: __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory"); break;
        case 6: __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory"); break;
        case 7: __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory"); break;
    }
}
#endif

__INTRIN_INLINE void __invlpg(void * const Address)
{
    __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
}

/*** System operations ***/

__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#if defined(_M_AMD64)
    unsigned long low, high;
    __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
    return ((unsigned long long)high << 32) | low;
#else
    unsigned long long retval;
    __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
    return retval;
#endif
}

__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#if defined(_M_AMD64)
    __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
    __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
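
/*
    Illustrative example (not part of the original header): MSR 0x10 is the
    architectural IA32_TIME_STAMP_COUNTER, so in kernel mode __readmsr(0x10)
    reads the same counter that __rdtsc returns.

    unsigned long long tsc = __readmsr(0x10);
*/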

__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
    unsigned long long retval;
    __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
    return retval;
}

/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
    unsigned long retval;
    __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
    return retval;
}

__INTRIN_INLINE void __wbinvd(void)
{
    __asm__ __volatile__("wbinvd");
}

__INTRIN_INLINE void __lidt(void *Source)
{
    __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
    __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
}

__INTRIN_INLINE void _mm_pause(void)
{
    __asm__ __volatile__("pause");
}

#endif /* KJK_INTRIN_X86_H_ */