/*
	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
	Microsoft Visual C++ functions. Originally developed for the ReactOS
	(<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
	projects.

	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	DEALINGS IN THE SOFTWARE.
*/

#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_
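
/*
	NOTE (addition, not part of the original header): this file is normally
	included through <intrin.h>, which supplies __INTRIN_INLINE, __int64 and
	the standard integer types. The fallbacks below are a best-effort guess
	so the header can also be compiled stand-alone.
*/
#include <stddef.h> /* size_t */
#include <stdint.h> /* uintptr_t */

#ifndef __INTRIN_INLINE
#define __INTRIN_INLINE static __inline__ __attribute__((__always_inline__))
#endif

#ifndef __int64
#define __int64 long long /* assumption: mirrors the MinGW definition */
#endif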
/*
	FIXME: review all "memory" clobbers, add/remove to match Visual C++
	behavior: some "obvious" memory barriers are not present in the Visual C++
	implementation - e.g. __stosX; on the other hand, some memory barriers that
	*are* present could have been missed
*/
/*
	NOTE: this is a *compatibility* header. Some functions may look wrong at
	first, but they're only "as wrong" as they would be on Visual C++. Our
	priority is compatibility

	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
	constraints and the uses of __volatile__ out of my... hat. Do not touch
	them. I hate cargo cult programming

	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
	barriers aren't there in Visual C++ (e.g. __stosX)

	NOTE: review all intrinsics with a return value, add/remove __volatile__
	where necessary. If an intrinsic whose value is ignored generates a no-op
	under Visual C++, __volatile__ must be omitted; if it always generates code
	(for example, if it has side effects), __volatile__ must be specified. GCC
	will only optimize out non-volatile asm blocks with outputs, so input-only
	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
	and follow Visual C++ behavior

	NOTE: on GCC 4.1.0 and later, please use the __sync_* built-ins for
	barriers and atomic operations. Test the version like this:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
	...

	Pay attention to the type of barrier. Make it match with what Visual C++
	would use in the same case
*/
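
/*
	Illustrative example (addition, not part of the original header): an
	intrinsic with a side effect keeps __volatile__ so GCC can never discard
	it, while a pure value computation omits it and may be optimized away
	when its result is unused, matching the no-op Visual C++ generates:

		__asm__ __volatile__("outb %b0, %w1" : : "a" (Data), "Nd" (Port));
		__asm__("bswapl %0" : "=r" (retval) : "0" (value));
*/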
/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
#if defined(_M_AMD64)
#define _alloca(s) __builtin_alloca(s)
#endif
/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
#define _ReadWriteBarrier() __sync_synchronize()
#else
__INTRIN_INLINE void _MemoryBarrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}
#define _ReadWriteBarrier() _MemoryBarrier()
#endif

/* BUGBUG: GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
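
/*
	Usage sketch (addition, not part of the original header): the
	compare-exchange primitives above return the value the destination held
	before the operation, so callers typically retry until they win the race.
	Hypothetical example - atomically raise *Target to at least Value:

		long old = *Target;
		while(old < Value)
		{
			const long prev = _InterlockedCompareExchange(Target, Value, old);
			if(prev == old)
				break;
			old = prev;
		}
*/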
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	__sync_synchronize();
	return (void *)__sync_lock_test_and_set(Target, Value);
}
__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
	return __sync_fetch_and_xor(value, mask);
}

#else
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	char retval = Comperand;
	__asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	short retval = Comperand;
	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	long retval = Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	void * retval = (void *)Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
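
/*
	Note (addition, not part of the original header): cmpxchg implicitly
	compares against and returns the old value in EAX/AX/AL, which is why
	retval in the functions above is initialized to Comperand and pinned to
	the "a" constraint: after the instruction it holds the previous value of
	*Destination whether or not the exchange happened.
*/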
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	long retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	void * retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	long retval = Value;
	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
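
/*
	Note (addition, not part of the original header): x86 has no instruction
	that atomically applies AND/OR/XOR *and* returns the previous value, and
	pre-4.1 GCC lacks the __sync_fetch_and_* built-ins, so the fallbacks
	below emulate fetch-and-op with a compare-exchange retry loop.
*/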
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x & mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x & mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x & mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x | mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x | mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x | mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}

#endif
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	long long retval = Comperand;

	__asm__
	(
		"lock; cmpxchg8b %[Destination]" :
		[retval] "+A" (retval) :
		[Destination] "m" (*Destination),
		"b" ((unsigned long)((Exchange >>  0) & 0xFFFFFFFF)),
		"c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
		"memory"
	);

	return retval;
}

#endif
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
	__asm__
	(
		"lock; add %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adc $0, %[Hi32];"
		"LABEL%=:;" :
		[Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
		[Value] "ir" (Value) :
		"memory"
	);

	return Value;
}
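
/*
	Note (addition, not part of the original header): the add/adc pair above
	updates a 64-bit counter with two 32-bit locked operations - "jae" skips
	the carry propagation when the low dword did not wrap - so the counter is
	not read-atomic; only the increment itself is lossless. This mirrors how
	the Visual C++/WDK version of this intrinsic behaves.
*/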
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif
__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif
/*** String operations ***/
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosb" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosw" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosl" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
#if defined(_M_AMD64)
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosq" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
#endif
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsb" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsw" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsl" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
#if defined(_M_AMD64)
__INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsq" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
#endif
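
/*
	Usage sketch (addition, not part of the original header): the __stosX and
	__movsX intrinsics behave like element-wise memset() and memcpy().
	Hypothetical example:

		unsigned long src[1024], dst[1024];
		__stosd(src, 0xDEADBEEF, 1024);  // fill 1024 dwords
		__movsd(dst, src, 1024);         // copy them
*/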
#if defined(_M_AMD64)
/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	/* %q, not %k: a 32-bit operand would not assemble with addq */
	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
#else
/*** FS segment addressing ***/

__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
#endif
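
/*
	Usage sketch (addition, not part of the original header): on 32-bit
	Windows, FS points at the current thread's TEB, whose self-pointer lives
	at offset 0x18, so code can locate its own TEB like this:

		void *teb = (void *)__readfsdword(0x18);
*/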
/*** Bit manipulation ***/
__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}
#endif
__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
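
/*
	Worked example (addition, not part of the original header): a rotate
	feeds the bits that fall off one end back in at the other, so
	_rotl8(0x81, 1) == 0x03 (10000001b -> 00000011b), whereas the plain
	shift (0x81 << 1) & 0xFF would give 0x02.
*/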
/*
	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
	constraint (edx:eax) for the Mask argument, because it's the only way GCC
	can pass 64-bit operands around - passing the two 32 bit parts separately
	just confuses it. Also we declare Bit as an int and then truncate it to
	match Visual C++ behavior
*/
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
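
/*
	Note (addition, not part of the original header): the shift count is
	placed in CL, and x86 masks 32-bit shift counts to 5 bits, so these
	helpers do not behave like the C << and >> operators for counts >= 32
	(e.g. __ll_lshift(1, 33) shifts each half by 1 instead of producing
	1 << 33). Per the NOTE above, this is only "as wrong" as Visual C++.
*/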
__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}
__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	unsigned __int64 retval;
	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#else
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	union
	{
		unsigned __int64 int64part;
		struct
		{
			unsigned long lowpart;
			unsigned long hipart;
		};
	}
	retval;

	retval.int64part = value;
	/* Swap the bytes of each half, then exchange the halves */
	__asm__("bswapl %[lowpart]\n"
	        "bswapl %[hipart]\n"
	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart));
	return retval.int64part;
}
#endif
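
/*
	Worked example (addition, not part of the original header):
	_byteswap_ulong(0x12345678) == 0x78563412. The x86 variant of
	_byteswap_uint64 swaps each 32-bit half with bswap and then exchanges
	the halves, which is equivalent to a single 64-bit bswap.
*/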
/*** 64-bit math ***/
__INTRIN_INLINE long long __emul(const int a, const int b)
{
	long long retval;
	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
	unsigned long long retval;
	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
{
	__int64 retval;
	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
	unsigned __int64 retval;
	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
#endif
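
/*
	Worked example (addition, not part of the original header): __emul widens
	its operands before multiplying, so __emul(0x10000, 0x10000) returns
	0x100000000 even though the product overflows a 32-bit int, and __umulh
	returns the high half (RDX) of the 128-bit product.
*/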
/*** Port I/O ***/
__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
	unsigned char byte;
	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
	return byte;
}
__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
	unsigned short word;
	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
	return word;
}
__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
	unsigned long dword;
	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
	return dword;
}
__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insb" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}
__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insw" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}
__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insl" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}
__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
/*** System information ***/
__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
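
/*
	Usage sketch (addition, not part of the original header): leaf 0 returns
	the highest supported leaf in EAX and the vendor string in EBX, EDX, ECX
	- note the EBX/EDX/ECX order when reassembling it:

		int info[4];
		char vendor[13];
		__cpuid(info, 0);
		*(int *)(vendor + 0) = info[1];  // EBX
		*(int *)(vendor + 4) = info[3];  // EDX
		*(int *)(vendor + 8) = info[2];  // ECX
		vendor[12] = '\0';               // e.g. "GenuineIntel"
*/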
__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#ifdef _M_AMD64
	unsigned long long low, high;
	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
	return low | (high << 32);
#else
	unsigned long long retval;
	__asm__ __volatile__("rdtsc" : "=A" (retval));
	return retval;
#endif
}
__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
	__asm__ __volatile__("push %0\n popf" : : "rim" (Value));
}
__INTRIN_INLINE uintptr_t __readeflags(void)
{
	uintptr_t retval;
	__asm__ __volatile__("pushf\n pop %0" : "=rm" (retval));
	return retval;
}
/*** Interrupts ***/
__INTRIN_INLINE void __debugbreak(void)
{
	__asm__("int $3");
}

__INTRIN_INLINE void __int2c(void)
{
	__asm__("int $0x2c");
}

__INTRIN_INLINE void _disable(void)
{
	__asm__("cli");
}

__INTRIN_INLINE void _enable(void)
{
	__asm__("sti");
}

__INTRIN_INLINE void __halt(void)
{
	__asm__("hlt");
}
/*** Protected memory management ***/

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}
#if defined(_M_AMD64)
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}
__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
	return value;
}
#else
__INTRIN_INLINE unsigned long __readcr0(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}
#endif
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
	unsigned __int64 value;
	switch(reg)
	{
		case 0:
			__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
			break;
	}
	return value;
}
__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
	switch(reg)
	{
		case 0:
			__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
#else
__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
	unsigned int value;
	switch(reg)
	{
		case 0:
			__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
			break;
	}
	return value;
}
__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
	switch(reg)
	{
		case 0:
			__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
#endif
__INTRIN_INLINE void __invlpg(void * const Address)
{
	__asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
}
/*** System operations ***/
__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#ifdef _M_AMD64
	unsigned long low, high;
	__asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
	return ((unsigned long long)high << 32) | low;
#else
	unsigned long long retval;
	__asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
	return retval;
#endif
}
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
	unsigned long long retval;
	__asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
	return retval;
}
/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
	unsigned long retval;
	__asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
	return retval;
}
__INTRIN_INLINE void __wbinvd(void)
{
	__asm__ __volatile__("wbinvd");
}

__INTRIN_INLINE void __lidt(void *Source)
{
	__asm__ __volatile__("lidt %0" : : "m" (*(short *)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
	__asm__ __volatile__("sidt %0" : : "m" (*(short *)Destination) : "memory");
}

__INTRIN_INLINE void _mm_pause(void)
{
	__asm__ __volatile__("pause");
}

#endif /* KJK_INTRIN_X86_H_ */