/*
	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
	Microsoft Visual C++ functions. Originally developed for the ReactOS
	(<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
	projects.

	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	DEALINGS IN THE SOFTWARE.
*/
#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_
/*
	FIXME: review all "memory" clobbers, add/remove to match Visual C++
	behavior: some "obvious" memory barriers are not present in the Visual C++
	implementation - e.g. __stosX; on the other hand, some memory barriers that
	*are* present could have been missed

	NOTE: this is a *compatibility* header. Some functions may look wrong at
	first, but they're only "as wrong" as they would be on Visual C++. Our
	priority is compatibility

	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
	constraints and the uses of __volatile__ out of my... hat. Do not touch
	them. I hate cargo cult programming

	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
	barriers aren't there in Visual C++ (e.g. __stosX)

	NOTE: review all intrinsics with a return value, add/remove __volatile__
	where necessary. If an intrinsic whose value is ignored generates a no-op
	under Visual C++, __volatile__ must be omitted; if it always generates code
	(for example, if it has side effects), __volatile__ must be specified. GCC
	will only optimize out non-volatile asm blocks with outputs, so input-only
	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
	and follow Visual C++ behavior

	NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
	atomic operations. Test the version like this:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
	...

	Pay attention to the type of barrier. Make it match with what Visual C++
	would use in the same case
*/
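/*
	Illustrative sketch of the __volatile__ rule above (not itself part of the
	intrinsic set): a non-volatile asm whose only effect is producing an output
	may be dropped by GCC when that output goes unused, which is the desired
	behavior whenever Visual C++ would also emit nothing; marking the asm
	__volatile__ forces it to be emitted unconditionally.

	__asm__("bsfl %1, %0" : "=r"(index) : "rm"(mask));                removable if `index` is never read
	__asm__ __volatile__("outb %b0, %w1" : : "a"(data), "Nd"(port));  always emitted: I/O side effect
*/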
/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
#ifdef _x86_64
#define _alloca(s) __builtin_alloca(s)
#endif
/*** Memory barriers ***/

__INTRIN_INLINE void __faststorefence(void)
{
	long local;
	__asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
}

__INTRIN_INLINE void _mm_lfence(void)
{
	__asm__ __volatile__("lfence");
}

__INTRIN_INLINE void _mm_sfence(void)
{
	__asm__ __volatile__("sfence");
}

__INTRIN_INLINE void _ReadWriteBarrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}

/* GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier
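/*
	Usage sketch (illustrative only): _ReadWriteBarrier() is a compiler-level
	fence - it keeps GCC from reordering memory accesses across it but emits no
	instruction - while __faststorefence(), _mm_lfence() and _mm_sfence() emit
	actual fence/locked instructions for hardware ordering.

	buffer[0] = 42;
	_ReadWriteBarrier();      compiler may not sink the store below this point
	ready = 1;
	__faststorefence();       hardware store ordering, e.g. before signalling another CPU
*/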
/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	/* NOTE: ditto */
	__sync_synchronize();
	return (void *)__sync_lock_test_and_set(Target, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	return __sync_fetch_and_add(Addend, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
	return __sync_fetch_and_and(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
	return __sync_fetch_and_or(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
	return __sync_fetch_and_xor(value, mask);
}
#else

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	char retval = Comperand;
	__asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	short retval = Comperand;
	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	long retval = Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	void * retval = (void *)Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	long retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	void * retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	long retval = Value;
	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x & mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x & mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x & mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x | mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x | mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x | mask, x);
	}
	while(y != x);

	return y;
}
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}

#endif
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	long long retval = Comperand;

	__asm__
	(
		"lock; cmpxchg8b %[Destination]" :
		[retval] "+A" (retval) :
		[Destination] "m" (*Destination),
		"b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
		"c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
		"memory"
	);

	return retval;
}

#endif
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
	__asm__
	(
		"lock; add %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adc $0, %[Hi32];"
		"LABEL%=:;" :
		[Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
		[Value] "ir" (Value) :
		"memory"
	);

	return Value;
}
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
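/*
	Typical usage sketch (illustrative only): reference counting with the
	interlocked increment/decrement pair. The return value is the *new* count,
	so zero means the caller just released the last reference. `obj_refcount`
	and `free_object` are hypothetical names.

	_InterlockedIncrement(&obj_refcount);             acquire a reference
	if(_InterlockedDecrement(&obj_refcount) == 0)     release a reference
		free_object();
*/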
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif
/*** String operations ***/
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosb" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosw" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosl" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosq" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsb" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsw" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsd" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsq" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
#if defined(_M_AMD64)
/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
#else
/*** FS segment addressing ***/

__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

#endif
/*** Bit manipulation ***/

__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}

__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}
#ifdef _M_AMD64
__INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}
#endif
__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
/*
	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
	constraint (edx:eax) for the Mask argument, because it's the only way GCC
	can pass 64-bit operands around - passing the two 32 bit parts separately
	just confuses it. Also we declare Bit as an int and then truncate it to
	match Visual C++ behavior
*/
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}

__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#ifdef _M_AMD64
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	unsigned __int64 retval;
	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#else
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	union {
		__int64 int64part;
		struct {
			unsigned long lowpart;
			unsigned long hipart;
		};
	} retval;

	retval.int64part = value;
	__asm__("bswapl %[lowpart]\n"
	        "bswapl %[hipart]\n"
	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart)
	        : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
	return retval.int64part;
}
#endif
/*** 64-bit math ***/

__INTRIN_INLINE long long __emul(const int a, const int b)
{
	long long retval;
	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
	unsigned long long retval;
	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
#ifdef _M_AMD64
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
{
	__int64 retval;
	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
	unsigned __int64 retval;
	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
#endif
/*** Port I/O ***/

__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
	unsigned char byte;
	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
	return byte;
}

__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
	unsigned short word;
	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
	return word;
}

__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
	unsigned long dword;
	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
	return dword;
}
__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insb" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insw" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insl" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}
__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
/*** System information ***/

__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
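/*
	Usage sketch (illustrative only): leaf 0 returns the maximum supported leaf
	in CPUInfo[0] and the 12-byte vendor string in EBX/EDX/ECX order, i.e.
	CPUInfo[1], CPUInfo[3], CPUInfo[2]. Assumes <string.h> for memcpy.

	int info[4];
	char vendor[13];
	__cpuid(info, 0);
	memcpy(vendor + 0, &info[1], 4);
	memcpy(vendor + 4, &info[3], 4);
	memcpy(vendor + 8, &info[2], 4);
	vendor[12] = '\0';
*/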
__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#ifdef _M_AMD64
	unsigned long long low, high;
	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
	return low | (high << 32);
#else
	unsigned long long retval;
	__asm__ __volatile__("rdtsc" : "=A"(retval));
	return retval;
#endif
}

__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
	__asm__ __volatile__("push %0\n popf" : : "rim"(Value));
}

__INTRIN_INLINE uintptr_t __readeflags(void)
{
	uintptr_t retval;
	__asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
	return retval;
}

/*** Interrupts ***/

__INTRIN_INLINE void __debugbreak(void)
{
	__asm__("int $3");
}

__INTRIN_INLINE void __int2c(void)
{
	__asm__("int $0x2c");
}

__INTRIN_INLINE void _disable(void)
{
	__asm__("cli");
}

__INTRIN_INLINE void _enable(void)
{
	__asm__("sti");
}

__INTRIN_INLINE void __halt(void)
{
	__asm__("hlt" : : : "memory");
}
/*** Protected memory management ***/

#ifdef _M_AMD64

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}
__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
	return value;
}
#else

__INTRIN_INLINE unsigned long __readcr0(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}

#endif
#ifdef _M_AMD64
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
	unsigned __int64 value;

	switch (reg)
	{
	case 0:
		__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
		break;
	case 1:
		__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
		break;
	case 2:
		__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
		break;
	case 3:
		__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
		break;
	case 4:
		__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
		break;
	case 5:
		__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
		break;
	case 6:
		__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
		break;
	case 7:
		__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
		break;
	}

	return value;
}
__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
	switch (reg)
	{
	case 0:
		__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
		break;
	case 1:
		__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
		break;
	case 2:
		__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
		break;
	case 3:
		__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
		break;
	case 4:
		__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
		break;
	case 5:
		__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
		break;
	case 6:
		__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
		break;
	case 7:
		__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
		break;
	}
}
#else
__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
	unsigned int value;

	switch (reg)
	{
	case 0:
		__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
		break;
	case 1:
		__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
		break;
	case 2:
		__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
		break;
	case 3:
		__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
		break;
	case 4:
		__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
		break;
	case 5:
		__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
		break;
	case 6:
		__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
		break;
	case 7:
		__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
		break;
	}

	return value;
}
__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
	switch (reg)
	{
	case 0:
		__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
		break;
	case 1:
		__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
		break;
	case 2:
		__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
		break;
	case 3:
		__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
		break;
	case 4:
		__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
		break;
	case 5:
		__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
		break;
	case 6:
		__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
		break;
	case 7:
		__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
		break;
	}
}
#endif
__INTRIN_INLINE void __invlpg(void * const Address)
{
	__asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
}

/*** System operations ***/

__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#ifdef _M_AMD64
	unsigned long low, high;
	__asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
	return ((unsigned long long)high << 32) | low;
#else
	unsigned long long retval;
	__asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
	return retval;
#endif
}
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}

__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
	unsigned long long retval;
	__asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
	return retval;
}

/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
	unsigned long retval;
	__asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
	return retval;
}
__INTRIN_INLINE void __wbinvd(void)
{
	__asm__ __volatile__("wbinvd");
}

__INTRIN_INLINE void __lidt(void *Source)
{
	__asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
	__asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
}

__INTRIN_INLINE void _mm_pause(void)
{
	__asm__ __volatile__("pause");
}
#endif /* KJK_INTRIN_X86_H_ */