/*
	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
	Microsoft Visual C++ functions. Originally developed for the ReactOS
	(<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
	projects.

	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	DEALINGS IN THE SOFTWARE.
*/

#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_

/*
	FIXME: review all "memory" clobbers, add/remove to match Visual C++
	behavior: some "obvious" memory barriers are not present in the Visual C++
	implementation - e.g. __stosX; on the other hand, some memory barriers that
	*are* present could have been missed

	NOTE: this is a *compatibility* header. Some functions may look wrong at
	first, but they're only "as wrong" as they would be on Visual C++. Our
	priority is compatibility

	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
	constraints and the uses of __volatile__ out of my... hat. Do not touch
	them. I hate cargo cult programming

	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
	barriers aren't there in Visual C++ (e.g. __stosX)

	NOTE: review all intrinsics with a return value, add/remove __volatile__
	where necessary. If an intrinsic whose value is ignored generates a no-op
	under Visual C++, __volatile__ must be omitted; if it always generates code
	(for example, if it has side effects), __volatile__ must be specified. GCC
	will only optimize out non-volatile asm blocks with outputs, so input-only
	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
	and follow Visual C++ behavior

	NOTE: on GCC 4.1.0 and later, use the __sync_* built-ins for barriers and
	atomic operations. Test the version like this:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

	Pay attention to the type of barrier: make it match what Visual C++ would
	use in the same case
*/

/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */
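
/*
	EXAMPLE: a sketch of _ReturnAddress in a tracing helper; "trace" is a
	hypothetical logging function, not part of this header:

		void log_caller(void)
		{
			trace("called from %p\n", _ReturnAddress());
		}
*/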

/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
#define _ReadWriteBarrier() __sync_synchronize()
#else
__INTRIN_INLINE void _MemoryBarrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}
#define _ReadWriteBarrier() _MemoryBarrier()
#endif

/* BUGBUG: GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier
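
/*
	EXAMPLE: a minimal publish/consume sketch; _ReadWriteBarrier keeps the
	store to "data" from being reordered past the "ready" flag ("data" and
	"ready" are assumed shared globals):

		static int data;
		static volatile long ready;

		void publish(int value)
		{
			data = value;
			_ReadWriteBarrier();
			ready = 1;
		}
*/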

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
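
/*
	EXAMPLE: a minimal spinlock sketch built on the compare-exchange above,
	assuming a zero-initialized lock word; _mm_pause is defined later in this
	header:

		static volatile long lock = 0;

		void acquire(void)
		{
			while(_InterlockedCompareExchange(&lock, 1, 0) != 0)
				_mm_pause();
		}

		void release(void)
		{
			_InterlockedExchange(&lock, 0);
		}
*/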

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}

__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	return __sync_fetch_and_add(Addend, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif
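
/*
	EXAMPLE: a shared hit counter; like the Visual C++ intrinsic,
	_InterlockedExchangeAdd returns the *old* value ("hits" is an assumed
	shared global):

		static volatile long hits;

		long count_hit(void)
		{
			return _InterlockedExchangeAdd(&hits, 1) + 1;
		}
*/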

__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
	return __sync_fetch_and_and(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
	return __sync_fetch_and_or(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
	return __sync_fetch_and_xor(value, mask);
}

#else

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	char retval = Comperand;
	__asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	short retval = Comperand;
	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	long retval = Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	void * retval = (void *)Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
	long retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	void * retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}

__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	long retval = Value;
	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
273 __INTRIN_INLINE
char _InterlockedAnd8(volatile char * const value
, const char mask
)
283 y
= _InterlockedCompareExchange8(value
, x
& mask
, x
);
290 __INTRIN_INLINE
short _InterlockedAnd16(volatile short * const value
, const short mask
)
300 y
= _InterlockedCompareExchange16(value
, x
& mask
, x
);
307 __INTRIN_INLINE
long _InterlockedAnd(volatile long * const value
, const long mask
)
317 y
= _InterlockedCompareExchange(value
, x
& mask
, x
);
324 __INTRIN_INLINE
char _InterlockedOr8(volatile char * const value
, const char mask
)
334 y
= _InterlockedCompareExchange8(value
, x
| mask
, x
);
341 __INTRIN_INLINE
short _InterlockedOr16(volatile short * const value
, const short mask
)
351 y
= _InterlockedCompareExchange16(value
, x
| mask
, x
);
358 __INTRIN_INLINE
long _InterlockedOr(volatile long * const value
, const long mask
)
368 y
= _InterlockedCompareExchange(value
, x
| mask
, x
);
375 __INTRIN_INLINE
char _InterlockedXor8(volatile char * const value
, const char mask
)
385 y
= _InterlockedCompareExchange8(value
, x
^ mask
, x
);
392 __INTRIN_INLINE
short _InterlockedXor16(volatile short * const value
, const short mask
)
402 y
= _InterlockedCompareExchange16(value
, x
^ mask
, x
);
409 __INTRIN_INLINE
long _InterlockedXor(volatile long * const value
, const long mask
)
419 y
= _InterlockedCompareExchange(value
, x
^ mask
, x
);

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	long long retval = Comperand;

	__asm__
	(
		"lock; cmpxchg8b %[Destination]" :
		[retval] "+A" (retval) :
			[Destination] "m" (*Destination),
			"b" ((unsigned long)((Exchange >>  0) & 0xFFFFFFFF)),
			"c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
		"memory"
	);

	return retval;
}

#endif

__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
	__asm__
	(
		"lock; add %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adc $0, %[Hi32];"
		"LABEL%=:;" :
		[Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
		[Value] "ir" (Value) :
		"memory"
	);

	return Value;
}

__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif

__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

/*** String operations ***/
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosb" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosw" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosl" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsb" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsw" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsl" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
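
/*
	EXAMPLE: __stosb as a memset-style fill; per the NOTE above there is no
	memory clobber, so do not rely on it as an optimization barrier:

		unsigned char buffer[64];
		__stosb(buffer, 0, sizeof(buffer));
*/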

#if defined(_M_AMD64)
/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

#else
/*** FS segment addressing ***/
__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

#endif

/*** Bit manipulation ***/
__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}

__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}

/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
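
/*
	EXAMPLE: worked rotations; unlike shifts, the bits wrap around:

		_rotl8(0x81, 1) yields 0x03 (bit 7 wraps around to bit 0)
		_rotr8(0x01, 1) yields 0x80 (bit 0 wraps around to bit 7)
*/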

/*
	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
	constraint (edx:eax) for the Mask argument, because it's the only way GCC
	can pass 64-bit operands around - passing the two 32 bit parts separately
	just confuses it. Also we declare Bit as an int and then truncate it to
	match Visual C++ behavior
*/
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}

__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}

#ifdef _M_AMD64
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	unsigned __int64 retval;
	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#else
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	union
	{
		unsigned __int64 int64part;
		struct
		{
			unsigned long lowpart;
			unsigned long hipart;
		};
	} retval;

	retval.int64part = value;
	__asm__("bswapl %[lowpart]\n"
	        "bswapl %[hipart]\n"
	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart));
	return retval.int64part;
}
#endif
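
/*
	EXAMPLE: byte order conversion, e.g. turning a big-endian on-wire value
	into host order on little-endian x86:

		_byteswap_ushort(0x1234)     yields 0x3412
		_byteswap_ulong(0x12345678)  yields 0x78563412
*/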

/*** 64-bit math ***/
__INTRIN_INLINE long long __emul(const int a, const int b)
{
	long long retval;
	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
	unsigned long long retval;
	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
{
	__int64 retval;
	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
	unsigned __int64 retval;
	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
#endif
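
/*
	EXAMPLE: __emul keeps the full 64-bit product where a plain 32-bit
	multiplication would overflow:

		long long p = __emul(100000, 100000);

	p is 10000000000, which does not fit in 32 bits.
*/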

/*** Port I/O ***/
__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
	unsigned char byte;
	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
	return byte;
}

__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
	unsigned short word;
	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
	return word;
}

__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
	unsigned long dword;
	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
	return dword;
}

__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insb" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insw" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insl" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
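
/*
	EXAMPLE (ring 0 only): polling the i8042 keyboard controller status port
	(0x64) until its input buffer is empty:

		while(__inbyte(0x64) & 0x02)
			;
*/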

/*** System information ***/
__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
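
/*
	EXAMPLE: reading the CPU vendor string; leaf 0 returns it in EBX, EDX,
	ECX, in that order:

		int info[4];
		char vendor[13];

		__cpuid(info, 0);
		memcpy(vendor + 0, &info[1], 4);
		memcpy(vendor + 4, &info[3], 4);
		memcpy(vendor + 8, &info[2], 4);
		vendor[12] = '\0';

	On Intel hardware, for instance, this yields "GenuineIntel".
*/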

__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#ifdef _M_AMD64
	unsigned long long low, high;
	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
	return low | (high << 32);
#else
	unsigned long long retval;
	__asm__ __volatile__("rdtsc" : "=A"(retval));
	return retval;
#endif
}
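
/*
	EXAMPLE: a crude cycle count around a region of code; note that rdtsc is
	not serializing and the TSC may vary with frequency scaling, so treat the
	result as approximate ("do_work" is a placeholder):

		unsigned long long start = __rdtsc();
		do_work();
		unsigned long long cycles = __rdtsc() - start;
*/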

__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
	__asm__ __volatile__("push %0\n popf" : : "rim"(Value));
}

__INTRIN_INLINE uintptr_t __readeflags(void)
{
	uintptr_t retval;
	__asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
	return retval;
}

/*** Interrupts ***/
__INTRIN_INLINE void __debugbreak(void)
{
	__asm__("int $3");
}

__INTRIN_INLINE void __int2c(void)
{
	__asm__("int $0x2c");
}

__INTRIN_INLINE void _disable(void)
{
	__asm__("cli");
}

__INTRIN_INLINE void _enable(void)
{
	__asm__("sti");
}

__INTRIN_INLINE void __halt(void)
{
	__asm__("hlt");
}

/*** Protected memory management ***/

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

#ifdef _M_AMD64
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
	return value;
}
#else
__INTRIN_INLINE unsigned long __readcr0(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}
#endif

#ifdef _M_AMD64
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
	unsigned __int64 value;

	switch (reg)
	{
		case 0:
			__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
			break;
	}

	return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
	switch (reg)
	{
		case 0:
			__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
#else
__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
	unsigned int value;

	switch (reg)
	{
		case 0:
			__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
			break;
	}

	return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
	switch (reg)
	{
		case 0:
			__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
#endif

__INTRIN_INLINE void __invlpg(void * const Address)
{
	__asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
}

/*** System operations ***/
__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#ifdef _M_AMD64
	unsigned long low, high;
	__asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
	return ((unsigned long long)high << 32) | low;
#else
	unsigned long long retval;
	__asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
	return retval;
#endif
}

__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
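
/*
	EXAMPLE (ring 0 only): reading the IA32_APIC_BASE MSR (index 0x1B); the
	local APIC base physical address lives in the upper bits:

		unsigned long long apic_base = __readmsr(0x1B);
*/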

__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
	unsigned long long retval;
	__asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
	return retval;
}

/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
	unsigned long retval;
	__asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
	return retval;
}

__INTRIN_INLINE void __wbinvd(void)
{
	__asm__ __volatile__("wbinvd");
}

__INTRIN_INLINE void __lidt(void *Source)
{
	__asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
	__asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
}

__INTRIN_INLINE void _mm_pause(void)
{
	__asm__ __volatile__("pause");
}

#endif /* KJK_INTRIN_X86_H_ */