/*
	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
	Microsoft Visual C++ functions. Originally developed for the ReactOS
	(<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
	projects.

	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	DEALINGS IN THE SOFTWARE.
*/
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
/*
	FIXME: review all "memory" clobbers, add/remove to match Visual C++
	behavior: some "obvious" memory barriers are not present in the Visual C++
	implementation - e.g. __stosX; on the other hand, some memory barriers that
	*are* present could have been missed

	NOTE: this is a *compatibility* header. Some functions may look wrong at
	first, but they're only "as wrong" as they would be on Visual C++. Our
	priority is compatibility

	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
	constraints and the uses of __volatile__ out of my... hat. Do not touch
	them. I hate cargo cult programming

	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
	barriers aren't there in Visual C++ (e.g. __stosX)

	NOTE: review all intrinsics with a return value, add/remove __volatile__
	where necessary. If an intrinsic whose value is ignored generates a no-op
	under Visual C++, __volatile__ must be omitted; if it always generates code
	(for example, if it has side effects), __volatile__ must be specified. GCC
	will only optimize out non-volatile asm blocks with outputs, so input-only
	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
	and follow Visual C++ behavior

	NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
	atomic operations. Test the version like this:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

	Pay attention to the type of barrier. Make it match with what Visual C++
	would use in the same case
*/
/*** Stack frame juggling ***/
/* Address of the instruction the current function will return to */
#define _ReturnAddress() (__builtin_return_address(0))
/* Address of the slot on the stack that holds the return address */
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */
/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
/* GCC >= 4.1: real full hardware+compiler barrier */
#define _ReadWriteBarrier() __sync_synchronize()
#else
/* Older GCC: compiler-only barrier (empty asm with a "memory" clobber) */
static void __inline__ __attribute__((always_inline)) _MemoryBarrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}
#define _ReadWriteBarrier() _MemoryBarrier()
#endif

/* BUGBUG: GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier
90 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
/* GCC >= 4.1 branch: implement the Interlocked* primitives with __sync built-ins */

/* Atomically: if *Destination == Comperand store Exchange; returns the initial *Destination */
static __inline__ __attribute__((always_inline)) char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

static __inline__ __attribute__((always_inline)) short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

static __inline__ __attribute__((always_inline)) long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

static __inline__ __attribute__((always_inline)) void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

/* Atomically store Value into *Target; returns the previous value */
static __inline__ __attribute__((always_inline)) long _InterlockedExchange(volatile long * const Target, const long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#if defined(_M_AMD64)
/* 64-bit atomic exchange; returns the previous value of *Target */
static __inline__ __attribute__((always_inline)) long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif
/* Atomic pointer exchange; returns the previous value of *Target */
static __inline__ __attribute__((always_inline)) void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
/* Atomically add Value to *Addend; returns the value *before* the addition */
static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	return __sync_fetch_and_add(Addend, Value);
}

static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#if defined(_M_AMD64)
/* 64-bit atomic fetch-and-add; returns the value before the addition */
static __inline__ __attribute__((always_inline)) long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif
/* Atomic bitwise AND; returns the value *before* the operation */
static __inline__ __attribute__((always_inline)) char _InterlockedAnd8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_and(value, mask);
}

static __inline__ __attribute__((always_inline)) short _InterlockedAnd16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_and(value, mask);
}

static __inline__ __attribute__((always_inline)) long _InterlockedAnd(volatile long * const value, const long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#if defined(_M_AMD64)
/* 64-bit atomic AND; returns the value before the operation.
   FIX: return type was declared 'long', silently truncating the 64-bit result */
static __inline__ __attribute__((always_inline)) long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif
/* Atomic bitwise OR; returns the value *before* the operation */
static __inline__ __attribute__((always_inline)) char _InterlockedOr8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_or(value, mask);
}

static __inline__ __attribute__((always_inline)) short _InterlockedOr16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_or(value, mask);
}

static __inline__ __attribute__((always_inline)) long _InterlockedOr(volatile long * const value, const long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#if defined(_M_AMD64)
/* 64-bit atomic OR; returns the value before the operation.
   FIX: return type was declared 'long', silently truncating the 64-bit result */
static __inline__ __attribute__((always_inline)) long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif
/* Atomic bitwise XOR; returns the value *before* the operation */
static __inline__ __attribute__((always_inline)) char _InterlockedXor8(volatile char * const value, const char mask)
{
	return __sync_fetch_and_xor(value, mask);
}

static __inline__ __attribute__((always_inline)) short _InterlockedXor16(volatile short * const value, const short mask)
{
	return __sync_fetch_and_xor(value, mask);
}

static __inline__ __attribute__((always_inline)) long _InterlockedXor(volatile long * const value, const long mask)
{
	return __sync_fetch_and_xor(value, mask);
}
213 static __inline__
__attribute__((always_inline
)) char _InterlockedCompareExchange8(volatile char * const Destination
, const char Exchange
, const char Comperand
)
215 char retval
= Comperand
;
216 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval
] "+a" (retval
) : [Destination
] "m" (*Destination
), [Exchange
] "q" (Exchange
) : "memory");
/* 16-bit compare-exchange; cmpxchg implicitly compares against (and returns in) ax */
static __inline__ __attribute__((always_inline)) short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
	short retval = Comperand;
	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
/* 32-bit compare-exchange; cmpxchg implicitly compares against (and returns in) eax */
static __inline__ __attribute__((always_inline)) long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
	long retval = Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
/* Pointer compare-exchange (32-bit pointers in this branch) */
static __inline__ __attribute__((always_inline)) void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
	void * retval = (void *)Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
/* Atomic exchange; xchg with a memory operand carries an implicit lock prefix */
static __inline__ __attribute__((always_inline)) long _InterlockedExchange(volatile long * const Target, const long Value)
{
	long retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
/* Atomic pointer exchange; returns the previous value of *Target */
static __inline__ __attribute__((always_inline)) void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
	void * retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
/* 16-bit fetch-and-add; retval is 'short' so xaddw gets a 16-bit register operand */
static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
/* 32-bit fetch-and-add; returns the value *before* the addition */
static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
	long retval = Value;
	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
/* Atomic AND built from a compare-exchange retry loop; returns the value before the op */
static __inline__ __attribute__((always_inline)) char _InterlockedAnd8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x & mask, x);
	}
	while(y != x);

	return y;
}

static __inline__ __attribute__((always_inline)) short _InterlockedAnd16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x & mask, x);
	}
	while(y != x);

	return y;
}

static __inline__ __attribute__((always_inline)) long _InterlockedAnd(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x & mask, x);
	}
	while(y != x);

	return y;
}
/* Atomic OR built from a compare-exchange retry loop; returns the value before the op */
static __inline__ __attribute__((always_inline)) char _InterlockedOr8(volatile char * const value, const char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x | mask, x);
	}
	while(y != x);

	return y;
}

static __inline__ __attribute__((always_inline)) short _InterlockedOr16(volatile short * const value, const short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x | mask, x);
	}
	while(y != x);

	return y;
}

static __inline__ __attribute__((always_inline)) long _InterlockedOr(volatile long * const value, const long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x | mask, x);
	}
	while(y != x);

	return y;
}
371 static __inline__
__attribute__((always_inline
)) char _InterlockedXor8(volatile char * const value
, const char mask
)
381 y
= _InterlockedCompareExchange8(value
, x
^ mask
, x
);
388 static __inline__
__attribute__((always_inline
)) short _InterlockedXor16(volatile short * const value
, const short mask
)
398 y
= _InterlockedCompareExchange16(value
, x
^ mask
, x
);
405 static __inline__
__attribute__((always_inline
)) long _InterlockedXor(volatile long * const value
, const long mask
)
415 y
= _InterlockedCompareExchange(value
, x
^ mask
, x
);
424 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
/* 64-bit compare-exchange via the __sync built-in (GCC >= 4.1 on x86-64) */
static __inline__ __attribute__((always_inline)) long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
433 static __inline__
__attribute__((always_inline
)) long long _InterlockedCompareExchange64(volatile long long * const Destination
, const long long Exchange
, const long long Comperand
)
435 long long retval
= Comperand
;
439 "lock; cmpxchg8b %[Destination]" :
440 [retval
] "+A" (retval
) :
441 [Destination
] "m" (*Destination
),
442 "b" ((unsigned long)((Exchange
>> 0) & 0xFFFFFFFF)),
443 "c" ((unsigned long)((Exchange
>> 32) & 0xFFFFFFFF)) :
/* Adds Value to the 64-bit counter at *Addend as two 32-bit halves; the carry into the
   high half is only propagated when the low add overflows (jae skips the adc). This is
   NOT a single atomic 64-bit add -- it matches the Visual C++ intrinsic's behavior. */
static __inline__ __attribute__((always_inline)) long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
	__asm__
	(
		"lock; add %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adc $0, %[Hi32];"
		"LABEL%=:;" :
		[Lo32] "=m" (*((volatile long *)(Addend) + 0)), [Hi32] "=m" (*((volatile long *)(Addend) + 1)) :
		[Value] "ir" (Value)
	);

	return Value;
}
/* Increment/decrement wrappers; return the *new* value (MSVC semantics),
   hence the +1/-1 correction of the fetch-and-add result */
static __inline__ __attribute__((always_inline)) long _InterlockedDecrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

static __inline__ __attribute__((always_inline)) long _InterlockedIncrement(volatile long * const lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

static __inline__ __attribute__((always_inline)) long _InterlockedDecrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

static __inline__ __attribute__((always_inline)) long _InterlockedIncrement16(volatile short * const lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#if defined(_M_AMD64)
/* 64-bit increment/decrement; return the *new* value (MSVC semantics) */
static __inline__ __attribute__((always_inline)) long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

static __inline__ __attribute__((always_inline)) long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif
/* Atomically clear bit b of *a; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "=m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#if defined(_M_AMD64)
/* 64-bit atomic bit-test-and-reset; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "=m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif
/* Atomically set bit b of *a; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
	unsigned char retval;
	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "=m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#if defined(_M_AMD64)
/* 64-bit atomic bit-test-and-set; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
	unsigned char retval;
	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "=m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif
/*** String operations ***/
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
/* Fill Count bytes at Dest with Data (rep stosb: edi=Dest, al=Data, ecx=Count) */
static __inline__ __attribute__((always_inline)) void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosb" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
/* Fill Count 16-bit words at Dest with Data (rep stosw) */
static __inline__ __attribute__((always_inline)) void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosw" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
/* Fill Count 32-bit dwords at Dest with Data (rep stosl stores 4 bytes per iteration,
   regardless of sizeof(long) on the host) */
static __inline__ __attribute__((always_inline)) void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosl" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
/* Copy Count bytes from Source to Destination (rep movsb) */
static __inline__ __attribute__((always_inline)) void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsb" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
/* Copy Count 16-bit words from Source to Destination (rep movsw) */
static __inline__ __attribute__((always_inline)) void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsw" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
/* Copy Count 32-bit dwords from Source to Destination (rep movsl moves 4 bytes per
   iteration, regardless of sizeof(long) on the host) */
static __inline__ __attribute__((always_inline)) void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsl" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
#if defined(_M_AMD64)
/*** GS segment addressing ***/

/* Store Data at gs:Offset */
static __inline__ __attribute__((always_inline)) void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __writegsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* Load and return the value at gs:Offset */
static __inline__ __attribute__((always_inline)) unsigned char __readgsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

static __inline__ __attribute__((always_inline)) unsigned short __readgsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

static __inline__ __attribute__((always_inline)) unsigned long __readgsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

static __inline__ __attribute__((always_inline)) unsigned __int64 __readgsqword(const unsigned long Offset)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

/* Increment the value at gs:Offset in place */
static __inline__ __attribute__((always_inline)) void __incgsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

static __inline__ __attribute__((always_inline)) void __incgsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

static __inline__ __attribute__((always_inline)) void __incgsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* Add Data to the value at gs:Offset in place */
static __inline__ __attribute__((always_inline)) void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __addgsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* FIX: was "%k[Data]" (32-bit operand) for a 64-bit add; must be "%q[Data]" */
static __inline__ __attribute__((always_inline)) void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
#endif
/*** FS segment addressing ***/
/* Store Data at fs:Offset */
static __inline__ __attribute__((always_inline)) void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __writefsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* Load and return the value at fs:Offset */
static __inline__ __attribute__((always_inline)) unsigned char __readfsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "irm" (Offset) : "memory");
	return value;
}

static __inline__ __attribute__((always_inline)) unsigned short __readfsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

static __inline__ __attribute__((always_inline)) unsigned long __readfsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
	return value;
}

/* Increment the value at fs:Offset in place */
static __inline__ __attribute__((always_inline)) void __incfsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

static __inline__ __attribute__((always_inline)) void __incfsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

static __inline__ __attribute__((always_inline)) void __incfsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
static __inline__ __attribute__((always_inline)) void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __addfsword(const unsigned long Offset, const unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

static __inline__ __attribute__((always_inline)) void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
/*** Bit manipulation ***/
/* Index of the lowest set bit of Mask into *Index; returns nonzero iff Mask != 0
   (*Index is left undefined by bsf when Mask == 0, matching MSVC) */
static __inline__ __attribute__((always_inline)) unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}

/* Index of the highest set bit of Mask into *Index; returns nonzero iff Mask != 0 */
static __inline__ __attribute__((always_inline)) unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
/* NOTE: again, the bizarre implementation follows Visual C++ */
/* Test bit b of *a; returns the bit's value. Constant b is folded to a word offset
   plus a 0-31 bit index so "Ir" can use an immediate */
static __inline__ __attribute__((always_inline)) unsigned char _bittest(const long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}

/* Complement bit b of *a; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _bittestandcomplement(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}

/* Clear bit b of *a; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _bittestandreset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}

/* Set bit b of *a; returns the bit's previous value */
static __inline__ __attribute__((always_inline)) unsigned char _bittestandset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));

	return retval;
}
/* Rotate left/right; "Nc" lets GCC use an immediate shift count or cl */
static __inline__ __attribute__((always_inline)) unsigned char _rotl8(const unsigned char value, const unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned short _rotl16(const unsigned short value, const unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned int _rotl(const unsigned int value, const int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned long _rotr(const unsigned int value, const unsigned char shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned char _rotr8(const unsigned char value, const unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned short _rotr16(const unsigned short value, const unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
/*
	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
	constraint (edx:eax) for the Mask argument, because it's the only way GCC
	can pass 64-bit operands around - passing the two 32 bit parts separately
	just confuses it. Also we declare Bit as an int and then truncate it to
	match Visual C++ behavior
*/
static __inline__ __attribute__((always_inline)) unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	/* shift edx (high) left pulling bits in from eax (low), then shift eax */
	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

static __inline__ __attribute__((always_inline)) long long __ll_rshift(const long long Mask, const int Bit)
{
	unsigned long long retval = Mask;

	/* shift eax (low) right pulling bits in from edx (high), then arithmetic-shift edx;
	   FIX: the register roles were reversed, which shifted the wrong halves */
	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

static __inline__ __attribute__((always_inline)) unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	/* same as __ll_rshift but with a logical (unsigned) shift of the high half */
	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
/* Swap the two bytes of a 16-bit value (rotate by 8) */
static __inline__ __attribute__((always_inline)) unsigned short _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}
/* Reverse the byte order of a 32-bit value */
static __inline__ __attribute__((always_inline)) unsigned long _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}
929 static __inline__
__attribute__((always_inline
)) unsigned __int64
_byteswap_uint64(unsigned __int64 value
)
931 unsigned __int64 retval
;
932 __asm__("bswapq %[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
));
936 static __inline__
__attribute__((always_inline
)) unsigned __int64
_byteswap_uint64(unsigned __int64 value
)
941 unsigned long lowpart
;
942 unsigned long hipart
;
945 retval
.int64part
= value
;
946 __asm__("bswapl %[lowpart]\n"
948 : [lowpart
] "=rm" (retval
.hipart
), [hipart
] "=rm" (retval
.lowpart
) : "[lowpart]" (retval
.lowpart
), "[hipart]" (retval
.hipart
) );
949 return retval
.int64part
;
/*** 64-bit math ***/
/* Signed 32x32 -> 64-bit multiply; one-operand imull leaves the result in edx:eax ("A") */
static __inline__ __attribute__((always_inline)) long long __emul(const int a, const int b)
{
	long long retval;
	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
/* Unsigned 32x32 -> 64-bit multiply; one-operand mull leaves the result in edx:eax ("A") */
static __inline__ __attribute__((always_inline)) unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
	unsigned long long retval;
	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
/*** Port I/O ***/
/* Read one byte from I/O port Port */
static __inline__ __attribute__((always_inline)) unsigned char __inbyte(const unsigned short Port)
{
	unsigned char byte;
	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
	return byte;
}

/* Read one 16-bit word from I/O port Port */
static __inline__ __attribute__((always_inline)) unsigned short __inword(const unsigned short Port)
{
	unsigned short word;
	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
	return word;
}

/* Read one 32-bit dword from I/O port Port */
static __inline__ __attribute__((always_inline)) unsigned long __indword(const unsigned short Port)
{
	unsigned long dword;
	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
	return dword;
}
991 static __inline__
__attribute__((always_inline
)) void __inbytestring(unsigned short Port
, unsigned char * Buffer
, unsigned long Count
)
996 [Buffer
] "=D" (Buffer
), [Count
] "=c" (Count
) :
997 "d" (Port
), "[Buffer]" (Buffer
), "[Count]" (Count
) :
/* Read Count 16-bit words from port Port into Buffer ("rep insw").
 * Buffer/Count are in-out: the string instruction updates edi/ecx. */
static __inline__ __attribute__((always_inline)) void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insw"
        : [Buffer] "=D" (Buffer), [Count] "=c" (Count)
        : "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count)
        : "memory"
    );
}
/* Read Count 32-bit dwords from port Port into Buffer ("rep insl").
 * Buffer/Count are in-out: the string instruction updates edi/ecx. */
static __inline__ __attribute__((always_inline)) void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insl"
        : [Buffer] "=D" (Buffer), [Count] "=c" (Count)
        : "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count)
        : "memory"
    );
}
/* Write one byte to the given I/O port (MSVC __outbyte). */
static __inline__ __attribute__((always_inline)) void __outbyte(unsigned short const Port, const unsigned char Data)
{
    __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
/* Write one 16-bit word to the given I/O port (MSVC __outword). */
static __inline__ __attribute__((always_inline)) void __outword(unsigned short const Port, const unsigned short Data)
{
    __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
/* Write one 32-bit dword to the given I/O port (MSVC __outdword). */
static __inline__ __attribute__((always_inline)) void __outdword(unsigned short const Port, const unsigned long Data)
{
    __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
/* Write Count bytes from Buffer to port Port ("rep outsb"). */
static __inline__ __attribute__((always_inline)) void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
/* Write Count 16-bit words from Buffer to port Port ("rep outsw"). */
static __inline__ __attribute__((always_inline)) void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
/* Write Count 32-bit dwords from Buffer to port Port ("rep outsl"). */
static __inline__ __attribute__((always_inline)) void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
/*** System information ***/
/* Execute cpuid with eax = InfoType and store eax/ebx/ecx/edx into
 * CPUInfo[0..3] (MSVC __cpuid). */
static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType)
{
    __asm__ __volatile__("cpuid"
                         : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
                         : "a" (InfoType));
}
/* Read the time-stamp counter (MSVC __rdtsc). In 64-bit mode rdtsc
 * leaves the result split across eax/edx, so the halves are combined
 * manually; in 32-bit mode the "A" (edx:eax pair) constraint works. */
static __inline__ __attribute__((always_inline)) unsigned long long __rdtsc(void)
{
#ifdef _M_AMD64
    unsigned long long low, high;
    __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
    return low | (high << 32);
#else
    unsigned long long retval;
    __asm__ __volatile__("rdtsc" : "=A" (retval));
    return retval;
#endif
}
/* Load Value into the flags register via push/popf (MSVC __writeeflags).
 * Privileged bits are silently ignored by popf at CPL 3. */
static __inline__ __attribute__((always_inline)) void __writeeflags(uintptr_t Value)
{
    __asm__ __volatile__("push %0\n popf" : : "rim" (Value));
}
/* Return the current flags register via pushf/pop (MSVC __readeflags). */
static __inline__ __attribute__((always_inline)) uintptr_t __readeflags(void)
{
    uintptr_t retval;
    __asm__ __volatile__("pushf\n pop %0" : "=rm" (retval));
    return retval;
}
/*** Interrupts ***/
/* Raise a breakpoint exception (int 3) — MSVC __debugbreak. */
static __inline__ __attribute__((always_inline)) void __debugbreak(void)
{
    __asm__("int $3");
}
/* Raise interrupt 0x2c (NT DbgRaiseAssertionFailure vector). */
static __inline__ __attribute__((always_inline)) void __int2c(void)
{
    __asm__("int $0x2c");
}
/* Disable maskable interrupts (cli). The "memory" clobber keeps GCC
 * from moving memory accesses across the interrupt-state change,
 * matching the barrier semantics MSVC gives this intrinsic. */
static __inline__ __attribute__((always_inline)) void _disable(void)
{
    __asm__("cli" : : : "memory");
}
/* Enable maskable interrupts (sti); "memory" clobber acts as a
 * compiler barrier, matching MSVC semantics. */
static __inline__ __attribute__((always_inline)) void _enable(void)
{
    __asm__("sti" : : : "memory");
}
/*** Protected memory management ***/
1110 static __inline__
__attribute__((always_inline
)) void __writecr0(const unsigned __int64 Data
)
1112 __asm__("mov %[Data], %%cr0" : : [Data
] "q" (Data
) : "memory");
1115 static __inline__
__attribute__((always_inline
)) void __writecr3(const unsigned __int64 Data
)
1117 __asm__("mov %[Data], %%cr3" : : [Data
] "q" (Data
) : "memory");
1120 static __inline__
__attribute__((always_inline
)) void __writecr4(const unsigned __int64 Data
)
1122 __asm__("mov %[Data], %%cr4" : : [Data
] "q" (Data
) : "memory");
1126 static __inline__
__attribute__((always_inline
)) void __writecr8(const unsigned __int64 Data
)
1128 __asm__("mov %[Data], %%cr8" : : [Data
] "q" (Data
) : "memory");
1132 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readcr0(void)
1134 unsigned __int64 value
;
1135 __asm__
__volatile__("mov %%cr0, %[value]" : [value
] "=q" (value
));
1139 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readcr2(void)
1141 unsigned __int64 value
;
1142 __asm__
__volatile__("mov %%cr2, %[value]" : [value
] "=q" (value
));
1146 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readcr3(void)
1148 unsigned __int64 value
;
1149 __asm__
__volatile__("mov %%cr3, %[value]" : [value
] "=q" (value
));
1153 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readcr4(void)
1155 unsigned __int64 value
;
1156 __asm__
__volatile__("mov %%cr4, %[value]" : [value
] "=q" (value
));
1161 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readcr8(void)
1163 unsigned __int64 value
;
1164 __asm__
__volatile__("movq %%cr8, %q[value]" : [value
] "=q" (value
));
1170 static __inline__
__attribute__((always_inline
)) unsigned __int64
__readdr(unsigned int reg
)
1172 unsigned __int64 value
;
1176 __asm__
__volatile__("movq %%dr0, %q[value]" : [value
] "=q" (value
));
1179 __asm__
__volatile__("movq %%dr1, %q[value]" : [value
] "=q" (value
));
1182 __asm__
__volatile__("movq %%dr2, %q[value]" : [value
] "=q" (value
));
1185 __asm__
__volatile__("movq %%dr3, %q[value]" : [value
] "=q" (value
));
1188 __asm__
__volatile__("movq %%dr4, %q[value]" : [value
] "=q" (value
));
1191 __asm__
__volatile__("movq %%dr5, %q[value]" : [value
] "=q" (value
));
1194 __asm__
__volatile__("movq %%dr6, %q[value]" : [value
] "=q" (value
));
1197 __asm__
__volatile__("movq %%dr7, %q[value]" : [value
] "=q" (value
));
1203 static __inline__
__attribute__((always_inline
)) void __writedr(unsigned reg
, unsigned __int64 value
)
1208 __asm__("movq %q[value], %%dr0" : : [value
] "q" (value
) : "memory");
1211 __asm__("movq %q[value], %%dr1" : : [value
] "q" (value
) : "memory");
1214 __asm__("movq %q[value], %%dr2" : : [value
] "q" (value
) : "memory");
1217 __asm__("movq %q[value], %%dr3" : : [value
] "q" (value
) : "memory");
1220 __asm__("movq %q[value], %%dr4" : : [value
] "q" (value
) : "memory");
1223 __asm__("movq %q[value], %%dr5" : : [value
] "q" (value
) : "memory");
1226 __asm__("movq %q[value], %%dr6" : : [value
] "q" (value
) : "memory");
1229 __asm__("movq %q[value], %%dr7" : : [value
] "q" (value
) : "memory");
/* Invalidate the TLB entry for the page containing Address
 * (privileged). The "m" operand dereference tells GCC which page is
 * affected without requiring the address in a register. */
static __inline__ __attribute__((always_inline)) void __invlpg(void * const Address)
{
    __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
}
/*** System operations ***/
/* Read the model-specific register selected by reg (privileged).
 * rdmsr returns the value split across eax/edx; in 64-bit mode the
 * halves are recombined explicitly (with a widening cast so the shift
 * is well-defined regardless of the width of long). */
static __inline__ __attribute__((always_inline)) unsigned long long __readmsr(const int reg)
{
#ifdef _M_AMD64
    unsigned long low, high;
    __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
    return ((unsigned long long)high << 32) | low;
#else
    unsigned long long retval;
    __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
    return retval;
#endif
}
/* Write Value to the model-specific register Register (privileged).
 * wrmsr takes the value split across eax/edx; the 32-bit build can use
 * the "A" edx:eax pair constraint directly. */
static __inline__ __attribute__((always_inline)) void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
    __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
    __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
/* Read performance-monitoring counter `counter` via rdpmc
 * (privileged unless CR4.PCE is set). */
static __inline__ __attribute__((always_inline)) unsigned long long __readpmc(const int counter)
{
    unsigned long long retval;
    __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
    return retval;
}
/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
/* Return the segment limit of selector `a` via lsl. If the selector
 * is invalid the result is unspecified (lsl only clears ZF, which is
 * not checked here — same as the MSVC intrinsic). */
static __inline__ __attribute__((always_inline)) unsigned long __segmentlimit(const unsigned long a)
{
    unsigned long retval;
    __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
    return retval;
}
/* Write back and invalidate all caches (wbinvd; privileged). */
static __inline__ __attribute__((always_inline)) void __wbinvd(void)
{
    __asm__ __volatile__("wbinvd");
}
/* Load the IDT register from the 6/10-byte descriptor at Source
 * (privileged). The short* dereference only anchors the "m" operand;
 * lidt reads the full descriptor. */
static __inline__ __attribute__((always_inline)) void __lidt(void *Source)
{
    __asm__ __volatile__("lidt %0" : : "m" (*(short*)Source));
}
/* Store the IDT register into the descriptor buffer at Destination.
 * sidt writes more than the anchoring short, hence the "memory"
 * clobber. */
static __inline__ __attribute__((always_inline)) void __sidt(void *Destination)
{
    __asm__ __volatile__("sidt %0" : : "m" (*(short*)Destination) : "memory");
}
#endif /* KJK_INTRIN_X86_H_ */