cee204458d37ae7dd457319c4b6aef664eb233b2
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
58 NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
72 /*** memcopy must be memmove ***/
73 void* memmove(void*, const void*, size_t);
74 __INTRIN_INLINE
void* memcpy(void* dest
, const void* source
, size_t num
)
76 return memmove(dest
, source
, num
);
/*** Stack frame juggling ***/
/* Return address of the current function's caller. */
#define _ReturnAddress() (__builtin_return_address(0))
/* Address of the stack slot holding that return address. */
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
/* NOTE(review): "_x86_64" looks like a typo for the compiler-defined
 * "__x86_64__" macro - confirm against the build environment. */
#ifdef _x86_64
#define _alloca(s) __builtin_alloca(s)
#endif
90 /*** Memory barriers ***/
92 __INTRIN_INLINE
void _ReadWriteBarrier(void);
93 __INTRIN_INLINE
void _mm_mfence(void);
94 __INTRIN_INLINE
void _mm_lfence(void);
95 __INTRIN_INLINE
void _mm_sfence(void);
97 __INTRIN_INLINE
void __faststorefence(void);
100 __INTRIN_INLINE
void _ReadWriteBarrier(void)
102 __asm__
__volatile__("" : : : "memory");
105 /* GCC only supports full barriers */
106 #define _ReadBarrier _ReadWriteBarrier
107 #define _WriteBarrier _ReadWriteBarrier
109 __INTRIN_INLINE
void _mm_mfence(void)
111 __asm__
__volatile__("mfence" : : : "memory");
114 __INTRIN_INLINE
void _mm_lfence(void)
117 __asm__
__volatile__("lfence");
121 __INTRIN_INLINE
void _mm_sfence(void)
124 __asm__
__volatile__("sfence");
129 __INTRIN_INLINE
void __faststorefence(void)
132 __asm__
__volatile__("lock; orl $0, %0;" : : "m"(local
));
137 /*** Atomic operations ***/
139 __INTRIN_INLINE
long _InterlockedAddLargeStatistic(volatile long long * const Addend
, const long Value
);
140 __INTRIN_INLINE
unsigned char _interlockedbittestandreset(volatile long * a
, const long b
);
141 __INTRIN_INLINE
unsigned char _interlockedbittestandset(volatile long * a
, const long b
);
142 #if defined(_M_AMD64)
143 __INTRIN_INLINE
unsigned char _interlockedbittestandreset64(volatile long long * a
, const long long b
);
144 __INTRIN_INLINE
unsigned char _interlockedbittestandset64(volatile long long * a
, const long long b
);
147 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
149 __INTRIN_INLINE
char _InterlockedCompareExchange8(volatile char * const Destination
, const char Exchange
, const char Comperand
);
150 __INTRIN_INLINE
short _InterlockedCompareExchange16(volatile short * const Destination
, const short Exchange
, const short Comperand
);
151 __INTRIN_INLINE
long _InterlockedCompareExchange(volatile long * const Destination
, const long Exchange
, const long Comperand
);
152 __INTRIN_INLINE
void * _InterlockedCompareExchangePointer(void * volatile * const Destination
, void * const Exchange
, void * const Comperand
);
153 __INTRIN_INLINE
char _InterlockedExchange8(volatile char * const Target
, const char Value
);
154 __INTRIN_INLINE
short _InterlockedExchange16(volatile short * const Target
, const short Value
);
155 __INTRIN_INLINE
long _InterlockedExchange(volatile long * const Target
, const long Value
);
156 __INTRIN_INLINE
void * _InterlockedExchangePointer(void * volatile * const Target
, void * const Value
);
157 __INTRIN_INLINE
long _InterlockedExchangeAdd16(volatile short * const Addend
, const short Value
);
158 __INTRIN_INLINE
long _InterlockedExchangeAdd(volatile long * const Addend
, const long Value
);
159 __INTRIN_INLINE
char _InterlockedAnd8(volatile char * const value
, const char mask
);
160 __INTRIN_INLINE
short _InterlockedAnd16(volatile short * const value
, const short mask
);
161 __INTRIN_INLINE
long _InterlockedAnd(volatile long * const value
, const long mask
);
162 __INTRIN_INLINE
char _InterlockedOr8(volatile char * const value
, const char mask
);
163 __INTRIN_INLINE
short _InterlockedOr16(volatile short * const value
, const short mask
);
164 __INTRIN_INLINE
long _InterlockedOr(volatile long * const value
, const long mask
);
165 __INTRIN_INLINE
char _InterlockedXor8(volatile char * const value
, const char mask
);
166 __INTRIN_INLINE
short _InterlockedXor16(volatile short * const value
, const short mask
);
167 __INTRIN_INLINE
long _InterlockedXor(volatile long * const value
, const long mask
);
168 __INTRIN_INLINE
long _InterlockedDecrement(volatile long * const lpAddend
);
169 __INTRIN_INLINE
long _InterlockedIncrement(volatile long * const lpAddend
);
170 __INTRIN_INLINE
short _InterlockedDecrement16(volatile short * const lpAddend
);
171 __INTRIN_INLINE
short _InterlockedIncrement16(volatile short * const lpAddend
);
172 #if defined(_M_AMD64)
173 __INTRIN_INLINE
long long _InterlockedExchange64(volatile long long * const Target
, const long long Value
);
174 __INTRIN_INLINE
long long _InterlockedExchangeAdd64(volatile long long * const Addend
, const long long Value
);
175 __INTRIN_INLINE
long long _InterlockedAnd64(volatile long long * const value
, const long long mask
);
176 __INTRIN_INLINE
long long _InterlockedOr64(volatile long long * const value
, const long long mask
);
177 __INTRIN_INLINE
long long _InterlockedXor64(volatile long long * const value
, const long long mask
);
178 __INTRIN_INLINE
long long _InterlockedDecrement64(volatile long long * const lpAddend
);
179 __INTRIN_INLINE
long long _InterlockedIncrement64(volatile long long * const lpAddend
);
182 __INTRIN_INLINE
char _InterlockedCompareExchange8(volatile char * const Destination
, const char Exchange
, const char Comperand
)
184 return __sync_val_compare_and_swap(Destination
, Comperand
, Exchange
);
187 __INTRIN_INLINE
short _InterlockedCompareExchange16(volatile short * const Destination
, const short Exchange
, const short Comperand
)
189 return __sync_val_compare_and_swap(Destination
, Comperand
, Exchange
);
194 __INTRIN_INLINE
long _InterlockedCompareExchange(volatile long * const Destination
, const long Exchange
, const long Comperand
)
196 return __sync_val_compare_and_swap(Destination
, Comperand
, Exchange
);
199 __INTRIN_INLINE
void * _InterlockedCompareExchangePointer(void * volatile * const Destination
, void * const Exchange
, void * const Comperand
)
201 return (void *)__sync_val_compare_and_swap(Destination
, Comperand
, Exchange
);
206 __INTRIN_INLINE
char _InterlockedExchange8(volatile char * const Target
, const char Value
)
208 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
209 __sync_synchronize();
210 return __sync_lock_test_and_set(Target
, Value
);
213 __INTRIN_INLINE
short _InterlockedExchange16(volatile short * const Target
, const short Value
)
215 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
216 __sync_synchronize();
217 return __sync_lock_test_and_set(Target
, Value
);
222 __INTRIN_INLINE
long _InterlockedExchange(volatile long * const Target
, const long Value
)
224 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
225 __sync_synchronize();
226 return __sync_lock_test_and_set(Target
, Value
);
229 __INTRIN_INLINE
void * _InterlockedExchangePointer(void * volatile * const Target
, void * const Value
)
231 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
232 __sync_synchronize();
233 return (void *)__sync_lock_test_and_set(Target
, Value
);
#if defined(_M_AMD64)
/* 64-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
#endif
247 __INTRIN_INLINE
long _InterlockedExchangeAdd16(volatile short * const Addend
, const short Value
)
249 return __sync_fetch_and_add(Addend
, Value
);
253 __INTRIN_INLINE
long _InterlockedExchangeAdd(volatile long * const Addend
, const long Value
)
255 return __sync_fetch_and_add(Addend
, Value
);
#if defined(_M_AMD64)
/* 64-bit atomic add; returns the pre-addition value. */
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}
#endif
266 __INTRIN_INLINE
char _InterlockedAnd8(volatile char * const value
, const char mask
)
268 return __sync_fetch_and_and(value
, mask
);
271 __INTRIN_INLINE
short _InterlockedAnd16(volatile short * const value
, const short mask
)
273 return __sync_fetch_and_and(value
, mask
);
276 __INTRIN_INLINE
long _InterlockedAnd(volatile long * const value
, const long mask
)
278 return __sync_fetch_and_and(value
, mask
);
#if defined(_M_AMD64)
/* 64-bit atomic AND; returns the pre-operation value. */
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_and(value, mask);
}
#endif
288 __INTRIN_INLINE
char _InterlockedOr8(volatile char * const value
, const char mask
)
290 return __sync_fetch_and_or(value
, mask
);
293 __INTRIN_INLINE
short _InterlockedOr16(volatile short * const value
, const short mask
)
295 return __sync_fetch_and_or(value
, mask
);
298 __INTRIN_INLINE
long _InterlockedOr(volatile long * const value
, const long mask
)
300 return __sync_fetch_and_or(value
, mask
);
#if defined(_M_AMD64)
/* 64-bit atomic OR; returns the pre-operation value. */
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_or(value, mask);
}
#endif
310 __INTRIN_INLINE
char _InterlockedXor8(volatile char * const value
, const char mask
)
312 return __sync_fetch_and_xor(value
, mask
);
315 __INTRIN_INLINE
short _InterlockedXor16(volatile short * const value
, const short mask
)
317 return __sync_fetch_and_xor(value
, mask
);
320 __INTRIN_INLINE
long _InterlockedXor(volatile long * const value
, const long mask
)
322 return __sync_fetch_and_xor(value
, mask
);
#if defined(_M_AMD64)
/* 64-bit atomic XOR; returns the pre-operation value. */
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_xor(value, mask);
}
#endif
333 __INTRIN_INLINE
long _InterlockedDecrement(volatile long * const lpAddend
)
335 return __sync_sub_and_fetch(lpAddend
, 1);
338 __INTRIN_INLINE
long _InterlockedIncrement(volatile long * const lpAddend
)
340 return __sync_add_and_fetch(lpAddend
, 1);
344 __INTRIN_INLINE
short _InterlockedDecrement16(volatile short * const lpAddend
)
346 return __sync_sub_and_fetch(lpAddend
, 1);
349 __INTRIN_INLINE
short _InterlockedIncrement16(volatile short * const lpAddend
)
351 return __sync_add_and_fetch(lpAddend
, 1);
#if defined(_M_AMD64)
/* 64-bit variants; return the post-operation value. */
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}
#endif
366 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
368 __INTRIN_INLINE
char _InterlockedCompareExchange8(volatile char * const Destination
, const char Exchange
, const char Comperand
);
369 __INTRIN_INLINE
short _InterlockedCompareExchange16(volatile short * const Destination
, const short Exchange
, const short Comperand
);
370 __INTRIN_INLINE
long _InterlockedCompareExchange(volatile long * const Destination
, const long Exchange
, const long Comperand
);
371 __INTRIN_INLINE
void * _InterlockedCompareExchangePointer(void * volatile * const Destination
, void * const Exchange
, void * const Comperand
);
372 __INTRIN_INLINE
char _InterlockedExchange8(volatile char * const Target
, const char Value
);
373 __INTRIN_INLINE
short _InterlockedExchange16(volatile short * const Target
, const short Value
);
374 __INTRIN_INLINE
long _InterlockedExchange(volatile long * const Target
, const long Value
);
375 __INTRIN_INLINE
void * _InterlockedExchangePointer(void * volatile * const Target
, void * const Value
);
376 __INTRIN_INLINE
long _InterlockedExchangeAdd16(volatile short * const Addend
, const short Value
);
377 __INTRIN_INLINE
long _InterlockedExchangeAdd(volatile long * const Addend
, const long Value
);
378 __INTRIN_INLINE
char _InterlockedAnd8(volatile char * const value
, const char mask
);
379 __INTRIN_INLINE
short _InterlockedAnd16(volatile short * const value
, const short mask
);
380 __INTRIN_INLINE
long _InterlockedAnd(volatile long * const value
, const long mask
);
381 __INTRIN_INLINE
char _InterlockedOr8(volatile char * const value
, const char mask
);
382 __INTRIN_INLINE
short _InterlockedOr16(volatile short * const value
, const short mask
);
383 __INTRIN_INLINE
long _InterlockedOr(volatile long * const value
, const long mask
);
384 __INTRIN_INLINE
char _InterlockedXor8(volatile char * const value
, const char mask
);
385 __INTRIN_INLINE
short _InterlockedXor16(volatile short * const value
, const short mask
);
386 __INTRIN_INLINE
long _InterlockedXor(volatile long * const value
, const long mask
);
387 __INTRIN_INLINE
long _InterlockedDecrement(volatile long * const lpAddend
);
388 __INTRIN_INLINE
long _InterlockedIncrement(volatile long * const lpAddend
);
389 __INTRIN_INLINE
short _InterlockedDecrement16(volatile short * const lpAddend
);
390 __INTRIN_INLINE
short _InterlockedIncrement16(volatile short * const lpAddend
);
391 #if defined(_M_AMD64)
392 __INTRIN_INLINE
long long _InterlockedDecrement64(volatile long long * const lpAddend
);
393 __INTRIN_INLINE
long long _InterlockedIncrement64(volatile long long * const lpAddend
);
396 __INTRIN_INLINE
char _InterlockedCompareExchange8(volatile char * const Destination
, const char Exchange
, const char Comperand
)
398 char retval
= Comperand
;
399 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval
] "+a" (retval
) : [Destination
] "m" (*Destination
), [Exchange
] "q" (Exchange
) : "memory");
403 __INTRIN_INLINE
short _InterlockedCompareExchange16(volatile short * const Destination
, const short Exchange
, const short Comperand
)
405 short retval
= Comperand
;
406 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval
] "+a" (retval
) : [Destination
] "m" (*Destination
), [Exchange
] "q" (Exchange
): "memory");
410 __INTRIN_INLINE
long _InterlockedCompareExchange(volatile long * const Destination
, const long Exchange
, const long Comperand
)
412 long retval
= Comperand
;
413 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval
] "+a" (retval
) : [Destination
] "m" (*Destination
), [Exchange
] "q" (Exchange
): "memory");
417 __INTRIN_INLINE
void * _InterlockedCompareExchangePointer(void * volatile * const Destination
, void * const Exchange
, void * const Comperand
)
419 void * retval
= (void *)Comperand
;
420 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval
] "=a" (retval
) : "[retval]" (retval
), [Destination
] "m" (*Destination
), [Exchange
] "q" (Exchange
) : "memory");
424 __INTRIN_INLINE
char _InterlockedExchange8(volatile char * const Target
, const char Value
)
427 __asm__("xchgb %[retval], %[Target]" : [retval
] "+r" (retval
) : [Target
] "m" (*Target
) : "memory");
431 __INTRIN_INLINE
short _InterlockedExchange16(volatile short * const Target
, const short Value
)
433 short retval
= Value
;
434 __asm__("xchgw %[retval], %[Target]" : [retval
] "+r" (retval
) : [Target
] "m" (*Target
) : "memory");
438 __INTRIN_INLINE
long _InterlockedExchange(volatile long * const Target
, const long Value
)
441 __asm__("xchgl %[retval], %[Target]" : [retval
] "+r" (retval
) : [Target
] "m" (*Target
) : "memory");
445 __INTRIN_INLINE
void * _InterlockedExchangePointer(void * volatile * const Target
, void * const Value
)
447 void * retval
= Value
;
448 __asm__("xchgl %[retval], %[Target]" : [retval
] "+r" (retval
) : [Target
] "m" (*Target
) : "memory");
452 __INTRIN_INLINE
long _InterlockedExchangeAdd16(volatile short * const Addend
, const short Value
)
455 __asm__("lock; xaddw %[retval], %[Addend]" : [retval
] "+r" (retval
) : [Addend
] "m" (*Addend
) : "memory");
459 __INTRIN_INLINE
long _InterlockedExchangeAdd(volatile long * const Addend
, const long Value
)
462 __asm__("lock; xaddl %[retval], %[Addend]" : [retval
] "+r" (retval
) : [Addend
] "m" (*Addend
) : "memory");
466 __INTRIN_INLINE
char _InterlockedAnd8(volatile char * const value
, const char mask
)
476 y
= _InterlockedCompareExchange8(value
, x
& mask
, x
);
483 __INTRIN_INLINE
short _InterlockedAnd16(volatile short * const value
, const short mask
)
493 y
= _InterlockedCompareExchange16(value
, x
& mask
, x
);
500 __INTRIN_INLINE
long _InterlockedAnd(volatile long * const value
, const long mask
)
510 y
= _InterlockedCompareExchange(value
, x
& mask
, x
);
517 __INTRIN_INLINE
char _InterlockedOr8(volatile char * const value
, const char mask
)
527 y
= _InterlockedCompareExchange8(value
, x
| mask
, x
);
534 __INTRIN_INLINE
short _InterlockedOr16(volatile short * const value
, const short mask
)
544 y
= _InterlockedCompareExchange16(value
, x
| mask
, x
);
551 __INTRIN_INLINE
long _InterlockedOr(volatile long * const value
, const long mask
)
561 y
= _InterlockedCompareExchange(value
, x
| mask
, x
);
568 __INTRIN_INLINE
char _InterlockedXor8(volatile char * const value
, const char mask
)
578 y
= _InterlockedCompareExchange8(value
, x
^ mask
, x
);
585 __INTRIN_INLINE
short _InterlockedXor16(volatile short * const value
, const short mask
)
595 y
= _InterlockedCompareExchange16(value
, x
^ mask
, x
);
602 __INTRIN_INLINE
long _InterlockedXor(volatile long * const value
, const long mask
)
612 y
= _InterlockedCompareExchange(value
, x
^ mask
, x
);
619 __INTRIN_INLINE
long _InterlockedDecrement(volatile long * const lpAddend
)
621 return _InterlockedExchangeAdd(lpAddend
, -1) - 1;
624 __INTRIN_INLINE
long _InterlockedIncrement(volatile long * const lpAddend
)
626 return _InterlockedExchangeAdd(lpAddend
, 1) + 1;
629 __INTRIN_INLINE
short _InterlockedDecrement16(volatile short * const lpAddend
)
631 return _InterlockedExchangeAdd16(lpAddend
, -1) - 1;
634 __INTRIN_INLINE
short _InterlockedIncrement16(volatile short * const lpAddend
)
636 return _InterlockedExchangeAdd16(lpAddend
, 1) + 1;
639 #if defined(_M_AMD64)
640 __INTRIN_INLINE
long long _InterlockedDecrement64(volatile long long * const lpAddend
)
642 return _InterlockedExchangeAdd64(lpAddend
, -1) - 1;
645 __INTRIN_INLINE
long long _InterlockedIncrement64(volatile long long * const lpAddend
)
647 return _InterlockedExchangeAdd64(lpAddend
, 1) + 1;
651 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
653 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
655 __INTRIN_INLINE
long long _InterlockedCompareExchange64(volatile long long * const Destination
, const long long Exchange
, const long long Comperand
);
656 __INTRIN_INLINE
long long _InterlockedCompareExchange64(volatile long long * const Destination
, const long long Exchange
, const long long Comperand
)
658 return __sync_val_compare_and_swap(Destination
, Comperand
, Exchange
);
663 __INTRIN_INLINE
long long _InterlockedCompareExchange64(volatile long long * const Destination
, const long long Exchange
, const long long Comperand
);
664 __INTRIN_INLINE
long long _InterlockedCompareExchange64(volatile long long * const Destination
, const long long Exchange
, const long long Comperand
)
666 long long retval
= Comperand
;
670 "lock; cmpxchg8b %[Destination]" :
671 [retval
] "+A" (retval
) :
672 [Destination
] "m" (*Destination
),
673 "b" ((unsigned long)((Exchange
>> 0) & 0xFFFFFFFF)),
674 "c" ((unsigned long)((Exchange
>> 32) & 0xFFFFFFFF)) :
683 __INTRIN_INLINE
long _InterlockedAddLargeStatistic(volatile long long * const Addend
, const long Value
)
687 "lock; addl %[Value], %[Lo32];"
689 "lock; adcl $0, %[Hi32];"
691 [Lo32
] "+m" (*((volatile long *)(Addend
) + 0)), [Hi32
] "+m" (*((volatile long *)(Addend
) + 1)) :
692 [Value
] "ir" (Value
) :
699 __INTRIN_INLINE
unsigned char _interlockedbittestandreset(volatile long * a
, const long b
)
701 unsigned char retval
;
702 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval
] "=q" (retval
), [a
] "+m" (*a
) : [b
] "Ir" (b
) : "memory");
#if defined(_M_AMD64)
/* 64-bit variant: atomically clears bit b of *a; returns its old value. */
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif
715 __INTRIN_INLINE
unsigned char _interlockedbittestandset(volatile long * a
, const long b
)
717 unsigned char retval
;
718 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval
] "=q" (retval
), [a
] "+m" (*a
) : [b
] "Ir" (b
) : "memory");
#if defined(_M_AMD64)
/* 64-bit variant: atomically sets bit b of *a; returns its old value. */
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif
731 /*** String operations ***/
733 __INTRIN_INLINE
void __stosb(unsigned char * Dest
, const unsigned char Data
, size_t Count
);
734 __INTRIN_INLINE
void __stosw(unsigned short * Dest
, const unsigned short Data
, size_t Count
);
735 __INTRIN_INLINE
void __stosd(unsigned long * Dest
, const unsigned long Data
, size_t Count
);
736 __INTRIN_INLINE
void __movsb(unsigned char * Destination
, const unsigned char * Source
, size_t Count
);
737 __INTRIN_INLINE
void __movsw(unsigned short * Destination
, const unsigned short * Source
, size_t Count
);
738 __INTRIN_INLINE
void __movsd(unsigned long * Destination
, const unsigned long * Source
, size_t Count
);
740 __INTRIN_INLINE
void __stosq(unsigned __int64
* Dest
, const unsigned __int64 Data
, size_t Count
);
741 __INTRIN_INLINE
void __movsq(unsigned long * Destination
, const unsigned long * Source
, size_t Count
);
745 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
746 __INTRIN_INLINE
void __stosb(unsigned char * Dest
, const unsigned char Data
, size_t Count
)
751 [Dest
] "=D" (Dest
), [Count
] "=c" (Count
) :
752 "[Dest]" (Dest
), "a" (Data
), "[Count]" (Count
)
756 __INTRIN_INLINE
void __stosw(unsigned short * Dest
, const unsigned short Data
, size_t Count
)
761 [Dest
] "=D" (Dest
), [Count
] "=c" (Count
) :
762 "[Dest]" (Dest
), "a" (Data
), "[Count]" (Count
)
766 __INTRIN_INLINE
void __stosd(unsigned long * Dest
, const unsigned long Data
, size_t Count
)
771 [Dest
] "=D" (Dest
), [Count
] "=c" (Count
) :
772 "[Dest]" (Dest
), "a" (Data
), "[Count]" (Count
)
#ifdef _M_AMD64
/* 64-bit fill: rep stosq, amd64 only. */
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosq" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}
#endif
788 __INTRIN_INLINE
void __movsb(unsigned char * Destination
, const unsigned char * Source
, size_t Count
)
793 [Destination
] "=D" (Destination
), [Source
] "=S" (Source
), [Count
] "=c" (Count
) :
794 "[Destination]" (Destination
), "[Source]" (Source
), "[Count]" (Count
)
798 __INTRIN_INLINE
void __movsw(unsigned short * Destination
, const unsigned short * Source
, size_t Count
)
803 [Destination
] "=D" (Destination
), [Source
] "=S" (Source
), [Count
] "=c" (Count
) :
804 "[Destination]" (Destination
), "[Source]" (Source
), "[Count]" (Count
)
808 __INTRIN_INLINE
void __movsd(unsigned long * Destination
, const unsigned long * Source
, size_t Count
)
813 [Destination
] "=D" (Destination
), [Source
] "=S" (Source
), [Count
] "=c" (Count
) :
814 "[Destination]" (Destination
), "[Source]" (Source
), "[Count]" (Count
)
#ifdef _M_AMD64
/* 64-bit element copy: rep movsq, amd64 only. */
__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsq" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}
#endif
830 #if defined(_M_AMD64)
832 /*** GS segment addressing ***/
834 __INTRIN_INLINE
void __writegsbyte(const unsigned long Offset
, const unsigned char Data
);
835 __INTRIN_INLINE
void __writegsword(const unsigned long Offset
, const unsigned short Data
);
836 __INTRIN_INLINE
void __writegsdword(const unsigned long Offset
, const unsigned long Data
);
837 __INTRIN_INLINE
void __writegsqword(const unsigned long Offset
, const unsigned __int64 Data
);
838 __INTRIN_INLINE
unsigned char __readgsbyte(const unsigned long Offset
);
839 __INTRIN_INLINE
unsigned short __readgsword(const unsigned long Offset
);
840 __INTRIN_INLINE
unsigned long __readgsdword(const unsigned long Offset
);
841 __INTRIN_INLINE
unsigned __int64
__readgsqword(const unsigned long Offset
);
842 __INTRIN_INLINE
void __incgsbyte(const unsigned long Offset
);
843 __INTRIN_INLINE
void __incgsword(const unsigned long Offset
);
844 __INTRIN_INLINE
void __incgsdword(const unsigned long Offset
);
845 __INTRIN_INLINE
void __addgsbyte(const unsigned long Offset
, const unsigned char Data
);
846 __INTRIN_INLINE
void __addgsword(const unsigned long Offset
, const unsigned short Data
);
847 __INTRIN_INLINE
void __addgsdword(const unsigned long Offset
, const unsigned int Data
);
848 __INTRIN_INLINE
void __addgsqword(const unsigned long Offset
, const unsigned __int64 Data
);
851 __INTRIN_INLINE
void __writegsbyte(const unsigned long Offset
, const unsigned char Data
)
853 __asm__
__volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
856 __INTRIN_INLINE
void __writegsword(const unsigned long Offset
, const unsigned short Data
)
858 __asm__
__volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
861 __INTRIN_INLINE
void __writegsdword(const unsigned long Offset
, const unsigned long Data
)
863 __asm__
__volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
866 __INTRIN_INLINE
void __writegsqword(const unsigned long Offset
, const unsigned __int64 Data
)
868 __asm__
__volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
871 __INTRIN_INLINE
unsigned char __readgsbyte(const unsigned long Offset
)
874 __asm__
__volatile__("movb %%gs:%a[Offset], %b[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
878 __INTRIN_INLINE
unsigned short __readgsword(const unsigned long Offset
)
880 unsigned short value
;
881 __asm__
__volatile__("movw %%gs:%a[Offset], %w[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
885 __INTRIN_INLINE
unsigned long __readgsdword(const unsigned long Offset
)
888 __asm__
__volatile__("movl %%gs:%a[Offset], %k[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
892 __INTRIN_INLINE
unsigned __int64
__readgsqword(const unsigned long Offset
)
894 unsigned __int64 value
;
895 __asm__
__volatile__("movq %%gs:%a[Offset], %q[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
899 __INTRIN_INLINE
void __incgsbyte(const unsigned long Offset
)
901 __asm__
__volatile__("incb %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
904 __INTRIN_INLINE
void __incgsword(const unsigned long Offset
)
906 __asm__
__volatile__("incw %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
909 __INTRIN_INLINE
void __incgsdword(const unsigned long Offset
)
911 __asm__
__volatile__("incl %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
914 __INTRIN_INLINE
void __addgsbyte(const unsigned long Offset
, const unsigned char Data
)
916 __asm__
__volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
919 __INTRIN_INLINE
void __addgsword(const unsigned long Offset
, const unsigned short Data
)
921 __asm__
__volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
924 __INTRIN_INLINE
void __addgsdword(const unsigned long Offset
, const unsigned int Data
)
926 __asm__
__volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
929 __INTRIN_INLINE
void __addgsqword(const unsigned long Offset
, const unsigned __int64 Data
)
931 __asm__
__volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
934 #else /* defined(_M_AMD64) */
936 /*** FS segment addressing ***/
938 __INTRIN_INLINE
void __writefsbyte(const unsigned long Offset
, const unsigned char Data
);
939 __INTRIN_INLINE
void __writefsword(const unsigned long Offset
, const unsigned short Data
);
940 __INTRIN_INLINE
void __writefsdword(const unsigned long Offset
, const unsigned long Data
);
941 __INTRIN_INLINE
unsigned char __readfsbyte(const unsigned long Offset
);
942 __INTRIN_INLINE
unsigned short __readfsword(const unsigned long Offset
);
943 __INTRIN_INLINE
unsigned long __readfsdword(const unsigned long Offset
);
944 __INTRIN_INLINE
void __incfsbyte(const unsigned long Offset
);
945 __INTRIN_INLINE
void __incfsword(const unsigned long Offset
);
946 __INTRIN_INLINE
void __incfsdword(const unsigned long Offset
);
947 __INTRIN_INLINE
void __addfsbyte(const unsigned long Offset
, const unsigned char Data
);
948 __INTRIN_INLINE
void __addfsword(const unsigned long Offset
, const unsigned short Data
);
949 __INTRIN_INLINE
void __addfsdword(const unsigned long Offset
, const unsigned int Data
);
952 __INTRIN_INLINE
void __writefsbyte(const unsigned long Offset
, const unsigned char Data
)
954 __asm__
__volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "iq" (Data
) : "memory");
957 __INTRIN_INLINE
void __writefsword(const unsigned long Offset
, const unsigned short Data
)
959 __asm__
__volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
962 __INTRIN_INLINE
void __writefsdword(const unsigned long Offset
, const unsigned long Data
)
964 __asm__
__volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "ir" (Data
) : "memory");
967 __INTRIN_INLINE
unsigned char __readfsbyte(const unsigned long Offset
)
970 __asm__
__volatile__("movb %%fs:%a[Offset], %b[value]" : [value
] "=q" (value
) : [Offset
] "ir" (Offset
));
974 __INTRIN_INLINE
unsigned short __readfsword(const unsigned long Offset
)
976 unsigned short value
;
977 __asm__
__volatile__("movw %%fs:%a[Offset], %w[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
981 __INTRIN_INLINE
unsigned long __readfsdword(const unsigned long Offset
)
984 __asm__
__volatile__("movl %%fs:%a[Offset], %k[value]" : [value
] "=r" (value
) : [Offset
] "ir" (Offset
));
988 __INTRIN_INLINE
void __incfsbyte(const unsigned long Offset
)
990 __asm__
__volatile__("incb %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
993 __INTRIN_INLINE
void __incfsword(const unsigned long Offset
)
995 __asm__
__volatile__("incw %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
998 __INTRIN_INLINE
void __incfsdword(const unsigned long Offset
)
1000 __asm__
__volatile__("incl %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
) : "memory");
1003 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
1004 __INTRIN_INLINE
void __addfsbyte(const unsigned long Offset
, const unsigned char Data
)
1006 if(!__builtin_constant_p(Offset
))
1007 __asm__
__volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset
] "r" (Offset
) : "memory");
1009 __asm__
__volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "iq" (Data
) : "memory");
1012 __INTRIN_INLINE
void __addfsword(const unsigned long Offset
, const unsigned short Data
)
1014 if(!__builtin_constant_p(Offset
))
1015 __asm__
__volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset
] "r" (Offset
) : "memory");
1017 __asm__
__volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "iq" (Data
) : "memory");
1020 __INTRIN_INLINE
void __addfsdword(const unsigned long Offset
, const unsigned int Data
)
1022 if(!__builtin_constant_p(Offset
))
1023 __asm__
__volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset
] "r" (Offset
) : "memory");
1025 __asm__
__volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset
] "ir" (Offset
), [Data
] "iq" (Data
) : "memory");
1028 #endif /* defined(_M_AMD64) */
1031 /*** Bit manipulation ***/
1033 __INTRIN_INLINE
unsigned char _BitScanForward(unsigned long * const Index
, const unsigned long Mask
);
1034 __INTRIN_INLINE
unsigned char _BitScanReverse(unsigned long * const Index
, const unsigned long Mask
);
1035 __INTRIN_INLINE
unsigned char _bittest(const long * const a
, const long b
);
1037 __INTRIN_INLINE
unsigned char _bittest64(const __int64
* const a
, const __int64 b
);
1039 __INTRIN_INLINE
unsigned char _bittestandcomplement(long * const a
, const long b
);
1040 __INTRIN_INLINE
unsigned char _bittestandreset(long * const a
, const long b
);
1041 __INTRIN_INLINE
unsigned char _bittestandset(long * const a
, const long b
);
1042 __INTRIN_INLINE
unsigned char _rotl8(unsigned char value
, unsigned char shift
);
1043 __INTRIN_INLINE
unsigned short _rotl16(unsigned short value
, unsigned char shift
);
1044 __INTRIN_INLINE
unsigned int _rotl(unsigned int value
, int shift
);
1045 __INTRIN_INLINE
unsigned __int64
_rotl64(unsigned __int64 value
, int shift
);
1046 __INTRIN_INLINE
unsigned int _rotr(unsigned int value
, int shift
);
1047 __INTRIN_INLINE
unsigned char _rotr8(unsigned char value
, unsigned char shift
);
1048 __INTRIN_INLINE
unsigned short _rotr16(unsigned short value
, unsigned char shift
);
1049 __INTRIN_INLINE
unsigned long long __ll_lshift(const unsigned long long Mask
, const int Bit
);
1050 __INTRIN_INLINE
long long __ll_rshift(const long long Mask
, const int Bit
);
1051 __INTRIN_INLINE
unsigned long long __ull_rshift(const unsigned long long Mask
, int Bit
);
1052 __INTRIN_INLINE
unsigned short _byteswap_ushort(unsigned short value
);
1053 __INTRIN_INLINE
unsigned long _byteswap_ulong(unsigned long value
);
1055 __INTRIN_INLINE
unsigned __int64
_byteswap_uint64(unsigned __int64 value
);
1057 __INTRIN_INLINE
unsigned __int64
_byteswap_uint64(unsigned __int64 value
);
1061 __INTRIN_INLINE
unsigned char _BitScanForward(unsigned long * const Index
, const unsigned long Mask
)
1063 __asm__("bsfl %[Mask], %[Index]" : [Index
] "=r" (*Index
) : [Mask
] "mr" (Mask
));
1064 return Mask
? 1 : 0;
1067 __INTRIN_INLINE
unsigned char _BitScanReverse(unsigned long * const Index
, const unsigned long Mask
)
1069 __asm__("bsrl %[Mask], %[Index]" : [Index
] "=r" (*Index
) : [Mask
] "mr" (Mask
));
1070 return Mask
? 1 : 0;
1073 /* NOTE: again, the bizarre implementation follows Visual C++ */
1074 __INTRIN_INLINE
unsigned char _bittest(const long * const a
, const long b
)
1076 unsigned char retval
;
1078 if(__builtin_constant_p(b
))
1079 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval
] "=q" (retval
) : [a
] "mr" (*(a
+ (b
/ 32))), [b
] "Ir" (b
% 32));
1081 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval
] "=q" (retval
) : [a
] "m" (*a
), [b
] "r" (b
));
1087 __INTRIN_INLINE
unsigned char _bittest64(const __int64
* const a
, const __int64 b
)
1089 unsigned char retval
;
1091 if(__builtin_constant_p(b
))
1092 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval
] "=q" (retval
) : [a
] "mr" (*(a
+ (b
/ 64))), [b
] "Ir" (b
% 64));
1094 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval
] "=q" (retval
) : [a
] "m" (*a
), [b
] "r" (b
));
1100 __INTRIN_INLINE
unsigned char _bittestandcomplement(long * const a
, const long b
)
1102 unsigned char retval
;
1104 if(__builtin_constant_p(b
))
1105 __asm__("btc %[b], %[a]; setb %b[retval]" : [a
] "+mr" (*(a
+ (b
/ 32))), [retval
] "=q" (retval
) : [b
] "Ir" (b
% 32));
1107 __asm__("btc %[b], %[a]; setb %b[retval]" : [a
] "+m" (*a
), [retval
] "=q" (retval
) : [b
] "r" (b
));
1112 __INTRIN_INLINE
unsigned char _bittestandreset(long * const a
, const long b
)
1114 unsigned char retval
;
1116 if(__builtin_constant_p(b
))
1117 __asm__("btr %[b], %[a]; setb %b[retval]" : [a
] "+mr" (*(a
+ (b
/ 32))), [retval
] "=q" (retval
) : [b
] "Ir" (b
% 32));
1119 __asm__("btr %[b], %[a]; setb %b[retval]" : [a
] "+m" (*a
), [retval
] "=q" (retval
) : [b
] "r" (b
));
1124 __INTRIN_INLINE
unsigned char _bittestandset(long * const a
, const long b
)
1126 unsigned char retval
;
1128 if(__builtin_constant_p(b
))
1129 __asm__("bts %[b], %[a]; setb %b[retval]" : [a
] "+mr" (*(a
+ (b
/ 32))), [retval
] "=q" (retval
) : [b
] "Ir" (b
% 32));
1131 __asm__("bts %[b], %[a]; setb %b[retval]" : [a
] "+m" (*a
), [retval
] "=q" (retval
) : [b
] "r" (b
));
1136 __INTRIN_INLINE
unsigned char _rotl8(unsigned char value
, unsigned char shift
)
1138 unsigned char retval
;
1139 __asm__("rolb %b[shift], %b[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1143 __INTRIN_INLINE
unsigned short _rotl16(unsigned short value
, unsigned char shift
)
1145 unsigned short retval
;
1146 __asm__("rolw %b[shift], %w[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1150 __INTRIN_INLINE
unsigned int _rotl(unsigned int value
, int shift
)
1152 unsigned long retval
;
1153 __asm__("roll %b[shift], %k[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1158 __INTRIN_INLINE
unsigned __int64
_rotl64(unsigned __int64 value
, int shift
)
1160 unsigned __int64 retval
;
1161 __asm__("rolq %b[shift], %k[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1165 __INTRIN_INLINE
unsigned __int64
_rotl64(unsigned __int64 value
, int shift
)
1167 /* FIXME: this is probably not optimal */
1168 return (value
<< shift
) | (value
>> (64 - shift
));
1172 __INTRIN_INLINE
unsigned int _rotr(unsigned int value
, int shift
)
1174 unsigned long retval
;
1175 __asm__("rorl %b[shift], %k[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1179 __INTRIN_INLINE
unsigned char _rotr8(unsigned char value
, unsigned char shift
)
1181 unsigned char retval
;
1182 __asm__("rorb %b[shift], %b[retval]" : [retval
] "=qm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1186 __INTRIN_INLINE
unsigned short _rotr16(unsigned short value
, unsigned char shift
)
1188 unsigned short retval
;
1189 __asm__("rorw %b[shift], %w[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
), [shift
] "Nc" (shift
));
1194 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1195 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1196 can pass 64-bit operands around - passing the two 32 bit parts separately
1197 just confuses it. Also we declare Bit as an int and then truncate it to
1198 match Visual C++ behavior
1200 __INTRIN_INLINE
unsigned long long __ll_lshift(const unsigned long long Mask
, const int Bit
)
1202 unsigned long long retval
= Mask
;
1206 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1208 [Bit
] "Nc" ((unsigned char)((unsigned long)Bit
) & 0xFF)
1214 __INTRIN_INLINE
long long __ll_rshift(const long long Mask
, const int Bit
)
1216 long long retval
= Mask
;
1220 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1222 [Bit
] "Nc" ((unsigned char)((unsigned long)Bit
) & 0xFF)
1228 __INTRIN_INLINE
unsigned long long __ull_rshift(const unsigned long long Mask
, int Bit
)
1230 unsigned long long retval
= Mask
;
1234 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1236 [Bit
] "Nc" ((unsigned char)((unsigned long)Bit
) & 0xFF)
1242 __INTRIN_INLINE
unsigned short _byteswap_ushort(unsigned short value
)
1244 unsigned short retval
;
1245 __asm__("rorw $8, %w[retval]" : [retval
] "=rm" (retval
) : "[retval]" (value
));
1249 __INTRIN_INLINE
unsigned long _byteswap_ulong(unsigned long value
)
1251 unsigned long retval
;
1252 __asm__("bswapl %[retval]" : [retval
] "=r" (retval
) : "[retval]" (value
));
1257 __INTRIN_INLINE
unsigned __int64
_byteswap_uint64(unsigned __int64 value
)
1259 unsigned __int64 retval
;
1260 __asm__("bswapq %[retval]" : [retval
] "=r" (retval
) : "[retval]" (value
));
1264 __INTRIN_INLINE
unsigned __int64
_byteswap_uint64(unsigned __int64 value
)
1267 unsigned __int64 int64part
;
1269 unsigned long lowpart
;
1270 unsigned long hipart
;
1273 retval
.int64part
= value
;
1274 __asm__("bswapl %[lowpart]\n"
1275 "bswapl %[hipart]\n"
1276 : [lowpart
] "=r" (retval
.hipart
), [hipart
] "=r" (retval
.lowpart
) : "[lowpart]" (retval
.lowpart
), "[hipart]" (retval
.hipart
) );
1277 return retval
.int64part
;
1281 /*** 64-bit math ***/
1283 __INTRIN_INLINE
long long __emul(const int a
, const int b
);
1284 __INTRIN_INLINE
unsigned long long __emulu(const unsigned int a
, const unsigned int b
);
1286 __INTRIN_INLINE __int64
__mulh(__int64 a
, __int64 b
);
1287 __INTRIN_INLINE
unsigned __int64
__umulh(unsigned __int64 a
, unsigned __int64 b
);
1291 __INTRIN_INLINE
long long __emul(const int a
, const int b
)
1294 __asm__("imull %[b]" : "=A" (retval
) : [a
] "a" (a
), [b
] "rm" (b
));
1298 __INTRIN_INLINE
unsigned long long __emulu(const unsigned int a
, const unsigned int b
)
1300 unsigned long long retval
;
1301 __asm__("mull %[b]" : "=A" (retval
) : [a
] "a" (a
), [b
] "rm" (b
));
1307 __INTRIN_INLINE __int64
__mulh(__int64 a
, __int64 b
)
1310 __asm__("imulq %[b]" : "=d" (retval
) : [a
] "a" (a
), [b
] "rm" (b
));
1314 __INTRIN_INLINE
unsigned __int64
__umulh(unsigned __int64 a
, unsigned __int64 b
)
1316 unsigned __int64 retval
;
1317 __asm__("mulq %[b]" : "=d" (retval
) : [a
] "a" (a
), [b
] "rm" (b
));
1325 __INTRIN_INLINE
unsigned char __inbyte(const unsigned short Port
);
1326 __INTRIN_INLINE
unsigned short __inword(const unsigned short Port
);
1327 __INTRIN_INLINE
unsigned long __indword(const unsigned short Port
);
1328 __INTRIN_INLINE
void __inbytestring(unsigned short Port
, unsigned char * Buffer
, unsigned long Count
);
1329 __INTRIN_INLINE
void __inwordstring(unsigned short Port
, unsigned short * Buffer
, unsigned long Count
);
1330 __INTRIN_INLINE
void __indwordstring(unsigned short Port
, unsigned long * Buffer
, unsigned long Count
);
1331 __INTRIN_INLINE
void __outbyte(unsigned short const Port
, const unsigned char Data
);
1332 __INTRIN_INLINE
void __outword(unsigned short const Port
, const unsigned short Data
);
1333 __INTRIN_INLINE
void __outdword(unsigned short const Port
, const unsigned long Data
);
1334 __INTRIN_INLINE
void __outbytestring(unsigned short const Port
, const unsigned char * const Buffer
, const unsigned long Count
);
1335 __INTRIN_INLINE
void __outwordstring(unsigned short const Port
, const unsigned short * const Buffer
, const unsigned long Count
);
1336 __INTRIN_INLINE
void __outdwordstring(unsigned short const Port
, const unsigned long * const Buffer
, const unsigned long Count
);
1337 __INTRIN_INLINE
int _inp(unsigned short Port
);
1338 __INTRIN_INLINE
unsigned short _inpw(unsigned short Port
);
1339 __INTRIN_INLINE
unsigned long _inpd(unsigned short Port
);
1340 __INTRIN_INLINE
int _outp(unsigned short Port
, int databyte
);
1341 __INTRIN_INLINE
unsigned short _outpw(unsigned short Port
, unsigned short dataword
);
1342 __INTRIN_INLINE
unsigned long _outpd(unsigned short Port
, unsigned long dataword
);
1345 __INTRIN_INLINE
unsigned char __inbyte(const unsigned short Port
)
1348 __asm__
__volatile__("inb %w[Port], %b[byte]" : [byte
] "=a" (byte
) : [Port
] "Nd" (Port
));
1352 __INTRIN_INLINE
unsigned short __inword(const unsigned short Port
)
1354 unsigned short word
;
1355 __asm__
__volatile__("inw %w[Port], %w[word]" : [word
] "=a" (word
) : [Port
] "Nd" (Port
));
1359 __INTRIN_INLINE
unsigned long __indword(const unsigned short Port
)
1361 unsigned long dword
;
1362 __asm__
__volatile__("inl %w[Port], %k[dword]" : [dword
] "=a" (dword
) : [Port
] "Nd" (Port
));
1366 __INTRIN_INLINE
void __inbytestring(unsigned short Port
, unsigned char * Buffer
, unsigned long Count
)
1368 __asm__ __volatile__
1371 [Buffer
] "=D" (Buffer
), [Count
] "=c" (Count
) :
1372 "d" (Port
), "[Buffer]" (Buffer
), "[Count]" (Count
) :
1377 __INTRIN_INLINE
void __inwordstring(unsigned short Port
, unsigned short * Buffer
, unsigned long Count
)
1379 __asm__ __volatile__
1382 [Buffer
] "=D" (Buffer
), [Count
] "=c" (Count
) :
1383 "d" (Port
), "[Buffer]" (Buffer
), "[Count]" (Count
) :
1388 __INTRIN_INLINE
void __indwordstring(unsigned short Port
, unsigned long * Buffer
, unsigned long Count
)
1390 __asm__ __volatile__
1393 [Buffer
] "=D" (Buffer
), [Count
] "=c" (Count
) :
1394 "d" (Port
), "[Buffer]" (Buffer
), "[Count]" (Count
) :
1399 __INTRIN_INLINE
void __outbyte(unsigned short const Port
, const unsigned char Data
)
1401 __asm__
__volatile__("outb %b[Data], %w[Port]" : : [Port
] "Nd" (Port
), [Data
] "a" (Data
));
1404 __INTRIN_INLINE
void __outword(unsigned short const Port
, const unsigned short Data
)
1406 __asm__
__volatile__("outw %w[Data], %w[Port]" : : [Port
] "Nd" (Port
), [Data
] "a" (Data
));
1409 __INTRIN_INLINE
void __outdword(unsigned short const Port
, const unsigned long Data
)
1411 __asm__
__volatile__("outl %k[Data], %w[Port]" : : [Port
] "Nd" (Port
), [Data
] "a" (Data
));
1414 __INTRIN_INLINE
void __outbytestring(unsigned short const Port
, const unsigned char * const Buffer
, const unsigned long Count
)
1416 __asm__
__volatile__("rep; outsb" : : [Port
] "d" (Port
), [Buffer
] "S" (Buffer
), "c" (Count
));
1419 __INTRIN_INLINE
void __outwordstring(unsigned short const Port
, const unsigned short * const Buffer
, const unsigned long Count
)
1421 __asm__
__volatile__("rep; outsw" : : [Port
] "d" (Port
), [Buffer
] "S" (Buffer
), "c" (Count
));
1424 __INTRIN_INLINE
void __outdwordstring(unsigned short const Port
, const unsigned long * const Buffer
, const unsigned long Count
)
1426 __asm__
__volatile__("rep; outsl" : : [Port
] "d" (Port
), [Buffer
] "S" (Buffer
), "c" (Count
));
1429 __INTRIN_INLINE
int _inp(unsigned short Port
)
1431 return __inbyte(Port
);
1434 __INTRIN_INLINE
unsigned short _inpw(unsigned short Port
)
1436 return __inword(Port
);
1439 __INTRIN_INLINE
unsigned long _inpd(unsigned short Port
)
1441 return __indword(Port
);
1444 __INTRIN_INLINE
int _outp(unsigned short Port
, int databyte
)
1446 __outbyte(Port
, (unsigned char)databyte
);
1450 __INTRIN_INLINE
unsigned short _outpw(unsigned short Port
, unsigned short dataword
)
1452 __outword(Port
, dataword
);
1456 __INTRIN_INLINE
unsigned long _outpd(unsigned short Port
, unsigned long dataword
)
1458 __outdword(Port
, dataword
);
1463 /*** System information ***/
1465 __INTRIN_INLINE
void __cpuid(int CPUInfo
[], const int InfoType
);
1466 __INTRIN_INLINE
unsigned long long __rdtsc(void);
1467 __INTRIN_INLINE
void __writeeflags(uintptr_t Value
);
1468 __INTRIN_INLINE
uintptr_t __readeflags(void);
1471 __INTRIN_INLINE
void __cpuid(int CPUInfo
[], const int InfoType
)
1473 __asm__
__volatile__("cpuid" : "=a" (CPUInfo
[0]), "=b" (CPUInfo
[1]), "=c" (CPUInfo
[2]), "=d" (CPUInfo
[3]) : "a" (InfoType
));
1476 __INTRIN_INLINE
unsigned long long __rdtsc(void)
1479 unsigned long long low
, high
;
1480 __asm__
__volatile__("rdtsc" : "=a"(low
), "=d"(high
));
1481 return low
| (high
<< 32);
1483 unsigned long long retval
;
1484 __asm__
__volatile__("rdtsc" : "=A"(retval
));
1489 __INTRIN_INLINE
void __writeeflags(uintptr_t Value
)
1491 __asm__
__volatile__("push %0\n popf" : : "rim"(Value
));
1494 __INTRIN_INLINE
uintptr_t __readeflags(void)
1497 __asm__
__volatile__("pushf\n pop %0" : "=rm"(retval
));
1501 /*** Interrupts ***/
1503 __INTRIN_INLINE
void __int2c(void);
1504 __INTRIN_INLINE
void _disable(void);
1505 __INTRIN_INLINE
void _enable(void);
1506 __INTRIN_INLINE
void __halt(void);
1507 __declspec(noreturn
) __INTRIN_INLINE
void __fastfail(unsigned int Code
);
1510 #define __debugbreak() __asm__("int $3")
1512 __INTRIN_INLINE
void __debugbreak(void);
1513 __INTRIN_INLINE
void __debugbreak(void)
1519 __INTRIN_INLINE
void __int2c(void)
1521 __asm__("int $0x2c");
1524 __INTRIN_INLINE
void _disable(void)
1526 __asm__("cli" : : : "memory");
1529 __INTRIN_INLINE
void _enable(void)
1531 __asm__("sti" : : : "memory");
1534 __INTRIN_INLINE
void __halt(void)
1536 __asm__("hlt" : : : "memory");
1539 __declspec(noreturn
)
1540 __INTRIN_INLINE
void __fastfail(unsigned int Code
)
1542 __asm__("int $0x29" : : "c"(Code
) : "memory");
1545 /*** Protected memory management ***/
1547 __INTRIN_INLINE
void __invlpg(void * const Address
);
1549 __INTRIN_INLINE
void __writecr0(const unsigned __int64 Data
);
1550 __INTRIN_INLINE
void __writecr3(const unsigned __int64 Data
);
1551 __INTRIN_INLINE
void __writecr4(const unsigned __int64 Data
);
1552 __INTRIN_INLINE
void __writecr8(const unsigned __int64 Data
);
1553 __INTRIN_INLINE
unsigned __int64
__readcr0(void);
1554 __INTRIN_INLINE
unsigned __int64
__readcr2(void);
1555 __INTRIN_INLINE
unsigned __int64
__readcr3(void);
1556 __INTRIN_INLINE
unsigned __int64
__readcr4(void);
1557 __INTRIN_INLINE
unsigned __int64
__readcr8(void);
1558 __INTRIN_INLINE
unsigned __int64
__readdr(unsigned int reg
);
1559 __INTRIN_INLINE
void __writedr(unsigned reg
, unsigned __int64 value
);
1560 #else /* _M_AMD64 */
1561 __INTRIN_INLINE
void __writecr0(const unsigned int Data
);
1562 __INTRIN_INLINE
void __writecr3(const unsigned int Data
);
1563 __INTRIN_INLINE
void __writecr4(const unsigned int Data
);
1564 __INTRIN_INLINE
unsigned long __readcr0(void);
1565 __INTRIN_INLINE
unsigned long __readcr2(void);
1566 __INTRIN_INLINE
unsigned long __readcr3(void);
1567 __INTRIN_INLINE
unsigned long __readcr4(void);
1568 __INTRIN_INLINE
unsigned int __readdr(unsigned int reg
);
1569 __INTRIN_INLINE
void __writedr(unsigned reg
, unsigned int value
);
1570 #endif /* _M_AMD64 */
1575 __INTRIN_INLINE
void __writecr0(const unsigned __int64 Data
)
1577 __asm__("mov %[Data], %%cr0" : : [Data
] "r" (Data
) : "memory");
1580 __INTRIN_INLINE
void __writecr3(const unsigned __int64 Data
)
1582 __asm__("mov %[Data], %%cr3" : : [Data
] "r" (Data
) : "memory");
1585 __INTRIN_INLINE
void __writecr4(const unsigned __int64 Data
)
1587 __asm__("mov %[Data], %%cr4" : : [Data
] "r" (Data
) : "memory");
1590 __INTRIN_INLINE
void __writecr8(const unsigned __int64 Data
)
1592 __asm__("mov %[Data], %%cr8" : : [Data
] "r" (Data
) : "memory");
1595 __INTRIN_INLINE
unsigned __int64
__readcr0(void)
1597 unsigned __int64 value
;
1598 __asm__
__volatile__("mov %%cr0, %[value]" : [value
] "=r" (value
));
1602 __INTRIN_INLINE
unsigned __int64
__readcr2(void)
1604 unsigned __int64 value
;
1605 __asm__
__volatile__("mov %%cr2, %[value]" : [value
] "=r" (value
));
1609 __INTRIN_INLINE
unsigned __int64
__readcr3(void)
1611 unsigned __int64 value
;
1612 __asm__
__volatile__("mov %%cr3, %[value]" : [value
] "=r" (value
));
1616 __INTRIN_INLINE
unsigned __int64
__readcr4(void)
1618 unsigned __int64 value
;
1619 __asm__
__volatile__("mov %%cr4, %[value]" : [value
] "=r" (value
));
1623 __INTRIN_INLINE
unsigned __int64
__readcr8(void)
1625 unsigned __int64 value
;
1626 __asm__
__volatile__("movq %%cr8, %q[value]" : [value
] "=r" (value
));
1630 #else /* _M_AMD64 */
1632 __INTRIN_INLINE
void __writecr0(const unsigned int Data
)
1634 __asm__("mov %[Data], %%cr0" : : [Data
] "r" (Data
) : "memory");
1637 __INTRIN_INLINE
void __writecr3(const unsigned int Data
)
1639 __asm__("mov %[Data], %%cr3" : : [Data
] "r" (Data
) : "memory");
1642 __INTRIN_INLINE
void __writecr4(const unsigned int Data
)
1644 __asm__("mov %[Data], %%cr4" : : [Data
] "r" (Data
) : "memory");
1647 __INTRIN_INLINE
unsigned long __readcr0(void)
1649 unsigned long value
;
1650 __asm__
__volatile__("mov %%cr0, %[value]" : [value
] "=r" (value
));
1654 __INTRIN_INLINE
unsigned long __readcr2(void)
1656 unsigned long value
;
1657 __asm__
__volatile__("mov %%cr2, %[value]" : [value
] "=r" (value
));
1661 __INTRIN_INLINE
unsigned long __readcr3(void)
1663 unsigned long value
;
1664 __asm__
__volatile__("mov %%cr3, %[value]" : [value
] "=r" (value
));
1668 __INTRIN_INLINE
unsigned long __readcr4(void)
1670 unsigned long value
;
1671 __asm__
__volatile__("mov %%cr4, %[value]" : [value
] "=r" (value
));
1675 #endif /* _M_AMD64 */
1679 __INTRIN_INLINE
unsigned __int64
__readdr(unsigned int reg
)
1681 unsigned __int64 value
;
1685 __asm__
__volatile__("movq %%dr0, %q[value]" : [value
] "=r" (value
));
1688 __asm__
__volatile__("movq %%dr1, %q[value]" : [value
] "=r" (value
));
1691 __asm__
__volatile__("movq %%dr2, %q[value]" : [value
] "=r" (value
));
1694 __asm__
__volatile__("movq %%dr3, %q[value]" : [value
] "=r" (value
));
1697 __asm__
__volatile__("movq %%dr4, %q[value]" : [value
] "=r" (value
));
1700 __asm__
__volatile__("movq %%dr5, %q[value]" : [value
] "=r" (value
));
1703 __asm__
__volatile__("movq %%dr6, %q[value]" : [value
] "=r" (value
));
1706 __asm__
__volatile__("movq %%dr7, %q[value]" : [value
] "=r" (value
));
1712 __INTRIN_INLINE
void __writedr(unsigned reg
, unsigned __int64 value
)
1717 __asm__("movq %q[value], %%dr0" : : [value
] "r" (value
) : "memory");
1720 __asm__("movq %q[value], %%dr1" : : [value
] "r" (value
) : "memory");
1723 __asm__("movq %q[value], %%dr2" : : [value
] "r" (value
) : "memory");
1726 __asm__("movq %q[value], %%dr3" : : [value
] "r" (value
) : "memory");
1729 __asm__("movq %q[value], %%dr4" : : [value
] "r" (value
) : "memory");
1732 __asm__("movq %q[value], %%dr5" : : [value
] "r" (value
) : "memory");
1735 __asm__("movq %q[value], %%dr6" : : [value
] "r" (value
) : "memory");
1738 __asm__("movq %q[value], %%dr7" : : [value
] "r" (value
) : "memory");
1743 #else /* _M_AMD64 */
1745 __INTRIN_INLINE
unsigned int __readdr(unsigned int reg
)
1751 __asm__
__volatile__("mov %%dr0, %[value]" : [value
] "=r" (value
));
1754 __asm__
__volatile__("mov %%dr1, %[value]" : [value
] "=r" (value
));
1757 __asm__
__volatile__("mov %%dr2, %[value]" : [value
] "=r" (value
));
1760 __asm__
__volatile__("mov %%dr3, %[value]" : [value
] "=r" (value
));
1763 __asm__
__volatile__("mov %%dr4, %[value]" : [value
] "=r" (value
));
1766 __asm__
__volatile__("mov %%dr5, %[value]" : [value
] "=r" (value
));
1769 __asm__
__volatile__("mov %%dr6, %[value]" : [value
] "=r" (value
));
1772 __asm__
__volatile__("mov %%dr7, %[value]" : [value
] "=r" (value
));
1778 __INTRIN_INLINE
void __writedr(unsigned reg
, unsigned int value
)
1783 __asm__("mov %[value], %%dr0" : : [value
] "r" (value
) : "memory");
1786 __asm__("mov %[value], %%dr1" : : [value
] "r" (value
) : "memory");
1789 __asm__("mov %[value], %%dr2" : : [value
] "r" (value
) : "memory");
1792 __asm__("mov %[value], %%dr3" : : [value
] "r" (value
) : "memory");
1795 __asm__("mov %[value], %%dr4" : : [value
] "r" (value
) : "memory");
1798 __asm__("mov %[value], %%dr5" : : [value
] "r" (value
) : "memory");
1801 __asm__("mov %[value], %%dr6" : : [value
] "r" (value
) : "memory");
1804 __asm__("mov %[value], %%dr7" : : [value
] "r" (value
) : "memory");
1809 #endif /* _M_AMD64 */
1811 __INTRIN_INLINE
void __invlpg(void * const Address
)
1813 __asm__("invlpg %[Address]" : : [Address
] "m" (*((unsigned char *)(Address
))) : "memory");
1817 /*** System operations ***/
1819 __INTRIN_INLINE
unsigned long long __readmsr(const int reg
);
1820 __INTRIN_INLINE
void __writemsr(const unsigned long Register
, const unsigned long long Value
);
1821 __INTRIN_INLINE
unsigned long long __readpmc(const int counter
);
1822 __INTRIN_INLINE
unsigned long __segmentlimit(const unsigned long a
);
1823 __INTRIN_INLINE
void __wbinvd(void);
1824 __INTRIN_INLINE
void __lidt(void *Source
);
1825 __INTRIN_INLINE
void __sidt(void *Destination
);
1828 __INTRIN_INLINE
unsigned long long __readmsr(const int reg
)
1831 unsigned long low
, high
;
1832 __asm__
__volatile__("rdmsr" : "=a" (low
), "=d" (high
) : "c" (reg
));
1833 return ((unsigned long long)high
<< 32) | low
;
1835 unsigned long long retval
;
1836 __asm__
__volatile__("rdmsr" : "=A" (retval
) : "c" (reg
));
1841 __INTRIN_INLINE
void __writemsr(const unsigned long Register
, const unsigned long long Value
)
1844 __asm__
__volatile__("wrmsr" : : "a" (Value
), "d" (Value
>> 32), "c" (Register
));
1846 __asm__
__volatile__("wrmsr" : : "A" (Value
), "c" (Register
));
1850 __INTRIN_INLINE
unsigned long long __readpmc(const int counter
)
1852 unsigned long long retval
;
1853 __asm__
__volatile__("rdpmc" : "=A" (retval
) : "c" (counter
));
1857 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1858 __INTRIN_INLINE
unsigned long __segmentlimit(const unsigned long a
)
1860 unsigned long retval
;
1861 __asm__
__volatile__("lsl %[a], %[retval]" : [retval
] "=r" (retval
) : [a
] "rm" (a
));
1865 __INTRIN_INLINE
void __wbinvd(void)
1867 __asm__
__volatile__("wbinvd" : : : "memory");
1870 __INTRIN_INLINE
void __lidt(void *Source
)
1872 __asm__
__volatile__("lidt %0" : : "m"(*(short*)Source
));
1875 __INTRIN_INLINE
void __sidt(void *Destination
)
1877 __asm__
__volatile__("sidt %0" : : "m"(*(short*)Destination
) : "memory");
1880 /*** Misc operations ***/
1882 __INTRIN_INLINE
void _mm_pause(void);
1883 __INTRIN_INLINE
void __nop(void);
1885 __INTRIN_INLINE
void _mm_pause(void)
1887 __asm__
__volatile__("pause" : : : "memory");
1890 __INTRIN_INLINE
void __nop(void)
1892 __asm__
__volatile__("nop");
1899 #endif /* KJK_INTRIN_X86_H_ */