/*
    Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
    Microsoft Visual C++ functions. Originally developed for the ReactOS
    (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
    projects.

    Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

    Permission is hereby granted, free of charge, to any person obtaining a
    copy of this software and associated documentation files (the "Software"),
    to deal in the Software without restriction, including without limitation
    the rights to use, copy, modify, merge, publish, distribute, sublicense,
    and/or sell copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
*/
#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_
/*
    FIXME: review all "memory" clobbers, add/remove to match Visual C++
    behavior: some "obvious" memory barriers are not present in the Visual C++
    implementation - e.g. __stosX; on the other hand, some memory barriers that
    *are* present could have been missed

    NOTE: this is a *compatibility* header. Some functions may look wrong at
    first, but they're only "as wrong" as they would be on Visual C++. Our
    priority is compatibility

    NOTE: unlike most people who write inline asm for GCC, I didn't pull the
    constraints and the uses of __volatile__ out of my... hat. Do not touch
    them. I hate cargo cult programming

    NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
    barriers aren't there in Visual C++ (e.g. __stosX)

    NOTE: review all intrinsics with a return value, add/remove __volatile__
    where necessary. If an intrinsic whose value is ignored generates a no-op
    under Visual C++, __volatile__ must be omitted; if it always generates code
    (for example, if it has side effects), __volatile__ must be specified. GCC
    will only optimize out non-volatile asm blocks with outputs, so input-only
    blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
    and follow Visual C++ behavior

    NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
    atomic operations. Test the version like this:

    #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

    Pay attention to the type of barrier. Make it match with what Visual C++
    would use in the same case
*/
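/*
    Illustrative example (editorial, not from the original header): for GCC 4.3.2
    the version test evaluates as 4 * 10000 + 3 * 100 + 2 = 40302, and 40302 > 40100,
    so the __sync_* based implementations below are compiled in; for GCC 4.0.3 it
    evaluates to 40003 and the inline-assembly fallbacks are used instead. Code that
    includes this header can apply the same test:

    #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
        // the __sync_* built-ins are available here
    #endif
*/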
/*** memcpy must be memmove ***/
__INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
{
    return memmove(dest, source, num);
}
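/*
    Usage sketch (illustrative, not part of the original header): because memcpy is
    routed through memmove, overlapping regions behave predictably, e.g. shifting a
    buffer left by one byte:

        char buf[8] = "ABCDEFG";
        memcpy(buf, buf + 1, 6);    // buf now holds "BCDEFGG"
*/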
/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
#define _alloca(s) __builtin_alloca(s)
/*** Memory barriers ***/

__INTRIN_INLINE void _ReadWriteBarrier(void);
__INTRIN_INLINE void _mm_mfence(void);
__INTRIN_INLINE void _mm_lfence(void);
__INTRIN_INLINE void _mm_sfence(void);
__INTRIN_INLINE void __faststorefence(void);

__INTRIN_INLINE void _ReadWriteBarrier(void)
{
    __asm__ __volatile__("" : : : "memory");
}

/* GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier

__INTRIN_INLINE void _mm_mfence(void)
{
    __asm__ __volatile__("mfence" : : : "memory");
}

__INTRIN_INLINE void _mm_lfence(void)
{
    __asm__ __volatile__("lfence");
}

__INTRIN_INLINE void _mm_sfence(void)
{
    __asm__ __volatile__("sfence");
}
__INTRIN_INLINE void __faststorefence(void)
{
    long local;
    __asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
}
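/*
    Editorial note (illustrative): on x86 a LOCK'd read-modify-write such as the
    "lock; orl $0, <stack slot>" above has full-fence semantics and is typically
    cheaper than mfence, which is why __faststorefence exists. A minimal sketch of
    its intended use, with placeholder variables shared_data and ready_flag:

        shared_data = 42;       // publish the payload
        __faststorefence();     // make the store visible before the flag
        ready_flag = 1;
*/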
/*** Atomic operations ***/

__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value);
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b);
__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b);
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b);
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b);
#endif
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value);
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value);
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
#endif
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
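/*
    Usage sketch (illustrative, not part of the original header): the compare-exchange
    intrinsics return the value previously stored, so callers typically retry until the
    exchange succeeds. For example, an atomic maximum built on _InterlockedCompareExchange
    (variable names are placeholders):

        long observed = *counter;
        while (observed < candidate)
        {
            long previous = _InterlockedCompareExchange(counter, candidate, observed);
            if (previous == observed)
                break;              // exchange took effect
            observed = previous;    // lost the race; retry with the fresh value
        }
*/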
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
    return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
#endif

__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
    __sync_synchronize();
    return (void *)__sync_lock_test_and_set(Target, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
    return __sync_fetch_and_add(Addend, Value);
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}
#endif
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
    return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
    return __sync_fetch_and_and(value, mask);
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
    return __sync_fetch_and_and(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_and(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
    return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
    return __sync_fetch_and_or(value, mask);
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
    return __sync_fetch_and_or(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_or(value, mask);
}
#endif

__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
    return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
    return __sync_fetch_and_xor(value, mask);
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
    return __sync_fetch_and_xor(value, mask);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
{
    return __sync_fetch_and_xor(value, mask);
}
#endif
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}
#endif

#else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
#endif
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
    char retval = Comperand;
    __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
    short retval = Comperand;
    __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
    long retval = Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
    void * retval = (void *)Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
    long retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
    void * retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
    short retval = Value;
    __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}

__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
    long retval = Value;
    __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
    char x;
    char y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x & mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
    short x;
    short y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x & mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
    long x;
    long y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x & mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
    char x;
    char y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x | mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
    short x;
    short y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x | mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
    long x;
    long y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x | mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
    char x;
    char y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange8(value, x ^ mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
    short x;
    short y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange16(value, x ^ mask, x);
    }
    while(y != x);
    return y;
}

__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
    long x;
    long y;
    y = *value;
    do
    {
        x = y;
        y = _InterlockedCompareExchange(value, x ^ mask, x);
    }
    while(y != x);
    return y;
}
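/*
    Usage sketch (illustrative, not part of the original header): like their Visual C++
    counterparts, these return the value held *before* the operation, so they can update
    and test previous state in one step, e.g. setting a "dirty" bit in a shared flags
    byte (names are placeholders):

        volatile char flags = 0;
        char previous = _InterlockedOr8(&flags, 0x01);
        if (previous & 0x01)
        {
            // the bit was already set by another thread
        }
*/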
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
    return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
    return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
    return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
    return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}

#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif

#endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
    long long retval = Comperand;

    __asm__
    (
        "lock; cmpxchg8b %[Destination]" :
        [retval] "+A" (retval) :
        [Destination] "m" (*Destination),
        "b" ((unsigned long)((Exchange >>  0) & 0xFFFFFFFF)),
        "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
        "memory"
    );

    return retval;
}

#endif
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
    __asm__
    (
        "lock; add %[Value], %[Lo32];"
        "lock; adc $0, %[Hi32];" :
        [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
        [Value] "ir" (Value) :
        "memory"
    );

    return Value;
}
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif

__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif
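/*
    Usage sketch (illustrative, not part of the original header): the interlocked
    bit-test intrinsics return the previous state of the bit, so they can claim a slot
    in a bitmask atomically (names are placeholders):

        volatile long slot_mask = 0;
        if (!_interlockedbittestandset(&slot_mask, 3))
        {
            // bit 3 was clear before the call; this thread now owns slot 3
        }
*/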
/*** String operations ***/

__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count);
__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count);
__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count);
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count);
__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count);
__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count);
#if defined(_M_AMD64)
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count);
__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count);
#endif
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosb" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosw" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosl" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

#if defined(_M_AMD64)
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosq" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}
#endif
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsb" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsw" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsl" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

#if defined(_M_AMD64)
__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsq" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}
#endif
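/*
    Usage sketch (illustrative, not part of the original header): __stosd and __movsd
    map onto "rep stosl" / "rep movsl", so Count is measured in elements, not bytes.
    Filling and then copying 256 dwords (buffer names are placeholders):

        unsigned long src[256], dst[256];
        __stosd(src, 0xDEADBEEF, 256);
        __movsd(dst, src, 256);
*/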
#if defined(_M_AMD64)

/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data);
__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data);
__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset);
__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset);
__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset);
__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset);
__INTRIN_INLINE void __incgsbyte(const unsigned long Offset);
__INTRIN_INLINE void __incgsword(const unsigned long Offset);
__INTRIN_INLINE void __incgsdword(const unsigned long Offset);
__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data);
__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data);
__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
    __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
    __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
    unsigned char value;
    __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
    unsigned short value;
    __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
    unsigned long value;
    __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
    unsigned __int64 value;
    __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
    __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
    __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
    __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
    __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
{
    __asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
#else /* defined(_M_AMD64) */

/*** FS segment addressing ***/

__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data);
__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset);
__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset);
__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset);
__INTRIN_INLINE void __incfsbyte(const unsigned long Offset);
__INTRIN_INLINE void __incfsword(const unsigned long Offset);
__INTRIN_INLINE void __incfsdword(const unsigned long Offset);
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data);
__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
    __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
    unsigned char value;
    __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
    unsigned short value;
    __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
    unsigned long value;
    __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
    return value;
}

__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
    __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
    __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
    __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
    if(!__builtin_constant_p(Offset))
        __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
    else
        __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

#endif /* defined(_M_AMD64) */
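/*
    Usage sketch (illustrative, not part of the original header): on 32-bit Windows the
    FS segment maps the current thread's TEB, whose self pointer lives at offset 0x18,
    so the TEB address can be read as:

        void *teb = (void *)__readfsdword(0x18);

    The 64-bit equivalent uses the GS intrinsics above with offset 0x30 (__readgsqword).
*/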
/*** Bit manipulation ***/

__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask);
__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask);
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b);
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b);
#endif
__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b);
__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b);
__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b);
__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift);
__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift);
__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift);
__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift);
__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift);
__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift);
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit);
__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit);
__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit);
__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value);
__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value);
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
    __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
    return Mask ? 1 : 0;
}

__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
    __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
    return Mask ? 1 : 0;
}
/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
    else
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));

    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
    else
        __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));

    return retval;
}
#endif
__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}

__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
    unsigned char retval;

    if(__builtin_constant_p(b))
        __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
    else
        __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

    return retval;
}
__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
    unsigned char retval;
    __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
    unsigned short retval;
    __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
    unsigned long retval;
    __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
    unsigned long retval;
    __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
    unsigned char retval;
    __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}

__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
    unsigned short retval;
    __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
    return retval;
}
/*
    NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
    constraint (edx:eax) for the Mask argument, because it's the only way GCC
    can pass 64-bit operands around - passing the two 32 bit parts separately
    just confuses it. Also we declare Bit as an int and then truncate it to
    match Visual C++ behavior
*/
__INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
{
    unsigned long long retval = Mask;

    __asm__
    (
        "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}

__INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
{
    long long retval = Mask;

    __asm__
    (
        "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
{
    unsigned long long retval = Mask;

    __asm__
    (
        "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
        "+A" (retval) :
        [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
    );

    return retval;
}
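/*
    Worked example (illustrative, not part of the original header): __ll_lshift shifts
    the full 64-bit value held in edx:eax, e.g.

        __ll_lshift(0x0000000100000000ULL, 8) == 0x0000010000000000ULL

    As the note above says, only the low byte of Bit is used, matching Visual C++.
*/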
__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
    unsigned short retval;
    __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
    return retval;
}

__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
    unsigned long retval;
    __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
    return retval;
}
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
    unsigned __int64 retval;
    __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
    return retval;
}
#else
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
    union
    {
        unsigned __int64 int64part;
        struct
        {
            unsigned long lowpart;
            unsigned long hipart;
        };
    } retval;

    retval.int64part = value;
    __asm__("bswapl %[lowpart]\n"
            "bswapl %[hipart]\n"
            : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart)
            : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
    return retval.int64part;
}
#endif
/*** 64-bit math ***/

__INTRIN_INLINE long long __emul(const int a, const int b);
__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b);
#if defined(_M_AMD64)
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b);
__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
#endif

__INTRIN_INLINE long long __emul(const int a, const int b)
{
    long long retval;
    __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
    unsigned long long retval;
    __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

#if defined(_M_AMD64)
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
{
    __int64 retval;
    __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}

__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
    unsigned __int64 retval;
    __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
    return retval;
}
#endif
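/*
    Usage sketch (illustrative, not part of the original header): __emul and __emulu are
    widening multiplies, so the full product survives even when it does not fit in
    32 bits:

        long long p = __emul(100000, 100000);    // p == 10000000000

    __mulh/__umulh (x64 only) return just the high 64 bits of the 128-bit product.
*/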
__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port);
__INTRIN_INLINE unsigned short __inword(const unsigned short Port);
__INTRIN_INLINE unsigned long __indword(const unsigned short Port);
__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count);
__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count);
__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count);
__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data);
__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data);
__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data);
__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count);
__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count);
__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count);
__INTRIN_INLINE int _inp(unsigned short Port);
__INTRIN_INLINE unsigned short _inpw(unsigned short Port);
__INTRIN_INLINE unsigned long _inpd(unsigned short Port);
__INTRIN_INLINE int _outp(unsigned short Port, int databyte);
__INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword);
__INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword);
__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
    unsigned char byte;
    __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
    return byte;
}

__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
    unsigned short word;
    __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
    return word;
}

__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
    unsigned long dword;
    __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
    return dword;
}

__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insb" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insw" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
    __asm__ __volatile__
    (
        "rep; insl" :
        [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
        "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
        "memory"
    );
}

__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
    __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
    __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
    __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
    __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
__INTRIN_INLINE int _inp(unsigned short Port)
{
    return __inbyte(Port);
}

__INTRIN_INLINE unsigned short _inpw(unsigned short Port)
{
    return __inword(Port);
}

__INTRIN_INLINE unsigned long _inpd(unsigned short Port)
{
    return __indword(Port);
}

__INTRIN_INLINE int _outp(unsigned short Port, int databyte)
{
    __outbyte(Port, (unsigned char)databyte);
    return databyte;
}

__INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
{
    __outword(Port, dataword);
    return dataword;
}

__INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
{
    __outdword(Port, dataword);
    return dataword;
}
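/*
    Usage sketch (illustrative, not part of the original header): a classic use of the
    port intrinsics is the legacy CMOS/RTC at ports 0x70/0x71, e.g. reading the seconds
    register:

        __outbyte(0x70, 0x00);                    // select CMOS register 0 (seconds)
        unsigned char seconds = __inbyte(0x71);   // value is BCD on most systems
*/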
/*** System information ***/

__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType);
__INTRIN_INLINE unsigned long long __rdtsc(void);
__INTRIN_INLINE void __writeeflags(uintptr_t Value);
__INTRIN_INLINE uintptr_t __readeflags(void);
__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
    __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}

__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#if defined(_M_AMD64)
    unsigned long long low, high;
    __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
    return low | (high << 32);
#else
    unsigned long long retval;
    __asm__ __volatile__("rdtsc" : "=A"(retval));
    return retval;
#endif
}
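/*
    Usage sketch (illustrative, not part of the original header): __rdtsc reads the
    time-stamp counter, so a rough cycle count for a region is the difference of two
    reads; rdtsc is not serializing, so the CPU may reorder work around it:

        unsigned long long start = __rdtsc();
        do_work();                                 // placeholder for the measured code
        unsigned long long cycles = __rdtsc() - start;
*/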
__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
    __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
}

__INTRIN_INLINE uintptr_t __readeflags(void)
{
    uintptr_t retval;
    __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
    return retval;
}
/*** Interrupts ***/

__INTRIN_INLINE void __int2c(void);
__INTRIN_INLINE void _disable(void);
__INTRIN_INLINE void _enable(void);
__INTRIN_INLINE void __halt(void);
#if defined(__clang__)
#define __debugbreak() __asm__("int $3")
#else
__INTRIN_INLINE void __debugbreak(void);
__INTRIN_INLINE void __debugbreak(void)
{
    __asm__("int $3");
}
#endif
__INTRIN_INLINE void __int2c(void)
{
    __asm__("int $0x2c");
}

__INTRIN_INLINE void _disable(void)
{
    __asm__("cli" : : : "memory");
}

__INTRIN_INLINE void _enable(void)
{
    __asm__("sti" : : : "memory");
}

__INTRIN_INLINE void __halt(void)
{
    __asm__("hlt\n\t" : : : "memory");
}
/*** Protected memory management ***/

__INTRIN_INLINE void __invlpg(void * const Address);

#if defined(_M_AMD64)
__INTRIN_INLINE void __writecr0(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr3(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr4(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data);
__INTRIN_INLINE unsigned __int64 __readcr0(void);
__INTRIN_INLINE unsigned __int64 __readcr2(void);
__INTRIN_INLINE unsigned __int64 __readcr3(void);
__INTRIN_INLINE unsigned __int64 __readcr4(void);
__INTRIN_INLINE unsigned __int64 __readcr8(void);
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg);
__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value);
#else /* _M_AMD64 */
__INTRIN_INLINE void __writecr0(const unsigned int Data);
__INTRIN_INLINE void __writecr3(const unsigned int Data);
__INTRIN_INLINE void __writecr4(const unsigned int Data);
__INTRIN_INLINE unsigned long __readcr0(void);
__INTRIN_INLINE unsigned long __readcr2(void);
__INTRIN_INLINE unsigned long __readcr3(void);
__INTRIN_INLINE unsigned long __readcr4(void);
__INTRIN_INLINE unsigned int __readdr(unsigned int reg);
__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value);
#endif /* _M_AMD64 */
#if defined(_M_AMD64)

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
    return value;
}
#else /* _M_AMD64 */

__INTRIN_INLINE void __writecr0(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned long __readcr0(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}

#endif /* _M_AMD64 */
#if defined(_M_AMD64)

__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
    unsigned __int64 value;
    switch (reg)
    {
        case 0:
            __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
            break;
        case 1:
            __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
            break;
        case 2:
            __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
            break;
        case 3:
            __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
            break;
        case 4:
            __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
            break;
        case 5:
            __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
            break;
        case 6:
            __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
            break;
        case 7:
            __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
            break;
    }
    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
    switch (reg)
    {
        case 0:
            __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
            break;
        case 1:
            __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
            break;
        case 2:
            __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
            break;
        case 3:
            __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
            break;
        case 4:
            __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
            break;
        case 5:
            __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
            break;
        case 6:
            __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
            break;
        case 7:
            __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
            break;
    }
}
#else /* _M_AMD64 */

__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
    unsigned int value;
    switch (reg)
    {
        case 0:
            __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
            break;
        case 1:
            __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
            break;
        case 2:
            __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
            break;
        case 3:
            __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
            break;
        case 4:
            __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
            break;
        case 5:
            __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
            break;
        case 6:
            __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
            break;
        case 7:
            __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
            break;
    }
    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
    switch (reg)
    {
        case 0:
            __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
            break;
        case 1:
            __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
            break;
        case 2:
            __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
            break;
        case 3:
            __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
            break;
        case 4:
            __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
            break;
        case 5:
            __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
            break;
        case 6:
            __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
            break;
        case 7:
            __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
            break;
    }
}

#endif /* _M_AMD64 */
__INTRIN_INLINE void __invlpg(void * const Address)
{
    __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
}
/*** System operations ***/

__INTRIN_INLINE unsigned long long __readmsr(const int reg);
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value);
__INTRIN_INLINE unsigned long long __readpmc(const int counter);
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a);
__INTRIN_INLINE void __wbinvd(void);
__INTRIN_INLINE void __lidt(void *Source);
__INTRIN_INLINE void __sidt(void *Destination);
__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#if defined(_M_AMD64)
    unsigned long low, high;
    __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
    return ((unsigned long long)high << 32) | low;
#else
    unsigned long long retval;
    __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
    return retval;
#endif
}
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#if defined(_M_AMD64)
    __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
    __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
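/*
    Usage sketch (illustrative, not part of the original header): rdmsr/wrmsr are
    privileged instructions, so these helpers are kernel-mode only. For example, the
    local APIC base is published in the IA32_APIC_BASE MSR (index 0x1B):

        unsigned long long apic_base = __readmsr(0x1B) & ~0xFFFULL;
*/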
__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
    unsigned long long retval;
    __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
    return retval;
}
/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
    unsigned long retval;
    __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
    return retval;
}
__INTRIN_INLINE void __wbinvd(void)
{
    __asm__ __volatile__("wbinvd" : : : "memory");
}

__INTRIN_INLINE void __lidt(void *Source)
{
    __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
    __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
}
/*** Misc operations ***/

__INTRIN_INLINE void _mm_pause(void);
__INTRIN_INLINE void __nop(void);

__INTRIN_INLINE void _mm_pause(void)
{
    __asm__ __volatile__("pause" : : : "memory");
}

__INTRIN_INLINE void __nop(void)
{
    __asm__ __volatile__("nop");
}

#endif /* KJK_INTRIN_X86_H_ */