1 /*
2 	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of the
3 	Microsoft Visual C++ intrinsic functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 	NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins
59 	for barriers and atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 	Pay attention to the type of barrier. Make it match what Visual C++
65 	would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 void* __cdecl memmove(void* dest, const void* source, size_t num);
74 __INTRIN_INLINE void* __cdecl memcpy(void* dest, const void* source, size_t num)
75 {
76 return memmove(dest, source, num);
77 }
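/* NOTE: forwarding memcpy to memmove keeps copies with overlapping buffers well
   defined, so callers that (incorrectly) pass overlapping regions keep working */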
78
79
80 /*** Stack frame juggling ***/
81 #define _ReturnAddress() (__builtin_return_address(0))
82 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
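/* NOTE: _AddressOfReturnAddress assumes the return address is stored one pointer
   above the address returned by __builtin_frame_address(0); this only holds when
   the function maintains a standard frame pointer */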
83 /* TODO: __getcallerseflags but how??? */
84
85 /* TODO: does the same definition work for x86 as well? */
86 #ifdef __x86_64__
87 #define _alloca(s) __builtin_alloca(s)
88 #endif
89
90 /*** Memory barriers ***/
91
92 #ifndef __clang__
93 __INTRIN_INLINE void _ReadWriteBarrier(void)
94 {
95 __asm__ __volatile__("" : : : "memory");
96 }
97
98 /* GCC only supports full barriers */
99 #define _ReadBarrier _ReadWriteBarrier
100 #define _WriteBarrier _ReadWriteBarrier
101
102 __INTRIN_INLINE void _mm_mfence(void)
103 {
104 __asm__ __volatile__("mfence" : : : "memory");
105 }
106
107 __INTRIN_INLINE void _mm_lfence(void)
108 {
109 _ReadBarrier();
110 __asm__ __volatile__("lfence");
111 _ReadBarrier();
112 }
113
114 __INTRIN_INLINE void _mm_sfence(void)
115 {
116 _WriteBarrier();
117 __asm__ __volatile__("sfence");
118 _WriteBarrier();
119 }
120 #endif /* !__clang__ */
121
122 #ifdef __x86_64__
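/* NOTE: a locked or-with-zero on a dummy stack slot acts as a full memory barrier
   and is generally cheaper than an explicit mfence */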
123 __INTRIN_INLINE void __faststorefence(void)
124 {
125 long local;
126 __asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
127 }
128 #endif
129
130
131 /*** Atomic operations ***/
132
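/*
  Illustrative sketch (not part of the header): the usual way to consume the
  compare-exchange intrinsics below is a read-modify-CAS retry loop. The helper
  name IncrementIfBelow is hypothetical and only serves as an example.

  long IncrementIfBelow(volatile long * Counter, long Limit)
  {
      long old;
      do
      {
          old = *Counter;
          if (old >= Limit)
              return old;                                   // already at the limit
      }
      while (_InterlockedCompareExchange(Counter, old + 1, old) != old);
      return old + 1;                                        // value we stored
  }
*/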
133 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
134
135 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
136 {
137 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
138 }
139
140 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
141 {
142 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
143 }
144
145 #ifndef __clang__
146
147 __INTRIN_INLINE long __cdecl _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
148 {
149 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
150 }
151
152 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
153 {
154 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
155 }
156
157 #endif
158
159 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
160 {
161 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
162 __sync_synchronize();
163 return __sync_lock_test_and_set(Target, Value);
164 }
165
166 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
167 {
168 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
169 __sync_synchronize();
170 return __sync_lock_test_and_set(Target, Value);
171 }
172
173 #ifndef __clang__
174
175 __INTRIN_INLINE long __cdecl _InterlockedExchange(volatile long * Target, long Value)
176 {
177 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
178 __sync_synchronize();
179 return __sync_lock_test_and_set(Target, Value);
180 }
181
182 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
183 {
184 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
185 __sync_synchronize();
186 return (void *)__sync_lock_test_and_set(Target, Value);
187 }
188
189 #endif
190
191 #if defined(__x86_64__)
192 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, long long Value)
193 {
194 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
195 __sync_synchronize();
196 return __sync_lock_test_and_set(Target, Value);
197 }
198 #endif
199
200 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
201 {
202 return __sync_fetch_and_add(Addend, Value);
203 }
204
205 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
206 {
207 return __sync_fetch_and_add(Addend, Value);
208 }
209
210 #ifndef __clang__
211 __INTRIN_INLINE long __cdecl _InterlockedExchangeAdd(volatile long * Addend, long Value)
212 {
213 return __sync_fetch_and_add(Addend, Value);
214 }
215 #endif
216
217 #if defined(__x86_64__)
218 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, long long Value)
219 {
220 return __sync_fetch_and_add(Addend, Value);
221 }
222 #endif
223
224 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
225 {
226 return __sync_fetch_and_and(value, mask);
227 }
228
229 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
230 {
231 return __sync_fetch_and_and(value, mask);
232 }
233
234 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
235 {
236 return __sync_fetch_and_and(value, mask);
237 }
238
239 #if defined(__x86_64__)
240 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * value, long long mask)
241 {
242 return __sync_fetch_and_and(value, mask);
243 }
244 #endif
245
246 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
247 {
248 return __sync_fetch_and_or(value, mask);
249 }
250
251 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
252 {
253 return __sync_fetch_and_or(value, mask);
254 }
255
256 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
257 {
258 return __sync_fetch_and_or(value, mask);
259 }
260
261 #if defined(__x86_64__)
262 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * value, long long mask)
263 {
264 return __sync_fetch_and_or(value, mask);
265 }
266 #endif
267
268 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
269 {
270 return __sync_fetch_and_xor(value, mask);
271 }
272
273 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
274 {
275 return __sync_fetch_and_xor(value, mask);
276 }
277
278 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
279 {
280 return __sync_fetch_and_xor(value, mask);
281 }
282
283 #if defined(__x86_64__)
284 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * value, long long mask)
285 {
286 return __sync_fetch_and_xor(value, mask);
287 }
288 #endif
289
290 #ifndef __clang__
291 __INTRIN_INLINE long __cdecl _InterlockedDecrement(volatile long * lpAddend)
292 {
293 return __sync_sub_and_fetch(lpAddend, 1);
294 }
295
296 __INTRIN_INLINE long __cdecl _InterlockedIncrement(volatile long * lpAddend)
297 {
298 return __sync_add_and_fetch(lpAddend, 1);
299 }
300 #endif
301
302 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
303 {
304 return __sync_sub_and_fetch(lpAddend, 1);
305 }
306
307 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
308 {
309 return __sync_add_and_fetch(lpAddend, 1);
310 }
311
312 #if defined(__x86_64__)
313 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
314 {
315 return __sync_sub_and_fetch(lpAddend, 1);
316 }
317
318 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
319 {
320 return __sync_add_and_fetch(lpAddend, 1);
321 }
322 #endif
323
324 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
325
326 #ifndef __clang__
327
328 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
329 {
330 char retval = Comperand;
331 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
332 return retval;
333 }
334
335 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
336 {
337 short retval = Comperand;
338 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
339 return retval;
340 }
341
342 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
343 {
344 long retval = Comperand;
345 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
346 return retval;
347 }
348
349 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
350 {
351 void * retval = (void *)Comperand;
352 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
353 return retval;
354 }
355
356 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
357 {
358 char retval = Value;
359 __asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
360 return retval;
361 }
362
363 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
364 {
365 short retval = Value;
366 __asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
367 return retval;
368 }
369
370 __INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
371 {
372 long retval = Value;
373 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
374 return retval;
375 }
376
377 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
378 {
379 void * retval = Value;
380 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
381 return retval;
382 }
383
384 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
385 {
386 char retval = Value;
387 __asm__("lock; xaddb %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
388 return retval;
389 }
390
391 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
392 {
393 short retval = Value;
394 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
395 return retval;
396 }
397
398 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
399 {
400 long retval = Value;
401 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
402 return retval;
403 }
404
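/* The And/Or/Xor fallbacks below use a compare-and-swap retry loop: read the
   current value, compute the new one, and retry until no other thread has changed
   the variable in between; the value observed before the update is returned */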
405 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
406 {
407 char x;
408 char y;
409
410 y = *value;
411
412 do
413 {
414 x = y;
415 y = _InterlockedCompareExchange8(value, x & mask, x);
416 }
417 while(y != x);
418
419 return y;
420 }
421
422 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
423 {
424 short x;
425 short y;
426
427 y = *value;
428
429 do
430 {
431 x = y;
432 y = _InterlockedCompareExchange16(value, x & mask, x);
433 }
434 while(y != x);
435
436 return y;
437 }
438
439 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
440 {
441 long x;
442 long y;
443
444 y = *value;
445
446 do
447 {
448 x = y;
449 y = _InterlockedCompareExchange(value, x & mask, x);
450 }
451 while(y != x);
452
453 return y;
454 }
455
456 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
457 {
458 char x;
459 char y;
460
461 y = *value;
462
463 do
464 {
465 x = y;
466 y = _InterlockedCompareExchange8(value, x | mask, x);
467 }
468 while(y != x);
469
470 return y;
471 }
472
473 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
474 {
475 short x;
476 short y;
477
478 y = *value;
479
480 do
481 {
482 x = y;
483 y = _InterlockedCompareExchange16(value, x | mask, x);
484 }
485 while(y != x);
486
487 return y;
488 }
489
490 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
491 {
492 long x;
493 long y;
494
495 y = *value;
496
497 do
498 {
499 x = y;
500 y = _InterlockedCompareExchange(value, x | mask, x);
501 }
502 while(y != x);
503
504 return y;
505 }
506
507 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
508 {
509 char x;
510 char y;
511
512 y = *value;
513
514 do
515 {
516 x = y;
517 y = _InterlockedCompareExchange8(value, x ^ mask, x);
518 }
519 while(y != x);
520
521 return y;
522 }
523
524 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
525 {
526 short x;
527 short y;
528
529 y = *value;
530
531 do
532 {
533 x = y;
534 y = _InterlockedCompareExchange16(value, x ^ mask, x);
535 }
536 while(y != x);
537
538 return y;
539 }
540
541 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
542 {
543 long x;
544 long y;
545
546 y = *value;
547
548 do
549 {
550 x = y;
551 y = _InterlockedCompareExchange(value, x ^ mask, x);
552 }
553 while(y != x);
554
555 return y;
556 }
557
558 __INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
559 {
560 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
561 }
562
563 __INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
564 {
565 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
566 }
567
568 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
569 {
570 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
571 }
572
573 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
574 {
575 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
576 }
577
578 #endif /* !__clang__ */
579
580 #if defined(__x86_64__)
581 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
582 {
583 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
584 }
585
586 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
587 {
588 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
589 }
590 #endif
591
592 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
593
594 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
595
596 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
597 {
598 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
599 }
600
601 #else
602
603 #ifndef __clang__
604 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
605 {
606 long long retval = Comperand;
607
608 __asm__
609 (
610 "lock; cmpxchg8b %[Destination]" :
611 [retval] "+A" (retval) :
612 [Destination] "m" (*Destination),
613 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
614 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
615 "memory"
616 );
617
618 return retval;
619 }
620 #endif /* !__clang__ */
621
622 #endif
623
624 #ifdef __i386__
625 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, long Value)
626 {
627 __asm__
628 (
629 "lock; addl %[Value], %[Lo32];"
630 "jae LABEL%=;"
631 "lock; adcl $0, %[Hi32];"
632 "LABEL%=:;" :
633 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
634 [Value] "ir" (Value) :
635 "memory"
636 );
637
638 return Value;
639 }
640 #endif /* __i386__ */
641
642 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, long b)
643 {
644 unsigned char retval;
645 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
646 return retval;
647 }
648
649 #if defined(__x86_64__)
650 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, long long b)
651 {
652 unsigned char retval;
653 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
654 return retval;
655 }
656 #endif
657
658 #ifndef __clang__
659 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, long b)
660 {
661 unsigned char retval;
662 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
663 return retval;
664 }
665 #endif /* !__clang__ */
666
667 #if defined(__x86_64__)
668 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, long long b)
669 {
670 unsigned char retval;
671 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
672 return retval;
673 }
674 #endif
675
676 /*** String operations ***/
677
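/* NOTE: like the rep stos/movs instructions they wrap, these helpers rely on the
   direction flag being clear (DF = 0), which the calling conventions guarantee at
   function entry */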
678 #ifndef __clang__
679 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
680 __INTRIN_INLINE void __stosb(unsigned char * Dest, unsigned char Data, size_t Count)
681 {
682 __asm__ __volatile__
683 (
684 "rep; stosb" :
685 [Dest] "=D" (Dest), [Count] "=c" (Count) :
686 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
687 );
688 }
689 #endif
690
691 __INTRIN_INLINE void __stosw(unsigned short * Dest, unsigned short Data, size_t Count)
692 {
693 __asm__ __volatile__
694 (
695 "rep; stosw" :
696 [Dest] "=D" (Dest), [Count] "=c" (Count) :
697 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
698 );
699 }
700
701 __INTRIN_INLINE void __stosd(unsigned long * Dest, unsigned long Data, size_t Count)
702 {
703 __asm__ __volatile__
704 (
705 "rep; stosl" :
706 [Dest] "=D" (Dest), [Count] "=c" (Count) :
707 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
708 );
709 }
710
711 #ifdef __x86_64__
712 __INTRIN_INLINE void __stosq(unsigned long long * Dest, unsigned long long Data, size_t Count)
713 {
714 __asm__ __volatile__
715 (
716 "rep; stosq" :
717 [Dest] "=D" (Dest), [Count] "=c" (Count) :
718 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
719 );
720 }
721 #endif
722
723 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
724 {
725 __asm__ __volatile__
726 (
727 "rep; movsb" :
728 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
729 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
730 );
731 }
732
733 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
734 {
735 __asm__ __volatile__
736 (
737 "rep; movsw" :
738 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
739 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
740 );
741 }
742
743 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
744 {
745 __asm__ __volatile__
746 (
747 "rep; movsd" :
748 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
749 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
750 );
751 }
752
753 #ifdef __x86_64__
754 __INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
755 {
756 __asm__ __volatile__
757 (
758 "rep; movsq" :
759 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
760 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
761 );
762 }
763 #endif
764
765 #if defined(__x86_64__)
766
767 /*** GS segment addressing ***/
768
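/* NOTE: Offset is relative to the GS segment base, not a linear address; on x64
   this is how per-processor and per-thread structures (KPCR in kernel mode, TEB
   in user mode) are reached */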
769 __INTRIN_INLINE void __writegsbyte(unsigned long Offset, unsigned char Data)
770 {
771 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
772 }
773
774 __INTRIN_INLINE void __writegsword(unsigned long Offset, unsigned short Data)
775 {
776 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
777 }
778
779 __INTRIN_INLINE void __writegsdword(unsigned long Offset, unsigned long Data)
780 {
781 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
782 }
783
784 __INTRIN_INLINE void __writegsqword(unsigned long Offset, unsigned long long Data)
785 {
786 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
787 }
788
789 __INTRIN_INLINE unsigned char __readgsbyte(unsigned long Offset)
790 {
791 unsigned char value;
792 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
793 return value;
794 }
795
796 __INTRIN_INLINE unsigned short __readgsword(unsigned long Offset)
797 {
798 unsigned short value;
799 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
800 return value;
801 }
802
803 __INTRIN_INLINE unsigned long __readgsdword(unsigned long Offset)
804 {
805 unsigned long value;
806 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
807 return value;
808 }
809
810 __INTRIN_INLINE unsigned long long __readgsqword(unsigned long Offset)
811 {
812 unsigned long long value;
813 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
814 return value;
815 }
816
817 __INTRIN_INLINE void __incgsbyte(unsigned long Offset)
818 {
819 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
820 }
821
822 __INTRIN_INLINE void __incgsword(unsigned long Offset)
823 {
824 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
825 }
826
827 __INTRIN_INLINE void __incgsdword(unsigned long Offset)
828 {
829 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
830 }
831
832 __INTRIN_INLINE void __incgsqword(unsigned long Offset)
833 {
834 __asm__ __volatile__("incq %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
835 }
836
837 __INTRIN_INLINE void __addgsbyte(unsigned long Offset, unsigned char Data)
838 {
839 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
840 }
841
842 __INTRIN_INLINE void __addgsword(unsigned long Offset, unsigned short Data)
843 {
844 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
845 }
846
847 __INTRIN_INLINE void __addgsdword(unsigned long Offset, unsigned int Data)
848 {
849 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
850 }
851
852 __INTRIN_INLINE void __addgsqword(unsigned long Offset, unsigned long long Data)
853 {
854 __asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
855 }
856
857 #else /* defined(__x86_64__) */
858
859 /*** FS segment addressing ***/
860
861 __INTRIN_INLINE void __writefsbyte(unsigned long Offset, unsigned char Data)
862 {
863 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
864 }
865
866 __INTRIN_INLINE void __writefsword(unsigned long Offset, unsigned short Data)
867 {
868 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
869 }
870
871 __INTRIN_INLINE void __writefsdword(unsigned long Offset, unsigned long Data)
872 {
873 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
874 }
875
876 #ifndef __clang__
877
878 __INTRIN_INLINE unsigned char __readfsbyte(unsigned long Offset)
879 {
880 unsigned char value;
881 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
882 return value;
883 }
884
885 __INTRIN_INLINE unsigned short __readfsword(unsigned long Offset)
886 {
887 unsigned short value;
888 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
889 return value;
890 }
891
892 __INTRIN_INLINE unsigned long __readfsdword(unsigned long Offset)
893 {
894 unsigned long value;
895 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
896 return value;
897 }
898
899 #endif /* !__clang__ */
900
901 __INTRIN_INLINE void __incfsbyte(unsigned long Offset)
902 {
903 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
904 }
905
906 __INTRIN_INLINE void __incfsword(unsigned long Offset)
907 {
908 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
909 }
910
911 __INTRIN_INLINE void __incfsdword(unsigned long Offset)
912 {
913 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
914 }
915
916 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
917 __INTRIN_INLINE void __addfsbyte(unsigned long Offset, unsigned char Data)
918 {
919 if(!__builtin_constant_p(Offset))
920 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
921 else
922 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
923 }
924
925 __INTRIN_INLINE void __addfsword(unsigned long Offset, unsigned short Data)
926 {
927 if(!__builtin_constant_p(Offset))
928 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
929 else
930 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
931 }
932
933 __INTRIN_INLINE void __addfsdword(unsigned long Offset, unsigned long Data)
934 {
935 if(!__builtin_constant_p(Offset))
936 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
937 else
938 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
939 }
940
941 #endif /* defined(__x86_64__) */
942
943
944 /*** Bit manipulation ***/
945
946 #ifndef __clang__
947
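/* NOTE: as with the Visual C++ intrinsics, *Index is left undefined when Mask is
   zero; only the return value tells the caller whether a set bit was found */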
948 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask)
949 {
950 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
951 return Mask ? 1 : 0;
952 }
953
954 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask)
955 {
956 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
957 return Mask ? 1 : 0;
958 }
959
960 #endif /* !__clang__ */
961
962 /* NOTE: again, the bizarre implementation follows Visual C++ */
963 __INTRIN_INLINE unsigned char _bittest(const long * a, long b)
964 {
965 unsigned char retval;
966
967 if(__builtin_constant_p(b))
968 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
969 else
970 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
971
972 return retval;
973 }
974
975 #ifdef __x86_64__
976 __INTRIN_INLINE unsigned char _BitScanForward64(unsigned long * Index, unsigned long long Mask)
977 {
978 unsigned long long Index64;
979 __asm__("bsfq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
980 *Index = Index64;
981 return Mask ? 1 : 0;
982 }
983
984 __INTRIN_INLINE unsigned char _BitScanReverse64(unsigned long * Index, unsigned long long Mask)
985 {
986 unsigned long long Index64;
987 __asm__("bsrq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
988 *Index = Index64;
989 return Mask ? 1 : 0;
990 }
991
992 __INTRIN_INLINE unsigned char _bittest64(const long long * a, long long b)
993 {
994 unsigned char retval;
995
996 if(__builtin_constant_p(b))
997 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
998 else
999 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1000
1001 return retval;
1002 }
1003 #endif
1004
1005 __INTRIN_INLINE unsigned char _bittestandcomplement(long * a, long b)
1006 {
1007 unsigned char retval;
1008
1009 if(__builtin_constant_p(b))
1010 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1011 else
1012 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1013
1014 return retval;
1015 }
1016
1017 __INTRIN_INLINE unsigned char _bittestandreset(long * a, long b)
1018 {
1019 unsigned char retval;
1020
1021 if(__builtin_constant_p(b))
1022 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1023 else
1024 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1025
1026 return retval;
1027 }
1028
1029 __INTRIN_INLINE unsigned char _bittestandset(long * a, long b)
1030 {
1031 unsigned char retval;
1032
1033 if(__builtin_constant_p(b))
1034 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1035 else
1036 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1037
1038 return retval;
1039 }
1040
1041 #ifdef __x86_64__
1042
1043 __INTRIN_INLINE unsigned char _bittestandset64(long long * a, long long b)
1044 {
1045 unsigned char retval;
1046
1047 if(__builtin_constant_p(b))
1048 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1049 else
1050 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1051
1052 return retval;
1053 }
1054
1055 __INTRIN_INLINE unsigned char _bittestandreset64(long long * a, long long b)
1056 {
1057 unsigned char retval;
1058
1059 if(__builtin_constant_p(b))
1060 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1061 else
1062 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1063
1064 return retval;
1065 }
1066
1067 __INTRIN_INLINE unsigned char _bittestandcomplement64(long long * a, long long b)
1068 {
1069 unsigned char retval;
1070
1071 if(__builtin_constant_p(b))
1072 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1073 else
1074 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1075
1076 return retval;
1077 }
1078
1079 #endif
1080
1081 #ifndef __clang__
1082
1083 __INTRIN_INLINE unsigned char __cdecl _rotl8(unsigned char value, unsigned char shift)
1084 {
1085 unsigned char retval;
1086 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1087 return retval;
1088 }
1089
1090 __INTRIN_INLINE unsigned short __cdecl _rotl16(unsigned short value, unsigned char shift)
1091 {
1092 unsigned short retval;
1093 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1094 return retval;
1095 }
1096
1097 __INTRIN_INLINE unsigned int __cdecl _rotl(unsigned int value, int shift)
1098 {
1099 unsigned int retval;
1100 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1101 return retval;
1102 }
1103
1104 #endif /* !__clang__ */
1105
1106 #ifdef __x86_64__
1107 __INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
1108 {
1109 unsigned long long retval;
1110 	__asm__("rolq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1111 return retval;
1112 }
1113 #else /* __x86_64__ */
1114 #ifndef __clang__
1115 __INTRIN_INLINE unsigned long long __cdecl _rotl64(unsigned long long value, int shift)
1116 {
1117 /* FIXME: this is probably not optimal */
1118 return (value << shift) | (value >> (64 - shift));
1119 }
1120 #endif /* !__clang__ */
1121 #endif /* __x86_64__ */
1122
1123 #ifndef __clang__
1124
1125 __INTRIN_INLINE unsigned int __cdecl _rotr(unsigned int value, int shift)
1126 {
1127 unsigned int retval;
1128 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1129 return retval;
1130 }
1131
1132 __INTRIN_INLINE unsigned char __cdecl _rotr8(unsigned char value, unsigned char shift)
1133 {
1134 unsigned char retval;
1135 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1136 return retval;
1137 }
1138
1139 __INTRIN_INLINE unsigned short __cdecl _rotr16(unsigned short value, unsigned char shift)
1140 {
1141 unsigned short retval;
1142 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1143 return retval;
1144 }
1145
1146 #endif /* !__clang__ */
1147
1148 #ifdef __x86_64__
1149 __INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
1150 {
1151 unsigned long long retval;
1152 	__asm__("rorq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1153 return retval;
1154 }
1155 #else /* __x86_64__ */
1156 #ifndef __clang__
1157 __INTRIN_INLINE unsigned long long __cdecl _rotr64(unsigned long long value, int shift)
1158 {
1159 /* FIXME: this is probably not optimal */
1160 return (value >> shift) | (value << (64 - shift));
1161 }
1162 #endif /* !__clang__ */
1163 #endif /* __x86_64__ */
1164
1165 #ifndef __clang__
1166
1167 __INTRIN_INLINE unsigned long __cdecl _lrotl(unsigned long value, int shift)
1168 {
1169 unsigned long retval;
1170 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1171 return retval;
1172 }
1173
1174 __INTRIN_INLINE unsigned long __cdecl _lrotr(unsigned long value, int shift)
1175 {
1176 unsigned long retval;
1177 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1178 return retval;
1179 }
1180
1181 #endif /* !__clang__ */
1182
1183 /*
1184 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1185 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1186 	can pass 64-bit operands around - passing the two 32-bit parts separately
1187 just confuses it. Also we declare Bit as an int and then truncate it to
1188 match Visual C++ behavior
1189 */
1190 __INTRIN_INLINE unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
1191 {
1192 unsigned long long retval = Mask;
1193
1194 __asm__
1195 (
1196 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1197 "+A" (retval) :
1198 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1199 );
1200
1201 return retval;
1202 }
1203
1204 __INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
1205 {
1206 long long retval = Mask;
1207
1208 __asm__
1209 (
1210 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1211 "+A" (retval) :
1212 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1213 );
1214
1215 return retval;
1216 }
1217
1218 __INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
1219 {
1220 unsigned long long retval = Mask;
1221
1222 __asm__
1223 (
1224 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1225 "+A" (retval) :
1226 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1227 );
1228
1229 return retval;
1230 }
1231
1232 __INTRIN_INLINE unsigned short __cdecl _byteswap_ushort(unsigned short value)
1233 {
1234 unsigned short retval;
1235 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1236 return retval;
1237 }
1238
1239 __INTRIN_INLINE unsigned long __cdecl _byteswap_ulong(unsigned long value)
1240 {
1241 unsigned long retval;
1242 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1243 return retval;
1244 }
1245
1246 #ifdef __x86_64__
1247 __INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
1248 {
1249 unsigned long long retval;
1250 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1251 return retval;
1252 }
1253 #else
1254 __INTRIN_INLINE unsigned long long __cdecl _byteswap_uint64(unsigned long long value)
1255 {
1256 union {
1257 unsigned long long int64part;
1258 struct {
1259 unsigned long lowpart;
1260 unsigned long hipart;
1261 };
1262 } retval;
1263 retval.int64part = value;
1264 __asm__("bswapl %[lowpart]\n"
1265 "bswapl %[hipart]\n"
1266 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1267 return retval.int64part;
1268 }
1269 #endif
1270
1271 __INTRIN_INLINE unsigned int __lzcnt(unsigned int value)
1272 {
1273 return __builtin_clz(value);
1274 }
1275
1276 __INTRIN_INLINE unsigned short __lzcnt16(unsigned short value)
1277 {
1278 	return __builtin_clz(value) - 16; /* __builtin_clz counts leading zeros of an unsigned int, so discount the 16 bits above a short */
1279 }
1280
1281 #ifndef __clang__
1282
1283 __INTRIN_INLINE unsigned int __popcnt(unsigned int value)
1284 {
1285 return __builtin_popcount(value);
1286 }
1287
1288 __INTRIN_INLINE unsigned short __popcnt16(unsigned short value)
1289 {
1290 return __builtin_popcount(value);
1291 }
1292
1293 #endif /* !__clang__ */
1294
1295 #ifdef __x86_64__
1296 __INTRIN_INLINE unsigned long long __lzcnt64(unsigned long long value)
1297 {
1298 return __builtin_clzll(value);
1299 }
1300
1301 __INTRIN_INLINE unsigned long long __popcnt64(unsigned long long value)
1302 {
1303 return __builtin_popcountll(value);
1304 }
1305 #endif
1306
1307 /*** 64-bit math ***/
1308
1309 #ifndef __clang__
1310
1311 __INTRIN_INLINE long long __emul(int a, int b)
1312 {
1313 long long retval;
1314 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1315 return retval;
1316 }
1317
1318 __INTRIN_INLINE unsigned long long __emulu(unsigned int a, unsigned int b)
1319 {
1320 unsigned long long retval;
1321 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1322 return retval;
1323 }
1324
1325 #endif /* !__clang__ */
1326
1327 __INTRIN_INLINE long long __cdecl _abs64(long long value)
1328 {
1329 return (value >= 0) ? value : -value;
1330 }
1331
1332 #ifdef __x86_64__
1333
1334 __INTRIN_INLINE long long __mulh(long long a, long long b)
1335 {
1336 long long retval;
1337 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1338 return retval;
1339 }
1340
1341 __INTRIN_INLINE unsigned long long __umulh(unsigned long long a, unsigned long long b)
1342 {
1343 unsigned long long retval;
1344 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1345 return retval;
1346 }
1347
1348 #endif
1349
1350 /*** Port I/O ***/
1351
1352 __INTRIN_INLINE unsigned char __inbyte(unsigned short Port)
1353 {
1354 unsigned char byte;
1355 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1356 return byte;
1357 }
1358
1359 __INTRIN_INLINE unsigned short __inword(unsigned short Port)
1360 {
1361 unsigned short word;
1362 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1363 return word;
1364 }
1365
1366 __INTRIN_INLINE unsigned long __indword(unsigned short Port)
1367 {
1368 unsigned long dword;
1369 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1370 return dword;
1371 }
1372
1373 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1374 {
1375 __asm__ __volatile__
1376 (
1377 "rep; insb" :
1378 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1379 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1380 "memory"
1381 );
1382 }
1383
1384 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1385 {
1386 __asm__ __volatile__
1387 (
1388 "rep; insw" :
1389 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1390 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1391 "memory"
1392 );
1393 }
1394
1395 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1396 {
1397 __asm__ __volatile__
1398 (
1399 "rep; insl" :
1400 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1401 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1402 "memory"
1403 );
1404 }
1405
1406 __INTRIN_INLINE void __outbyte(unsigned short Port, unsigned char Data)
1407 {
1408 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1409 }
1410
1411 __INTRIN_INLINE void __outword(unsigned short Port, unsigned short Data)
1412 {
1413 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1414 }
1415
1416 __INTRIN_INLINE void __outdword(unsigned short Port, unsigned long Data)
1417 {
1418 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1419 }
1420
1421 __INTRIN_INLINE void __outbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1422 {
1423 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1424 }
1425
1426 __INTRIN_INLINE void __outwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1427 {
1428 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1429 }
1430
1431 __INTRIN_INLINE void __outdwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1432 {
1433 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1434 }
1435
1436 __INTRIN_INLINE int __cdecl _inp(unsigned short Port)
1437 {
1438 return __inbyte(Port);
1439 }
1440
1441 __INTRIN_INLINE unsigned short __cdecl _inpw(unsigned short Port)
1442 {
1443 return __inword(Port);
1444 }
1445
1446 __INTRIN_INLINE unsigned long __cdecl _inpd(unsigned short Port)
1447 {
1448 return __indword(Port);
1449 }
1450
1451 __INTRIN_INLINE int __cdecl _outp(unsigned short Port, int databyte)
1452 {
1453 __outbyte(Port, (unsigned char)databyte);
1454 return databyte;
1455 }
1456
1457 __INTRIN_INLINE unsigned short __cdecl _outpw(unsigned short Port, unsigned short dataword)
1458 {
1459 __outword(Port, dataword);
1460 return dataword;
1461 }
1462
1463 __INTRIN_INLINE unsigned long __cdecl _outpd(unsigned short Port, unsigned long dataword)
1464 {
1465 __outdword(Port, dataword);
1466 return dataword;
1467 }
1468
1469
1470 /*** System information ***/
1471
1472 __INTRIN_INLINE void __cpuid(int CPUInfo[4], int InfoType)
1473 {
1474 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1475 }
1476
1477 __INTRIN_INLINE void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue)
1478 {
1479 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType), "c" (ECXValue));
1480 }
1481
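/*
  Illustrative sketch (not part of the header): reading the CPU vendor string
  with __cpuid. For leaf 0 the vendor bytes are returned in EBX, EDX, ECX, i.e.
  CPUInfo[1], CPUInfo[3], CPUInfo[2] in the register order used above.

  int regs[4];
  char vendor[13];
  __cpuid(regs, 0);
  memcpy(vendor + 0, &regs[1], 4);  // EBX
  memcpy(vendor + 4, &regs[3], 4);  // EDX
  memcpy(vendor + 8, &regs[2], 4);  // ECX
  vendor[12] = '\0';
*/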
1482 #ifndef __clang__
1483 __INTRIN_INLINE unsigned long long __rdtsc(void)
1484 {
1485 #ifdef __x86_64__
1486 unsigned long long low, high;
1487 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1488 return low | (high << 32);
1489 #else
1490 unsigned long long retval;
1491 __asm__ __volatile__("rdtsc" : "=A"(retval));
1492 return retval;
1493 #endif
1494 }
1495 #endif /* !__clang__ */
1496
1497 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1498 {
1499 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1500 }
1501
1502 __INTRIN_INLINE uintptr_t __readeflags(void)
1503 {
1504 uintptr_t retval;
1505 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1506 return retval;
1507 }
1508
1509 /*** Interrupts ***/
1510
1511 #ifndef __clang__
1512
1513 __INTRIN_INLINE void __cdecl __debugbreak(void)
1514 {
1515 __asm__("int $3");
1516 }
1517
1518 __INTRIN_INLINE void __ud2(void)
1519 {
1520 __asm__("ud2");
1521 }
1522
1523 __INTRIN_INLINE void __int2c(void)
1524 {
1525 __asm__("int $0x2c");
1526 }
1527
1528 #endif /* !__clang__ */
1529
1530 __INTRIN_INLINE void __cdecl _disable(void)
1531 {
1532 __asm__("cli" : : : "memory");
1533 }
1534
1535 __INTRIN_INLINE void __cdecl _enable(void)
1536 {
1537 __asm__("sti" : : : "memory");
1538 }
1539
1540 __INTRIN_INLINE void __halt(void)
1541 {
1542 __asm__("hlt" : : : "memory");
1543 }
1544
1545 #ifndef __clang__
1546 __declspec(noreturn)
1547 __INTRIN_INLINE void __fastfail(unsigned int Code)
1548 {
1549 __asm__("int $0x29" : : "c"(Code) : "memory");
1550 __builtin_unreachable();
1551 }
1552 #endif
1553
1554 /*** Protected memory management ***/
1555
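/* NOTE: the control and debug register accessors below compile to privileged
   instructions and can only be executed at CPL 0 (kernel mode) */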
1556 #ifdef __x86_64__
1557
1558 __INTRIN_INLINE void __writecr0(unsigned long long Data)
1559 {
1560 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1561 }
1562
1563 __INTRIN_INLINE void __writecr3(unsigned long long Data)
1564 {
1565 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1566 }
1567
1568 __INTRIN_INLINE void __writecr4(unsigned long long Data)
1569 {
1570 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1571 }
1572
1573 __INTRIN_INLINE void __writecr8(unsigned long long Data)
1574 {
1575 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1576 }
1577
1578 __INTRIN_INLINE unsigned long long __readcr0(void)
1579 {
1580 unsigned long long value;
1581 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1582 return value;
1583 }
1584
1585 __INTRIN_INLINE unsigned long long __readcr2(void)
1586 {
1587 unsigned long long value;
1588 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1589 return value;
1590 }
1591
1592 __INTRIN_INLINE unsigned long long __readcr3(void)
1593 {
1594 unsigned long long value;
1595 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1596 return value;
1597 }
1598
1599 __INTRIN_INLINE unsigned long long __readcr4(void)
1600 {
1601 unsigned long long value;
1602 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1603 return value;
1604 }
1605
1606 __INTRIN_INLINE unsigned long long __readcr8(void)
1607 {
1608 unsigned long long value;
1609 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1610 return value;
1611 }
1612
1613 #else /* __x86_64__ */
1614
1615 __INTRIN_INLINE void __writecr0(unsigned int Data)
1616 {
1617 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1618 }
1619
1620 __INTRIN_INLINE void __writecr3(unsigned int Data)
1621 {
1622 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1623 }
1624
1625 __INTRIN_INLINE void __writecr4(unsigned int Data)
1626 {
1627 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1628 }
1629
1630 #ifndef __clang__
1631 __INTRIN_INLINE void __writecr8(unsigned int Data)
1632 {
1633 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1634 }
1635 #endif
1636
1637 __INTRIN_INLINE unsigned long __readcr0(void)
1638 {
1639 unsigned long value;
1640 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1641 return value;
1642 }
1643
1644 __INTRIN_INLINE unsigned long __readcr2(void)
1645 {
1646 unsigned long value;
1647 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1648 return value;
1649 }
1650
1651 __INTRIN_INLINE unsigned long __readcr3(void)
1652 {
1653 unsigned long value;
1654 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1655 return value;
1656 }
1657
1658 __INTRIN_INLINE unsigned long __readcr4(void)
1659 {
1660 unsigned long value;
1661 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1662 return value;
1663 }
1664
1665 #ifndef __clang__
1666 __INTRIN_INLINE unsigned long __readcr8(void)
1667 {
1668 unsigned long value;
1669 __asm__ __volatile__("mov %%cr8, %[value]" : [value] "=r" (value));
1670 return value;
1671 }
1672 #endif
1673
1674 #endif /* __x86_64__ */
1675
1676 #ifdef __x86_64__
1677
1678 __INTRIN_INLINE unsigned long long __readdr(unsigned int reg)
1679 {
1680 unsigned long long value;
1681 switch (reg)
1682 {
1683 case 0:
1684 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1685 break;
1686 case 1:
1687 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1688 break;
1689 case 2:
1690 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1691 break;
1692 case 3:
1693 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1694 break;
1695 case 4:
1696 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1697 break;
1698 case 5:
1699 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1700 break;
1701 case 6:
1702 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1703 break;
1704 case 7:
1705 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1706 break;
1707 }
1708 return value;
1709 }
1710
1711 __INTRIN_INLINE void __writedr(unsigned reg, unsigned long long value)
1712 {
1713 switch (reg)
1714 {
1715 case 0:
1716 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1717 break;
1718 case 1:
1719 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1720 break;
1721 case 2:
1722 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1723 break;
1724 case 3:
1725 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1726 break;
1727 case 4:
1728 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1729 break;
1730 case 5:
1731 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1732 break;
1733 case 6:
1734 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1735 break;
1736 case 7:
1737 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1738 break;
1739 }
1740 }
1741
1742 #else /* __x86_64__ */
1743
1744 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1745 {
1746 unsigned int value;
1747 switch (reg)
1748 {
1749 case 0:
1750 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1751 break;
1752 case 1:
1753 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1754 break;
1755 case 2:
1756 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1757 break;
1758 case 3:
1759 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1760 break;
1761 case 4:
1762 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1763 break;
1764 case 5:
1765 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1766 break;
1767 case 6:
1768 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1769 break;
1770 case 7:
1771 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1772 break;
1773 }
1774 return value;
1775 }
1776
1777 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1778 {
1779 switch (reg)
1780 {
1781 case 0:
1782 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1783 break;
1784 case 1:
1785 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1786 break;
1787 case 2:
1788 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1789 break;
1790 case 3:
1791 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1792 break;
1793 case 4:
1794 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1795 break;
1796 case 5:
1797 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1798 break;
1799 case 6:
1800 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1801 break;
1802 case 7:
1803 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1804 break;
1805 }
1806 }
1807
1808 #endif /* __x86_64__ */
1809
1810 __INTRIN_INLINE void __invlpg(void *Address)
1811 {
1812 __asm__ __volatile__ ("invlpg (%[Address])" : : [Address] "b" (Address) : "memory");
1813 }
1814
1815
1816 /*** System operations ***/
1817
1818 __INTRIN_INLINE unsigned long long __readmsr(unsigned long reg)
1819 {
1820 #ifdef __x86_64__
1821 unsigned long low, high;
1822 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1823 return ((unsigned long long)high << 32) | low;
1824 #else
1825 unsigned long long retval;
1826 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1827 return retval;
1828 #endif
1829 }
1830
1831 __INTRIN_INLINE void __writemsr(unsigned long Register, unsigned long long Value)
1832 {
1833 #ifdef __x86_64__
1834 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1835 #else
1836 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1837 #endif
1838 }
1839
1840 __INTRIN_INLINE unsigned long long __readpmc(unsigned long counter)
1841 {
1842 unsigned long long retval;
1843 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1844 return retval;
1845 }
1846
1847 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1848 __INTRIN_INLINE unsigned long __segmentlimit(unsigned long a)
1849 {
1850 unsigned long retval;
1851 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1852 return retval;
1853 }
1854
1855 __INTRIN_INLINE void __wbinvd(void)
1856 {
1857 __asm__ __volatile__("wbinvd" : : : "memory");
1858 }
1859
1860 __INTRIN_INLINE void __lidt(void *Source)
1861 {
1862 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1863 }
1864
1865 __INTRIN_INLINE void __sidt(void *Destination)
1866 {
1867 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1868 }
1869
1870 __INTRIN_INLINE void _sgdt(void *Destination)
1871 {
1872 __asm__ __volatile__("sgdt %0" : : "m"(*(short*)Destination) : "memory");
1873 }
1874
1875 /*** Misc operations ***/
1876
1877 #ifndef __clang__
1878 __INTRIN_INLINE void _mm_pause(void)
1879 {
1880 __asm__ __volatile__("pause" : : : "memory");
1881 }
1882 #endif
1883
1884 __INTRIN_INLINE void __nop(void)
1885 {
1886 __asm__ __volatile__("nop");
1887 }
1888
1889 #ifdef __cplusplus
1890 }
1891 #endif
1892
1893 #endif /* KJK_INTRIN_X86_H_ */
1894
1895 /* EOF */