1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 void* __cdecl memmove(void* dest, const void* source, size_t num);
74 __INTRIN_INLINE void* __cdecl memcpy(void* dest, const void* source, size_t num)
75 {
76 return memmove(dest, source, num);
77 }
78
79
80 /*** Stack frame juggling ***/
81 #define _ReturnAddress() (__builtin_return_address(0))
82 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
83 /* TODO: __getcallerseflags but how??? */
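/*
 Illustrative usage sketch (an added example, not part of the original header):
 under the frame layout assumed by _AddressOfReturnAddress(), the slot it points
 to holds the value that _ReturnAddress() reports.

   void dump_caller(void)
   {
       void * ra = _ReturnAddress();
       void ** slot = (void **)_AddressOfReturnAddress();
       // ra == *slot as long as the frame layout matches the macro's assumption
   }
*/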
84
85 /* Maybe the same for x86? */
86 #ifdef __x86_64__
87 #define _alloca(s) __builtin_alloca(s)
88 #endif
89
90 /*** Memory barriers ***/
91
92 __INTRIN_INLINE void _ReadWriteBarrier(void)
93 {
94 __asm__ __volatile__("" : : : "memory");
95 }
96
97 /* GCC only supports full barriers */
98 #define _ReadBarrier _ReadWriteBarrier
99 #define _WriteBarrier _ReadWriteBarrier
100
101 __INTRIN_INLINE void _mm_mfence(void)
102 {
103 __asm__ __volatile__("mfence" : : : "memory");
104 }
105
106 __INTRIN_INLINE void _mm_lfence(void)
107 {
108 _ReadBarrier();
109 __asm__ __volatile__("lfence");
110 _ReadBarrier();
111 }
112
113 __INTRIN_INLINE void _mm_sfence(void)
114 {
115 _WriteBarrier();
116 __asm__ __volatile__("sfence");
117 _WriteBarrier();
118 }
119
120 #ifdef __x86_64__
121 __INTRIN_INLINE void __faststorefence(void)
122 {
123 long local;
124 __asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
125 }
126 #endif
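/*
 Illustrative sketch (an added example, not part of the original header):
 _ReadWriteBarrier() only constrains the compiler, while _mm_mfence() also
 orders the loads and stores executed by the CPU. A store-then-load sequence
 that needs the hardware fence:

   shared_flag = 1;     // store
   _mm_mfence();        // make the store globally visible...
   seen = other_flag;   // ...before this load is performed
*/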
127
128
129 /*** Atomic operations ***/
130
131 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
132
133 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
134 {
135 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
136 }
137
138 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
139 {
140 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
141 }
142
143 #ifndef __clang__
144
145 __INTRIN_INLINE long __cdecl _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
146 {
147 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
148 }
149
150 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
151 {
152 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
153 }
154
155 #endif
156
157 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
158 {
159 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
160 __sync_synchronize();
161 return __sync_lock_test_and_set(Target, Value);
162 }
163
164 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
165 {
166 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
167 __sync_synchronize();
168 return __sync_lock_test_and_set(Target, Value);
169 }
170
171 #ifndef __clang__
172
173 __INTRIN_INLINE long __cdecl _InterlockedExchange(volatile long * Target, long Value)
174 {
175 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
176 __sync_synchronize();
177 return __sync_lock_test_and_set(Target, Value);
178 }
179
180 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
181 {
182 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
183 __sync_synchronize();
184 return (void *)__sync_lock_test_and_set(Target, Value);
185 }
186
187 #endif
188
189 #if defined(__x86_64__)
190 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, long long Value)
191 {
192 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
193 __sync_synchronize();
194 return __sync_lock_test_and_set(Target, Value);
195 }
196 #endif
197
198 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
199 {
200 return __sync_fetch_and_add(Addend, Value);
201 }
202
203 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
204 {
205 return __sync_fetch_and_add(Addend, Value);
206 }
207
208 #ifndef __clang__
209 __INTRIN_INLINE long __cdecl _InterlockedExchangeAdd(volatile long * Addend, long Value)
210 {
211 return __sync_fetch_and_add(Addend, Value);
212 }
213 #endif
214
215 #if defined(__x86_64__)
216 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, long long Value)
217 {
218 return __sync_fetch_and_add(Addend, Value);
219 }
220 #endif
221
222 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
223 {
224 return __sync_fetch_and_and(value, mask);
225 }
226
227 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
228 {
229 return __sync_fetch_and_and(value, mask);
230 }
231
232 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
233 {
234 return __sync_fetch_and_and(value, mask);
235 }
236
237 #if defined(__x86_64__)
238 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * value, long long mask)
239 {
240 return __sync_fetch_and_and(value, mask);
241 }
242 #endif
243
244 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
245 {
246 return __sync_fetch_and_or(value, mask);
247 }
248
249 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
250 {
251 return __sync_fetch_and_or(value, mask);
252 }
253
254 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
255 {
256 return __sync_fetch_and_or(value, mask);
257 }
258
259 #if defined(__x86_64__)
260 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * value, long long mask)
261 {
262 return __sync_fetch_and_or(value, mask);
263 }
264 #endif
265
266 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
267 {
268 return __sync_fetch_and_xor(value, mask);
269 }
270
271 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
272 {
273 return __sync_fetch_and_xor(value, mask);
274 }
275
276 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
277 {
278 return __sync_fetch_and_xor(value, mask);
279 }
280
281 #if defined(__x86_64__)
282 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * value, long long mask)
283 {
284 return __sync_fetch_and_xor(value, mask);
285 }
286 #endif
287
288 #ifndef __clang__
289 __INTRIN_INLINE long __cdecl _InterlockedDecrement(volatile long * lpAddend)
290 {
291 return __sync_sub_and_fetch(lpAddend, 1);
292 }
293
294 __INTRIN_INLINE long __cdecl _InterlockedIncrement(volatile long * lpAddend)
295 {
296 return __sync_add_and_fetch(lpAddend, 1);
297 }
298 #endif
299
300 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
301 {
302 return __sync_sub_and_fetch(lpAddend, 1);
303 }
304
305 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
306 {
307 return __sync_add_and_fetch(lpAddend, 1);
308 }
309
310 #if defined(__x86_64__)
311 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
312 {
313 return __sync_sub_and_fetch(lpAddend, 1);
314 }
315
316 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
317 {
318 return __sync_add_and_fetch(lpAddend, 1);
319 }
320 #endif
321
322 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
323
324 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
325 {
326 char retval = Comperand;
327 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
328 return retval;
329 }
330
331 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
332 {
333 short retval = Comperand;
334 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
335 return retval;
336 }
337
338 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
339 {
340 long retval = Comperand;
341 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
342 return retval;
343 }
344
345 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
346 {
347 void * retval = (void *)Comperand;
348 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
349 return retval;
350 }
351
352 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
353 {
354 char retval = Value;
355 __asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
356 return retval;
357 }
358
359 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
360 {
361 short retval = Value;
362 __asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
363 return retval;
364 }
365
366 __INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
367 {
368 long retval = Value;
369 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
370 return retval;
371 }
372
373 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
374 {
375 void * retval = Value;
376 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
377 return retval;
378 }
379
380 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
381 {
382 char retval = Value;
383 __asm__("lock; xaddb %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
384 return retval;
385 }
386
387 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
388 {
389 short retval = Value;
390 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
391 return retval;
392 }
393
394 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
395 {
396 long retval = Value;
397 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
398 return retval;
399 }
400
401 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
402 {
403 char x;
404 char y;
405
406 y = *value;
407
408 do
409 {
410 x = y;
411 y = _InterlockedCompareExchange8(value, x & mask, x);
412 }
413 while(y != x);
414
415 return y;
416 }
417
418 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
419 {
420 short x;
421 short y;
422
423 y = *value;
424
425 do
426 {
427 x = y;
428 y = _InterlockedCompareExchange16(value, x & mask, x);
429 }
430 while(y != x);
431
432 return y;
433 }
434
435 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
436 {
437 long x;
438 long y;
439
440 y = *value;
441
442 do
443 {
444 x = y;
445 y = _InterlockedCompareExchange(value, x & mask, x);
446 }
447 while(y != x);
448
449 return y;
450 }
451
452 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
453 {
454 char x;
455 char y;
456
457 y = *value;
458
459 do
460 {
461 x = y;
462 y = _InterlockedCompareExchange8(value, x | mask, x);
463 }
464 while(y != x);
465
466 return y;
467 }
468
469 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
470 {
471 short x;
472 short y;
473
474 y = *value;
475
476 do
477 {
478 x = y;
479 y = _InterlockedCompareExchange16(value, x | mask, x);
480 }
481 while(y != x);
482
483 return y;
484 }
485
486 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
487 {
488 long x;
489 long y;
490
491 y = *value;
492
493 do
494 {
495 x = y;
496 y = _InterlockedCompareExchange(value, x | mask, x);
497 }
498 while(y != x);
499
500 return y;
501 }
502
503 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
504 {
505 char x;
506 char y;
507
508 y = *value;
509
510 do
511 {
512 x = y;
513 y = _InterlockedCompareExchange8(value, x ^ mask, x);
514 }
515 while(y != x);
516
517 return y;
518 }
519
520 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
521 {
522 short x;
523 short y;
524
525 y = *value;
526
527 do
528 {
529 x = y;
530 y = _InterlockedCompareExchange16(value, x ^ mask, x);
531 }
532 while(y != x);
533
534 return y;
535 }
536
537 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
538 {
539 long x;
540 long y;
541
542 y = *value;
543
544 do
545 {
546 x = y;
547 y = _InterlockedCompareExchange(value, x ^ mask, x);
548 }
549 while(y != x);
550
551 return y;
552 }
553
554 __INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
555 {
556 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
557 }
558
559 __INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
560 {
561 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
562 }
563
564 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
565 {
566 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
567 }
568
569 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
570 {
571 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
572 }
573
574 #if defined(__x86_64__)
575 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
576 {
577 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
578 }
579
580 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
581 {
582 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
583 }
584 #endif
585
586 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
587
588 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
589
590 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
591 {
592 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
593 }
594
595 #else
596
597 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
598 {
599 long long retval = Comperand;
600
601 __asm__
602 (
603 "lock; cmpxchg8b %[Destination]" :
604 [retval] "+A" (retval) :
605 [Destination] "m" (*Destination),
606 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
607 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
608 "memory"
609 );
610
611 return retval;
612 }
613
614 #endif
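/*
 Illustrative usage sketch (an added example, not part of the original header):
 the usual compare-exchange update loop built on _InterlockedCompareExchange64,
 usable on either code path above.

   static __inline long long atomic_or64(volatile long long * target, long long mask)
   {
       long long oldval, newval;
       do
       {
           oldval = *target;
           newval = oldval | mask;
       }
       while (_InterlockedCompareExchange64(target, newval, oldval) != oldval);
       return oldval;
   }
*/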
615
616 #ifdef __i386__
617 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, long Value)
618 {
619 __asm__
620 (
621 "lock; addl %[Value], %[Lo32];"
622 "jae LABEL%=;"
623 "lock; adcl $0, %[Hi32];"
624 "LABEL%=:;" :
625 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
626 [Value] "ir" (Value) :
627 "memory"
628 );
629
630 return Value;
631 }
632 #endif /* __i386__ */
633
634 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, long b)
635 {
636 unsigned char retval;
637 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
638 return retval;
639 }
640
641 #if defined(__x86_64__)
642 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, long long b)
643 {
644 unsigned char retval;
645 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
646 return retval;
647 }
648 #endif
649
650 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, long b)
651 {
652 unsigned char retval;
653 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
654 return retval;
655 }
656
657 #if defined(__x86_64__)
658 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, long long b)
659 {
660 unsigned char retval;
661 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
662 return retval;
663 }
664 #endif
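/*
 Illustrative usage sketch (an added example, not part of the original header):
 a minimal test-and-set spinlock built on the interlocked bit intrinsics.

   static long lock_word = 0;

   while (_interlockedbittestandset(&lock_word, 0))
   {
       // bit 0 was already set: someone else holds the lock, so keep spinning
   }
   // ... critical section ...
   _interlockedbittestandreset(&lock_word, 0);
*/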
665
666 /*** String operations ***/
667
668 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
669 __INTRIN_INLINE void __stosb(unsigned char * Dest, unsigned char Data, size_t Count)
670 {
671 __asm__ __volatile__
672 (
673 "rep; stosb" :
674 [Dest] "=D" (Dest), [Count] "=c" (Count) :
675 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
676 );
677 }
678
679 __INTRIN_INLINE void __stosw(unsigned short * Dest, unsigned short Data, size_t Count)
680 {
681 __asm__ __volatile__
682 (
683 "rep; stosw" :
684 [Dest] "=D" (Dest), [Count] "=c" (Count) :
685 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
686 );
687 }
688
689 __INTRIN_INLINE void __stosd(unsigned long * Dest, unsigned long Data, size_t Count)
690 {
691 __asm__ __volatile__
692 (
693 "rep; stosl" :
694 [Dest] "=D" (Dest), [Count] "=c" (Count) :
695 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
696 );
697 }
698
699 #ifdef __x86_64__
700 __INTRIN_INLINE void __stosq(unsigned long long * Dest, unsigned long long Data, size_t Count)
701 {
702 __asm__ __volatile__
703 (
704 "rep; stosq" :
705 [Dest] "=D" (Dest), [Count] "=c" (Count) :
706 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
707 );
708 }
709 #endif
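/*
 Illustrative usage sketch (an added example, not part of the original header):
 __stosb fills Count bytes at Dest with Data, much like memset:

   unsigned char buffer[16];
   __stosb(buffer, 0xAA, sizeof(buffer));
*/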
710
711 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
712 {
713 __asm__ __volatile__
714 (
715 "rep; movsb" :
716 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
717 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
718 );
719 }
720
721 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
722 {
723 __asm__ __volatile__
724 (
725 "rep; movsw" :
726 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
727 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
728 );
729 }
730
731 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
732 {
733 __asm__ __volatile__
734 (
735 "rep; movsd" :
736 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
737 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
738 );
739 }
740
741 #ifdef __x86_64__
742 __INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
743 {
744 __asm__ __volatile__
745 (
746 "rep; movsq" :
747 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
748 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
749 );
750 }
751 #endif
752
753 #if defined(__x86_64__)
754
755 /*** GS segment addressing ***/
756
757 __INTRIN_INLINE void __writegsbyte(unsigned long Offset, unsigned char Data)
758 {
759 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
760 }
761
762 __INTRIN_INLINE void __writegsword(unsigned long Offset, unsigned short Data)
763 {
764 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
765 }
766
767 __INTRIN_INLINE void __writegsdword(unsigned long Offset, unsigned long Data)
768 {
769 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
770 }
771
772 __INTRIN_INLINE void __writegsqword(unsigned long Offset, unsigned long long Data)
773 {
774 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
775 }
776
777 __INTRIN_INLINE unsigned char __readgsbyte(unsigned long Offset)
778 {
779 unsigned char value;
780 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
781 return value;
782 }
783
784 __INTRIN_INLINE unsigned short __readgsword(unsigned long Offset)
785 {
786 unsigned short value;
787 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
788 return value;
789 }
790
791 __INTRIN_INLINE unsigned long __readgsdword(unsigned long Offset)
792 {
793 unsigned long value;
794 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
795 return value;
796 }
797
798 __INTRIN_INLINE unsigned long long __readgsqword(unsigned long Offset)
799 {
800 unsigned long long value;
801 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
802 return value;
803 }
804
805 __INTRIN_INLINE void __incgsbyte(unsigned long Offset)
806 {
807 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
808 }
809
810 __INTRIN_INLINE void __incgsword(unsigned long Offset)
811 {
812 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
813 }
814
815 __INTRIN_INLINE void __incgsdword(unsigned long Offset)
816 {
817 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
818 }
819
820 __INTRIN_INLINE void __incgsqword(unsigned long Offset)
821 {
822 __asm__ __volatile__("incq %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
823 }
824
825 __INTRIN_INLINE void __addgsbyte(unsigned long Offset, unsigned char Data)
826 {
827 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
828 }
829
830 __INTRIN_INLINE void __addgsword(unsigned long Offset, unsigned short Data)
831 {
832 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
833 }
834
835 __INTRIN_INLINE void __addgsdword(unsigned long Offset, unsigned int Data)
836 {
837 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
838 }
839
840 __INTRIN_INLINE void __addgsqword(unsigned long Offset, unsigned long long Data)
841 {
842 __asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
843 }
844
845 #else /* defined(__x86_64__) */
846
847 /*** FS segment addressing ***/
848
849 __INTRIN_INLINE void __writefsbyte(unsigned long Offset, unsigned char Data)
850 {
851 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
852 }
853
854 __INTRIN_INLINE void __writefsword(unsigned long Offset, unsigned short Data)
855 {
856 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
857 }
858
859 __INTRIN_INLINE void __writefsdword(unsigned long Offset, unsigned long Data)
860 {
861 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
862 }
863
864 __INTRIN_INLINE unsigned char __readfsbyte(unsigned long Offset)
865 {
866 unsigned char value;
867 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
868 return value;
869 }
870
871 __INTRIN_INLINE unsigned short __readfsword(unsigned long Offset)
872 {
873 unsigned short value;
874 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
875 return value;
876 }
877
878 __INTRIN_INLINE unsigned long __readfsdword(unsigned long Offset)
879 {
880 unsigned long value;
881 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
882 return value;
883 }
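/*
 Illustrative usage sketch (an added example, not part of the original header):
 on x86 Windows/ReactOS the TEB is reachable through fs, e.g. the NT_TIB self
 pointer at offset 0x18:

   unsigned long teb = __readfsdword(0x18);
*/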
884
885 __INTRIN_INLINE void __incfsbyte(unsigned long Offset)
886 {
887 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
888 }
889
890 __INTRIN_INLINE void __incfsword(unsigned long Offset)
891 {
892 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
893 }
894
895 __INTRIN_INLINE void __incfsdword(unsigned long Offset)
896 {
897 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
898 }
899
900 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
901 __INTRIN_INLINE void __addfsbyte(unsigned long Offset, unsigned char Data)
902 {
903 if(!__builtin_constant_p(Offset))
904 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
905 else
906 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
907 }
908
909 __INTRIN_INLINE void __addfsword(unsigned long Offset, unsigned short Data)
910 {
911 if(!__builtin_constant_p(Offset))
912 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
913 else
914 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
915 }
916
917 __INTRIN_INLINE void __addfsdword(unsigned long Offset, unsigned long Data)
918 {
919 if(!__builtin_constant_p(Offset))
920 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
921 else
922 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
923 }
924
925 #endif /* defined(__x86_64__) */
926
927
928 /*** Bit manipulation ***/
929
930 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask)
931 {
932 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
933 return Mask ? 1 : 0;
934 }
935
936 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask)
937 {
938 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
939 return Mask ? 1 : 0;
940 }
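/*
 Illustrative usage sketch (an added example, not part of the original header):
 the return value only signals whether Mask had any bit set; the bit position
 comes back through Index.

   unsigned long index;
   if (_BitScanForward(&index, 0x50))
   {
       // index == 4, the position of the lowest set bit of 0x50
   }
*/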
941
942 /* NOTE: again, the bizarre implementation follows Visual C++ */
943 __INTRIN_INLINE unsigned char _bittest(const long * a, long b)
944 {
945 unsigned char retval;
946
947 if(__builtin_constant_p(b))
948 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
949 else
950 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
951
952 return retval;
953 }
954
955 #ifdef __x86_64__
956 __INTRIN_INLINE unsigned char _BitScanForward64(unsigned long * Index, unsigned long long Mask)
957 {
958 unsigned long long Index64;
959 __asm__("bsfq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
960 *Index = Index64;
961 return Mask ? 1 : 0;
962 }
963
964 __INTRIN_INLINE unsigned char _BitScanReverse64(unsigned long * Index, unsigned long long Mask)
965 {
966 unsigned long long Index64;
967 __asm__("bsrq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
968 *Index = Index64;
969 return Mask ? 1 : 0;
970 }
971
972 __INTRIN_INLINE unsigned char _bittest64(const long long * a, long long b)
973 {
974 unsigned char retval;
975
976 if(__builtin_constant_p(b))
977 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
978 else
979 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
980
981 return retval;
982 }
983 #endif
984
985 __INTRIN_INLINE unsigned char _bittestandcomplement(long * a, long b)
986 {
987 unsigned char retval;
988
989 if(__builtin_constant_p(b))
990 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
991 else
992 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
993
994 return retval;
995 }
996
997 __INTRIN_INLINE unsigned char _bittestandreset(long * a, long b)
998 {
999 unsigned char retval;
1000
1001 if(__builtin_constant_p(b))
1002 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1003 else
1004 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1005
1006 return retval;
1007 }
1008
1009 __INTRIN_INLINE unsigned char _bittestandset(long * a, long b)
1010 {
1011 unsigned char retval;
1012
1013 if(__builtin_constant_p(b))
1014 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1015 else
1016 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1017
1018 return retval;
1019 }
1020
1021 #ifdef __x86_64__
1022
1023 __INTRIN_INLINE unsigned char _bittestandset64(long long * a, long long b)
1024 {
1025 unsigned char retval;
1026
1027 if(__builtin_constant_p(b))
1028 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1029 else
1030 __asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1031
1032 return retval;
1033 }
1034
1035 __INTRIN_INLINE unsigned char _bittestandreset64(long long * a, long long b)
1036 {
1037 unsigned char retval;
1038
1039 if(__builtin_constant_p(b))
1040 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1041 else
1042 __asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1043
1044 return retval;
1045 }
1046
1047 __INTRIN_INLINE unsigned char _bittestandcomplement64(long long * a, long long b)
1048 {
1049 unsigned char retval;
1050
1051 if(__builtin_constant_p(b))
1052 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1053 else
1054 __asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1055
1056 return retval;
1057 }
1058
1059 #endif
1060
1061 __INTRIN_INLINE unsigned char __cdecl _rotl8(unsigned char value, unsigned char shift)
1062 {
1063 unsigned char retval;
1064 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1065 return retval;
1066 }
1067
1068 __INTRIN_INLINE unsigned short __cdecl _rotl16(unsigned short value, unsigned char shift)
1069 {
1070 unsigned short retval;
1071 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1072 return retval;
1073 }
1074
1075 __INTRIN_INLINE unsigned int __cdecl _rotl(unsigned int value, int shift)
1076 {
1077 unsigned int retval;
1078 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1079 return retval;
1080 }
1081
1082 #ifdef __x86_64__
1083 __INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
1084 {
1085 unsigned long long retval;
1086 __asm__("rolq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1087 return retval;
1088 }
1089 #else
1090 __INTRIN_INLINE unsigned long long __cdecl _rotl64(unsigned long long value, int shift)
1091 {
1092 /* FIXME: this is probably not optimal */
1093 return (value << shift) | (value >> (64 - shift));
1094 }
1095 #endif
1096
1097 __INTRIN_INLINE unsigned int __cdecl _rotr(unsigned int value, int shift)
1098 {
1099 unsigned int retval;
1100 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1101 return retval;
1102 }
1103
1104 __INTRIN_INLINE unsigned char __cdecl _rotr8(unsigned char value, unsigned char shift)
1105 {
1106 unsigned char retval;
1107 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1108 return retval;
1109 }
1110
1111 __INTRIN_INLINE unsigned short __cdecl _rotr16(unsigned short value, unsigned char shift)
1112 {
1113 unsigned short retval;
1114 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1115 return retval;
1116 }
1117
1118 #ifdef __x86_64__
1119 __INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
1120 {
1121 unsigned long long retval;
1122 __asm__("rorq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1123 return retval;
1124 }
1125 #else
1126 __INTRIN_INLINE unsigned long long __cdecl _rotr64(unsigned long long value, int shift)
1127 {
1128 /* FIXME: this is probably not optimal */
1129 return (value >> shift) | (value << (64 - shift));
1130 }
1131 #endif
1132
1133 __INTRIN_INLINE unsigned long __cdecl _lrotl(unsigned long value, int shift)
1134 {
1135 unsigned long retval;
1136 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1137 return retval;
1138 }
1139
1140 __INTRIN_INLINE unsigned long __cdecl _lrotr(unsigned long value, int shift)
1141 {
1142 unsigned long retval;
1143 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1144 return retval;
1145 }
1146
1147 /*
1148 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1149 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1150 can pass 64-bit operands around - passing the two 32 bit parts separately
1151 just confuses it. Also we declare Bit as an int and then truncate it to
1152 match Visual C++ behavior
1153 */
1154 __INTRIN_INLINE unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
1155 {
1156 unsigned long long retval = Mask;
1157
1158 __asm__
1159 (
1160 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1161 "+A" (retval) :
1162 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1163 );
1164
1165 return retval;
1166 }
1167
1168 __INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
1169 {
1170 long long retval = Mask;
1171
1172 __asm__
1173 (
1174 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1175 "+A" (retval) :
1176 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1177 );
1178
1179 return retval;
1180 }
1181
1182 __INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
1183 {
1184 unsigned long long retval = Mask;
1185
1186 __asm__
1187 (
1188 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1189 "+A" (retval) :
1190 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1191 );
1192
1193 return retval;
1194 }
1195
1196 __INTRIN_INLINE unsigned short __cdecl _byteswap_ushort(unsigned short value)
1197 {
1198 unsigned short retval;
1199 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1200 return retval;
1201 }
1202
1203 __INTRIN_INLINE unsigned long __cdecl _byteswap_ulong(unsigned long value)
1204 {
1205 unsigned long retval;
1206 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1207 return retval;
1208 }
1209
1210 #ifdef __x86_64__
1211 __INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
1212 {
1213 unsigned long long retval;
1214 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1215 return retval;
1216 }
1217 #else
1218 __INTRIN_INLINE unsigned long long __cdecl _byteswap_uint64(unsigned long long value)
1219 {
1220 union {
1221 unsigned long long int64part;
1222 struct {
1223 unsigned long lowpart;
1224 unsigned long hipart;
1225 };
1226 } retval;
1227 retval.int64part = value;
1228 __asm__("bswapl %[lowpart]\n"
1229 "bswapl %[hipart]\n"
1230 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1231 return retval.int64part;
1232 }
1233 #endif
1234
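/* NOTE (added comment, an assumption of these emulations): __builtin_clz has
   undefined behavior for a zero input, unlike the lzcnt instruction, which
   returns the operand width; callers should pass a non-zero value */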
1235 __INTRIN_INLINE unsigned int __lzcnt(unsigned int value)
1236 {
1237 return __builtin_clz(value);
1238 }
1239
1240 __INTRIN_INLINE unsigned short __lzcnt16(unsigned short value)
1241 {
1242 return __builtin_clz(value) - 16; /* __builtin_clz counts leading zeros of the 32-bit promotion */
1243 }
1244
1245 __INTRIN_INLINE unsigned int __popcnt(unsigned int value)
1246 {
1247 return __builtin_popcount(value);
1248 }
1249
1250 __INTRIN_INLINE unsigned short __popcnt16(unsigned short value)
1251 {
1252 return __builtin_popcount(value);
1253 }
1254
1255 #ifdef __x86_64__
1256 __INTRIN_INLINE unsigned long long __lzcnt64(unsigned long long value)
1257 {
1258 return __builtin_clzll(value);
1259 }
1260
1261 __INTRIN_INLINE unsigned long long __popcnt64(unsigned long long value)
1262 {
1263 return __builtin_popcountll(value);
1264 }
1265 #endif
1266
1267 /*** 64-bit math ***/
1268
1269 __INTRIN_INLINE long long __emul(int a, int b)
1270 {
1271 long long retval;
1272 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1273 return retval;
1274 }
1275
1276 __INTRIN_INLINE unsigned long long __emulu(unsigned int a, unsigned int b)
1277 {
1278 unsigned long long retval;
1279 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1280 return retval;
1281 }
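/*
 Illustrative usage sketch (an added example, not part of the original header):
 __emul/__emulu return the full 64-bit product of two 32-bit operands, so the
 multiplication below does not truncate:

   long long r = __emul(100000, 100000);   // 10000000000, wider than 32 bits
*/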
1282
1283 __INTRIN_INLINE long long __cdecl _abs64(long long value)
1284 {
1285 return (value >= 0) ? value : -value;
1286 }
1287
1288 #ifdef __x86_64__
1289
1290 __INTRIN_INLINE long long __mulh(long long a, long long b)
1291 {
1292 long long retval;
1293 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1294 return retval;
1295 }
1296
1297 __INTRIN_INLINE unsigned long long __umulh(unsigned long long a, unsigned long long b)
1298 {
1299 unsigned long long retval;
1300 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1301 return retval;
1302 }
1303
1304 #endif
1305
1306 /*** Port I/O ***/
1307
1308 __INTRIN_INLINE unsigned char __inbyte(unsigned short Port)
1309 {
1310 unsigned char byte;
1311 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1312 return byte;
1313 }
1314
1315 __INTRIN_INLINE unsigned short __inword(unsigned short Port)
1316 {
1317 unsigned short word;
1318 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1319 return word;
1320 }
1321
1322 __INTRIN_INLINE unsigned long __indword(unsigned short Port)
1323 {
1324 unsigned long dword;
1325 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1326 return dword;
1327 }
1328
1329 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1330 {
1331 __asm__ __volatile__
1332 (
1333 "rep; insb" :
1334 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1335 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1336 "memory"
1337 );
1338 }
1339
1340 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1341 {
1342 __asm__ __volatile__
1343 (
1344 "rep; insw" :
1345 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1346 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1347 "memory"
1348 );
1349 }
1350
1351 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1352 {
1353 __asm__ __volatile__
1354 (
1355 "rep; insl" :
1356 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1357 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1358 "memory"
1359 );
1360 }
1361
1362 __INTRIN_INLINE void __outbyte(unsigned short Port, unsigned char Data)
1363 {
1364 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1365 }
1366
1367 __INTRIN_INLINE void __outword(unsigned short Port, unsigned short Data)
1368 {
1369 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1370 }
1371
1372 __INTRIN_INLINE void __outdword(unsigned short Port, unsigned long Data)
1373 {
1374 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1375 }
1376
1377 __INTRIN_INLINE void __outbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1378 {
1379 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1380 }
1381
1382 __INTRIN_INLINE void __outwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1383 {
1384 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1385 }
1386
1387 __INTRIN_INLINE void __outdwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1388 {
1389 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1390 }
1391
1392 __INTRIN_INLINE int __cdecl _inp(unsigned short Port)
1393 {
1394 return __inbyte(Port);
1395 }
1396
1397 __INTRIN_INLINE unsigned short __cdecl _inpw(unsigned short Port)
1398 {
1399 return __inword(Port);
1400 }
1401
1402 __INTRIN_INLINE unsigned long __cdecl _inpd(unsigned short Port)
1403 {
1404 return __indword(Port);
1405 }
1406
1407 __INTRIN_INLINE int __cdecl _outp(unsigned short Port, int databyte)
1408 {
1409 __outbyte(Port, (unsigned char)databyte);
1410 return databyte;
1411 }
1412
1413 __INTRIN_INLINE unsigned short __cdecl _outpw(unsigned short Port, unsigned short dataword)
1414 {
1415 __outword(Port, dataword);
1416 return dataword;
1417 }
1418
1419 __INTRIN_INLINE unsigned long __cdecl _outpd(unsigned short Port, unsigned long dataword)
1420 {
1421 __outdword(Port, dataword);
1422 return dataword;
1423 }
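/*
 Illustrative usage sketch (an added example, not part of the original header):
 reading CMOS register 0x00 (RTC seconds) through the legacy index/data ports;
 requires ring 0 or I/O privilege.

   __outbyte(0x70, 0x00);                    // select CMOS register 0x00
   unsigned char seconds = __inbyte(0x71);   // read its value (BCD by default)
*/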
1424
1425
1426 /*** System information ***/
1427
1428 __INTRIN_INLINE void __cpuid(int CPUInfo[4], int InfoType)
1429 {
1430 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1431 }
1432
1433 __INTRIN_INLINE void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue)
1434 {
1435 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType), "c" (ECXValue));
1436 }
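/*
 Illustrative usage sketch (an added example, not part of the original header):
 leaf 0 returns the vendor string in EBX, EDX, ECX (CPUInfo[1], [3], [2]).

   int regs[4];
   char vendor[13];
   __cpuid(regs, 0);
   memcpy(vendor + 0, &regs[1], 4);   // EBX
   memcpy(vendor + 4, &regs[3], 4);   // EDX
   memcpy(vendor + 8, &regs[2], 4);   // ECX
   vendor[12] = '\0';
*/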
1437
1438 __INTRIN_INLINE unsigned long long __rdtsc(void)
1439 {
1440 #ifdef __x86_64__
1441 unsigned long long low, high;
1442 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1443 return low | (high << 32);
1444 #else
1445 unsigned long long retval;
1446 __asm__ __volatile__("rdtsc" : "=A"(retval));
1447 return retval;
1448 #endif
1449 }
1450
1451 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1452 {
1453 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1454 }
1455
1456 __INTRIN_INLINE uintptr_t __readeflags(void)
1457 {
1458 uintptr_t retval;
1459 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1460 return retval;
1461 }
1462
1463 /*** Interrupts ***/
1464
1465 #ifdef __clang__
1466 #define __debugbreak() __asm__("int $3")
1467 #else
1468 __INTRIN_INLINE void __cdecl __debugbreak(void)
1469 {
1470 __asm__("int $3");
1471 }
1472 #endif
1473
1474 __INTRIN_INLINE void __ud2(void)
1475 {
1476 __asm__("ud2");
1477 }
1478
1479 __INTRIN_INLINE void __int2c(void)
1480 {
1481 __asm__("int $0x2c");
1482 }
1483
1484 __INTRIN_INLINE void __cdecl _disable(void)
1485 {
1486 __asm__("cli" : : : "memory");
1487 }
1488
1489 __INTRIN_INLINE void __cdecl _enable(void)
1490 {
1491 __asm__("sti" : : : "memory");
1492 }
1493
1494 __INTRIN_INLINE void __halt(void)
1495 {
1496 __asm__("hlt" : : : "memory");
1497 }
1498
1499 __declspec(noreturn)
1500 __INTRIN_INLINE void __fastfail(unsigned int Code)
1501 {
1502 __asm__("int $0x29" : : "c"(Code) : "memory");
1503 __builtin_unreachable();
1504 }
1505
1506 /*** Protected memory management ***/
1507
1508 #ifdef __x86_64__
1509
1510 __INTRIN_INLINE void __writecr0(unsigned long long Data)
1511 {
1512 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1513 }
1514
1515 __INTRIN_INLINE void __writecr3(unsigned long long Data)
1516 {
1517 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1518 }
1519
1520 __INTRIN_INLINE void __writecr4(unsigned long long Data)
1521 {
1522 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1523 }
1524
1525 __INTRIN_INLINE void __writecr8(unsigned long long Data)
1526 {
1527 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1528 }
1529
1530 __INTRIN_INLINE unsigned long long __readcr0(void)
1531 {
1532 unsigned long long value;
1533 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1534 return value;
1535 }
1536
1537 __INTRIN_INLINE unsigned long long __readcr2(void)
1538 {
1539 unsigned long long value;
1540 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1541 return value;
1542 }
1543
1544 __INTRIN_INLINE unsigned long long __readcr3(void)
1545 {
1546 unsigned long long value;
1547 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1548 return value;
1549 }
1550
1551 __INTRIN_INLINE unsigned long long __readcr4(void)
1552 {
1553 unsigned long long value;
1554 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1555 return value;
1556 }
1557
1558 __INTRIN_INLINE unsigned long long __readcr8(void)
1559 {
1560 unsigned long long value;
1561 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1562 return value;
1563 }
1564
1565 #else /* __x86_64__ */
1566
1567 __INTRIN_INLINE void __writecr0(unsigned int Data)
1568 {
1569 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1570 }
1571
1572 __INTRIN_INLINE void __writecr3(unsigned int Data)
1573 {
1574 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1575 }
1576
1577 __INTRIN_INLINE void __writecr4(unsigned int Data)
1578 {
1579 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1580 }
1581
1582 __INTRIN_INLINE void __writecr8(unsigned int Data)
1583 {
1584 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1585 }
1586
1587 __INTRIN_INLINE unsigned long __readcr0(void)
1588 {
1589 unsigned long value;
1590 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1591 return value;
1592 }
1593
1594 __INTRIN_INLINE unsigned long __readcr2(void)
1595 {
1596 unsigned long value;
1597 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1598 return value;
1599 }
1600
1601 __INTRIN_INLINE unsigned long __readcr3(void)
1602 {
1603 unsigned long value;
1604 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1605 return value;
1606 }
1607
1608 __INTRIN_INLINE unsigned long __readcr4(void)
1609 {
1610 unsigned long value;
1611 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1612 return value;
1613 }
1614
1615 __INTRIN_INLINE unsigned long __readcr8(void)
1616 {
1617 unsigned long value;
1618 __asm__ __volatile__("mov %%cr8, %[value]" : [value] "=r" (value));
1619 return value;
1620 }
1621
1622 #endif /* __x86_64__ */
1623
1624 #ifdef __x86_64__
1625
1626 __INTRIN_INLINE unsigned long long __readdr(unsigned int reg)
1627 {
1628 unsigned long long value;
1629 switch (reg)
1630 {
1631 case 0:
1632 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1633 break;
1634 case 1:
1635 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1636 break;
1637 case 2:
1638 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1639 break;
1640 case 3:
1641 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1642 break;
1643 case 4:
1644 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1645 break;
1646 case 5:
1647 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1648 break;
1649 case 6:
1650 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1651 break;
1652 case 7:
1653 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1654 break;
1655 }
1656 return value;
1657 }
1658
1659 __INTRIN_INLINE void __writedr(unsigned reg, unsigned long long value)
1660 {
1661 switch (reg)
1662 {
1663 case 0:
1664 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1665 break;
1666 case 1:
1667 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1668 break;
1669 case 2:
1670 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1671 break;
1672 case 3:
1673 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1674 break;
1675 case 4:
1676 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1677 break;
1678 case 5:
1679 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1680 break;
1681 case 6:
1682 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1683 break;
1684 case 7:
1685 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1686 break;
1687 }
1688 }
1689
1690 #else /* __x86_64__ */
1691
1692 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1693 {
1694 unsigned int value;
1695 switch (reg)
1696 {
1697 case 0:
1698 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1699 break;
1700 case 1:
1701 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1702 break;
1703 case 2:
1704 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1705 break;
1706 case 3:
1707 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1708 break;
1709 case 4:
1710 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1711 break;
1712 case 5:
1713 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1714 break;
1715 case 6:
1716 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1717 break;
1718 case 7:
1719 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1720 break;
1721 }
1722 return value;
1723 }
1724
1725 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1726 {
1727 switch (reg)
1728 {
1729 case 0:
1730 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1731 break;
1732 case 1:
1733 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1734 break;
1735 case 2:
1736 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1737 break;
1738 case 3:
1739 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1740 break;
1741 case 4:
1742 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1743 break;
1744 case 5:
1745 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1746 break;
1747 case 6:
1748 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1749 break;
1750 case 7:
1751 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1752 break;
1753 }
1754 }
1755
1756 #endif /* __x86_64__ */
1757
1758 __INTRIN_INLINE void __invlpg(void *Address)
1759 {
1760 __asm__ __volatile__ ("invlpg (%[Address])" : : [Address] "b" (Address) : "memory");
1761 }
1762
1763
1764 /*** System operations ***/
1765
1766 __INTRIN_INLINE unsigned long long __readmsr(unsigned long reg)
1767 {
1768 #ifdef __x86_64__
1769 unsigned long low, high;
1770 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1771 return ((unsigned long long)high << 32) | low;
1772 #else
1773 unsigned long long retval;
1774 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1775 return retval;
1776 #endif
1777 }
1778
1779 __INTRIN_INLINE void __writemsr(unsigned long Register, unsigned long long Value)
1780 {
1781 #ifdef __x86_64__
1782 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1783 #else
1784 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1785 #endif
1786 }
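/*
 Illustrative usage sketch (an added example, not part of the original header):
 a read-modify-write of a model-specific register, e.g. setting bit 0 of
 IA32_MISC_ENABLE (0x1A0); only valid at CPL 0.

   unsigned long long misc = __readmsr(0x1A0);
   __writemsr(0x1A0, misc | (1ULL << 0));
*/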
1787
1788 __INTRIN_INLINE unsigned long long __readpmc(unsigned long counter)
1789 {
1790 unsigned long long retval;
1791 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1792 return retval;
1793 }
1794
1795 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1796 __INTRIN_INLINE unsigned long __segmentlimit(unsigned long a)
1797 {
1798 unsigned long retval;
1799 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1800 return retval;
1801 }
1802
1803 __INTRIN_INLINE void __wbinvd(void)
1804 {
1805 __asm__ __volatile__("wbinvd" : : : "memory");
1806 }
1807
1808 __INTRIN_INLINE void __lidt(void *Source)
1809 {
1810 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1811 }
1812
1813 __INTRIN_INLINE void __sidt(void *Destination)
1814 {
1815 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1816 }
1817
1818 __INTRIN_INLINE void _sgdt(void *Destination)
1819 {
1820 __asm__ __volatile__("sgdt %0" : : "m"(*(short*)Destination) : "memory");
1821 }
1822
1823 /*** Misc operations ***/
1824
1825 __INTRIN_INLINE void _mm_pause(void)
1826 {
1827 __asm__ __volatile__("pause" : : : "memory");
1828 }
1829
1830 __INTRIN_INLINE void __nop(void)
1831 {
1832 __asm__ __volatile__("nop");
1833 }
1834
1835 #ifdef __cplusplus
1836 }
1837 #endif
1838
1839 #endif /* KJK_INTRIN_X86_H_ */
1840
1841 /* EOF */