1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58	NOTE: on GCC versions newer than 4.1.0, use the __sync_* built-ins for
59	barriers and atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64	Pay attention to the type of barrier. Make it match what Visual C++
65	would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 __INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
74 {
75 return memmove(dest, source, num);
76 }
77
78
79 /*** Stack frame juggling ***/
80 #define _ReturnAddress() (__builtin_return_address(0))
81 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
82 /* TODO: __getcallerseflags but how??? */
83
84 /* Maybe the same for x86? */
85 #ifdef __x86_64__
86 #define _alloca(s) __builtin_alloca(s)
87 #endif
88
89 /*** Memory barriers ***/
90
91 __INTRIN_INLINE void _ReadWriteBarrier(void)
92 {
93 __asm__ __volatile__("" : : : "memory");
94 }
95
96 /* GCC only supports full barriers */
97 #define _ReadBarrier _ReadWriteBarrier
98 #define _WriteBarrier _ReadWriteBarrier
99
100 __INTRIN_INLINE void _mm_mfence(void)
101 {
102 __asm__ __volatile__("mfence" : : : "memory");
103 }
104
105 __INTRIN_INLINE void _mm_lfence(void)
106 {
107 _ReadBarrier();
108 __asm__ __volatile__("lfence");
109 _ReadBarrier();
110 }
111
112 __INTRIN_INLINE void _mm_sfence(void)
113 {
114 _WriteBarrier();
115 __asm__ __volatile__("sfence");
116 _WriteBarrier();
117 }
118
119 #ifdef __x86_64__
120 __INTRIN_INLINE void __faststorefence(void)
121 {
122 	long local;
123 	__asm__ __volatile__("lock; orl $0, %0;" : "+m"(local) : : "memory");
124 }
125 #endif
126
127
128 /*** Atomic operations ***/
129
130 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
131
132 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
133 {
134 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
135 }
136
137 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
138 {
139 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
140 }
141
142 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
143 {
144 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
145 }
146
147 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
148 {
149 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
150 }
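/*
	Usage note: the compare-exchange intrinsics return the value that was at
	*Destination before the operation, so the usual pattern is a read-modify-CAS
	loop. A minimal sketch (illustrative only; InterlockedIncrementClamped is a
	hypothetical helper, not part of this header):

	static long InterlockedIncrementClamped(volatile long * counter, long max)
	{
		long old, desired;
		do
		{
			old = *counter;
			desired = (old < max) ? old + 1 : old;
		}
		while(_InterlockedCompareExchange(counter, desired, old) != old);
		return desired;
	}
*/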
151
152 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
153 {
154 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
155 __sync_synchronize();
156 return __sync_lock_test_and_set(Target, Value);
157 }
158
159 #if defined(_M_AMD64)
160 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
161 {
162 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
163 __sync_synchronize();
164 return __sync_lock_test_and_set(Target, Value);
165 }
166 #endif
167
168 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
169 {
170 /* NOTE: ditto */
171 __sync_synchronize();
172 return (void *)__sync_lock_test_and_set(Target, Value);
173 }
174
175 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
176 {
177 return __sync_fetch_and_add(Addend, Value);
178 }
179
180 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
181 {
182 return __sync_fetch_and_add(Addend, Value);
183 }
184
185 #if defined(_M_AMD64)
186 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
187 {
188 return __sync_fetch_and_add(Addend, Value);
189 }
190 #endif
191
192 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
193 {
194 return __sync_fetch_and_and(value, mask);
195 }
196
197 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
198 {
199 return __sync_fetch_and_and(value, mask);
200 }
201
202 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
203 {
204 return __sync_fetch_and_and(value, mask);
205 }
206
207 #if defined(_M_AMD64)
208 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
209 {
210 return __sync_fetch_and_and(value, mask);
211 }
212 #endif
213
214 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
215 {
216 return __sync_fetch_and_or(value, mask);
217 }
218
219 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
220 {
221 return __sync_fetch_and_or(value, mask);
222 }
223
224 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
225 {
226 return __sync_fetch_and_or(value, mask);
227 }
228
229 #if defined(_M_AMD64)
230 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
231 {
232 return __sync_fetch_and_or(value, mask);
233 }
234 #endif
235
236 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
237 {
238 return __sync_fetch_and_xor(value, mask);
239 }
240
241 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
242 {
243 return __sync_fetch_and_xor(value, mask);
244 }
245
246 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
247 {
248 return __sync_fetch_and_xor(value, mask);
249 }
250
251 #if defined(_M_AMD64)
252 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
253 {
254 return __sync_fetch_and_xor(value, mask);
255 }
256 #endif
257
258 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
259 {
260 return __sync_sub_and_fetch(lpAddend, 1);
261 }
262
263 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
264 {
265 return __sync_add_and_fetch(lpAddend, 1);
266 }
267
268 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
269 {
270 return __sync_sub_and_fetch(lpAddend, 1);
271 }
272
273 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
274 {
275 return __sync_add_and_fetch(lpAddend, 1);
276 }
277
278 #if defined(_M_AMD64)
279 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
280 {
281 return __sync_sub_and_fetch(lpAddend, 1);
282 }
283
284 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
285 {
286 return __sync_add_and_fetch(lpAddend, 1);
287 }
288 #endif
289
290 #else
291
292 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
293 {
294 char retval = Comperand;
295 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
296 return retval;
297 }
298
299 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
300 {
301 short retval = Comperand;
302 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
303 return retval;
304 }
305
306 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
307 {
308 long retval = Comperand;
309 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
310 return retval;
311 }
312
313 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
314 {
315 void * retval = (void *)Comperand;
316 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
317 return retval;
318 }
319
320 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
321 {
322 long retval = Value;
323 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
324 return retval;
325 }
326
327 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
328 {
329 void * retval = Value;
330 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
331 return retval;
332 }
333
334 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
335 {
336 	short retval = Value;
337 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
338 return retval;
339 }
340
341 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
342 {
343 long retval = Value;
344 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
345 return retval;
346 }
347
348 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
349 {
350 char x;
351 char y;
352
353 y = *value;
354
355 do
356 {
357 x = y;
358 y = _InterlockedCompareExchange8(value, x & mask, x);
359 }
360 while(y != x);
361
362 return y;
363 }
364
365 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
366 {
367 short x;
368 short y;
369
370 y = *value;
371
372 do
373 {
374 x = y;
375 y = _InterlockedCompareExchange16(value, x & mask, x);
376 }
377 while(y != x);
378
379 return y;
380 }
381
382 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
383 {
384 long x;
385 long y;
386
387 y = *value;
388
389 do
390 {
391 x = y;
392 y = _InterlockedCompareExchange(value, x & mask, x);
393 }
394 while(y != x);
395
396 return y;
397 }
398
399 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
400 {
401 char x;
402 char y;
403
404 y = *value;
405
406 do
407 {
408 x = y;
409 y = _InterlockedCompareExchange8(value, x | mask, x);
410 }
411 while(y != x);
412
413 return y;
414 }
415
416 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
417 {
418 short x;
419 short y;
420
421 y = *value;
422
423 do
424 {
425 x = y;
426 y = _InterlockedCompareExchange16(value, x | mask, x);
427 }
428 while(y != x);
429
430 return y;
431 }
432
433 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
434 {
435 long x;
436 long y;
437
438 y = *value;
439
440 do
441 {
442 x = y;
443 y = _InterlockedCompareExchange(value, x | mask, x);
444 }
445 while(y != x);
446
447 return y;
448 }
449
450 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
451 {
452 char x;
453 char y;
454
455 y = *value;
456
457 do
458 {
459 x = y;
460 y = _InterlockedCompareExchange8(value, x ^ mask, x);
461 }
462 while(y != x);
463
464 return y;
465 }
466
467 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
468 {
469 short x;
470 short y;
471
472 y = *value;
473
474 do
475 {
476 x = y;
477 y = _InterlockedCompareExchange16(value, x ^ mask, x);
478 }
479 while(y != x);
480
481 return y;
482 }
483
484 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
485 {
486 long x;
487 long y;
488
489 y = *value;
490
491 do
492 {
493 x = y;
494 y = _InterlockedCompareExchange(value, x ^ mask, x);
495 }
496 while(y != x);
497
498 return y;
499 }
500
501 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
502 {
503 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
504 }
505
506 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
507 {
508 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
509 }
510
511 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
512 {
513 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
514 }
515
516 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
517 {
518 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
519 }
520
521 #if defined(_M_AMD64)
522 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
523 {
524 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
525 }
526
527 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
528 {
529 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
530 }
531 #endif
532
533 #endif
534
535 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
536
537 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
538 {
539 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
540 }
541
542 #else
543
544 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
545 {
546 long long retval = Comperand;
547
548 __asm__
549 (
550 "lock; cmpxchg8b %[Destination]" :
551 [retval] "+A" (retval) :
552 [Destination] "m" (*Destination),
553 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
554 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
555 "memory"
556 );
557
558 return retval;
559 }
560
561 #endif
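/*
	Usage note: on 32-bit x86 a plain 64-bit load is not atomic, but
	_InterlockedCompareExchange64 can be used to read a 64-bit value atomically:
	with Exchange == Comperand == 0 the location is either left unchanged or
	rewritten with its own value, and the current contents are returned. Sketch
	(illustrative only; AtomicRead64 is a hypothetical helper):

	static long long AtomicRead64(volatile long long * p)
	{
		return _InterlockedCompareExchange64(p, 0, 0);
	}
*/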
562
563 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
564 {
565 __asm__
566 (
567 "lock; add %[Value], %[Lo32];"
568 "jae LABEL%=;"
569 "lock; adc $0, %[Hi32];"
570 "LABEL%=:;" :
571 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
572 [Value] "ir" (Value) :
573 "memory"
574 );
575
576 return Value;
577 }
578
579 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
580 {
581 unsigned char retval;
582 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
583 return retval;
584 }
585
586 #if defined(_M_AMD64)
587 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
588 {
589 unsigned char retval;
590 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
591 return retval;
592 }
593 #endif
594
595 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
596 {
597 unsigned char retval;
598 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
599 return retval;
600 }
601
602 #if defined(_M_AMD64)
603 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
604 {
605 unsigned char retval;
606 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
607 return retval;
608 }
609 #endif
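/*
	Usage note: the interlocked bit-test intrinsics return the *previous* state of
	the bit, which makes them usable as one-shot claim flags. Minimal sketch
	(illustrative only; TryClaimFlag/ReleaseFlag are hypothetical helpers):

	static unsigned char TryClaimFlag(volatile long * flag)
	{
		// returns 1 if we set bit 0 (it was previously clear), 0 if it was already set
		return _interlockedbittestandset(flag, 0) == 0;
	}

	static void ReleaseFlag(volatile long * flag)
	{
		_interlockedbittestandreset(flag, 0);
	}
*/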
610
611 /*** String operations ***/
612 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
613 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
614 {
615 __asm__ __volatile__
616 (
617 "rep; stosb" :
618 [Dest] "=D" (Dest), [Count] "=c" (Count) :
619 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
620 );
621 }
622
623 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
624 {
625 __asm__ __volatile__
626 (
627 "rep; stosw" :
628 [Dest] "=D" (Dest), [Count] "=c" (Count) :
629 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
630 );
631 }
632
633 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
634 {
635 __asm__ __volatile__
636 (
637 "rep; stosl" :
638 [Dest] "=D" (Dest), [Count] "=c" (Count) :
639 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
640 );
641 }
642
643 #ifdef _M_AMD64
644 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
645 {
646 __asm__ __volatile__
647 (
648 "rep; stosq" :
649 [Dest] "=D" (Dest), [Count] "=c" (Count) :
650 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
651 );
652 }
653 #endif
654
655 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
656 {
657 __asm__ __volatile__
658 (
659 "rep; movsb" :
660 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
661 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
662 );
663 }
664
665 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
666 {
667 __asm__ __volatile__
668 (
669 "rep; movsw" :
670 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
671 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
672 );
673 }
674
675 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
676 {
677 __asm__ __volatile__
678 (
679 "rep; movsd" :
680 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
681 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
682 );
683 }
684
685 #ifdef _M_AMD64
686 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
687 {
688 __asm__ __volatile__
689 (
690 "rep; movsq" :
691 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
692 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
693 );
694 }
695 #endif
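/*
	Usage note: the __stosX/__movsX intrinsics wrap "rep stos"/"rep movs", so Count
	is measured in elements, not bytes, and __movsX copies forward only (it is not
	safe for overlapping moves where Destination > Source). Sketch (illustrative
	only; FillDwords/CopyBytes are hypothetical helpers):

	static void FillDwords(unsigned long * buffer, unsigned long pattern, size_t count)
	{
		__stosd(buffer, pattern, count);   // count dwords, not bytes
	}

	static void CopyBytes(unsigned char * dst, const unsigned char * src, size_t n)
	{
		__movsb(dst, src, n);
	}
*/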
696
697 #if defined(_M_AMD64)
698 /*** GS segment addressing ***/
699
700 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
701 {
702 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
703 }
704
705 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
706 {
707 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
708 }
709
710 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
711 {
712 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
713 }
714
715 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
716 {
717 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
718 }
719
720 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
721 {
722 unsigned char value;
723 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
724 return value;
725 }
726
727 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
728 {
729 unsigned short value;
730 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
731 return value;
732 }
733
734 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
735 {
736 unsigned long value;
737 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
738 return value;
739 }
740
741 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
742 {
743 unsigned __int64 value;
744 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
745 return value;
746 }
747
748 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
749 {
750 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
751 }
752
753 __INTRIN_INLINE void __incgsword(const unsigned long Offset)
754 {
755 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
756 }
757
758 __INTRIN_INLINE void __incgsdword(const unsigned long Offset)
759 {
760 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
761 }
762
763 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
764 {
765 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
766 }
767
768 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
769 {
770 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
771 }
772
773 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
774 {
775 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
776 }
777
778 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
779 {
780 	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
781 }
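/*
	Usage note: on Win64 the GS segment base points at the current thread's TEB, so
	__readgsqword can fetch TEB fields directly. Sketch (illustrative only; 0x30 is
	assumed here to be the offset of the TEB Self pointer, as read by NtCurrentTeb()):

	void * teb = (void *)__readgsqword(0x30);
*/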
782
783 #else
784 /*** FS segment addressing ***/
785 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
786 {
787 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
788 }
789
790 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
791 {
792 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
793 }
794
795 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
796 {
797 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
798 }
799
800 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
801 {
802 unsigned char value;
803 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
804 return value;
805 }
806
807 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
808 {
809 unsigned short value;
810 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
811 return value;
812 }
813
814 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
815 {
816 unsigned long value;
817 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
818 return value;
819 }
820
821 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
822 {
823 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
824 }
825
826 __INTRIN_INLINE void __incfsword(const unsigned long Offset)
827 {
828 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
829 }
830
831 __INTRIN_INLINE void __incfsdword(const unsigned long Offset)
832 {
833 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
834 }
835
836 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
837 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
838 {
839 if(!__builtin_constant_p(Offset))
840 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
841 else
842 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
843 }
844
845 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
846 {
847 if(!__builtin_constant_p(Offset))
848 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
849 else
850 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
851 }
852
853 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
854 {
855 if(!__builtin_constant_p(Offset))
856 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
857 else
858 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
859 }
860 #endif
861
862
863 /*** Bit manipulation ***/
864 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
865 {
866 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
867 return Mask ? 1 : 0;
868 }
869
870 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
871 {
872 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
873 return Mask ? 1 : 0;
874 }
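/*
	Usage note: when Mask is zero, *Index is left undefined and the return value is
	0, so the return value must be checked before the index is used. Sketch
	(illustrative only; LowestSetBit is a hypothetical helper):

	static unsigned long LowestSetBit(unsigned long mask)
	{
		unsigned long index;
		if(!_BitScanForward(&index, mask))
			return 32;   // convention chosen for this example: no bit set
		return index;
	}
*/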
875
876 /* NOTE: again, the bizarre implementation follows Visual C++ */
877 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
878 {
879 unsigned char retval;
880
881 if(__builtin_constant_p(b))
882 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
883 else
884 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
885
886 return retval;
887 }
888
889 #ifdef _M_AMD64
890 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
891 {
892 unsigned char retval;
893
894 if(__builtin_constant_p(b))
895 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
896 else
897 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
898
899 return retval;
900 }
901 #endif
902
903 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
904 {
905 unsigned char retval;
906
907 if(__builtin_constant_p(b))
908 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
909 else
910 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
911
912 return retval;
913 }
914
915 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
916 {
917 unsigned char retval;
918
919 if(__builtin_constant_p(b))
920 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
921 else
922 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
923
924 return retval;
925 }
926
927 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
928 {
929 unsigned char retval;
930
931 if(__builtin_constant_p(b))
932 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
933 else
934 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
935
936 return retval;
937 }
938
939 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
940 {
941 unsigned char retval;
942 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
943 return retval;
944 }
945
946 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
947 {
948 unsigned short retval;
949 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
950 return retval;
951 }
952
953 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
954 {
955 unsigned long retval;
956 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
957 return retval;
958 }
959
960 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
961 {
962 unsigned long retval;
963 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
964 return retval;
965 }
966
967 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
968 {
969 unsigned char retval;
970 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
971 return retval;
972 }
973
974 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
975 {
976 unsigned short retval;
977 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
978 return retval;
979 }
980
981 /*
982 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
983 constraint (edx:eax) for the Mask argument, because it's the only way GCC
984 can pass 64-bit operands around - passing the two 32 bit parts separately
985 just confuses it. Also we declare Bit as an int and then truncate it to
986 match Visual C++ behavior
987 */
988 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
989 {
990 unsigned long long retval = Mask;
991
992 __asm__
993 (
994 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
995 "+A" (retval) :
996 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
997 );
998
999 return retval;
1000 }
1001
1002 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
1003 {
1004 unsigned long long retval = Mask;
1005
1006 __asm__
1007 (
1008 "shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
1009 "+A" (retval) :
1010 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1011 );
1012
1013 return retval;
1014 }
1015
1016 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
1017 {
1018 unsigned long long retval = Mask;
1019
1020 __asm__
1021 (
1022 "shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
1023 "+A" (retval) :
1024 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1025 );
1026
1027 return retval;
1028 }
1029
1030 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
1031 {
1032 unsigned short retval;
1033 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1034 return retval;
1035 }
1036
1037 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
1038 {
1039 unsigned long retval;
1040 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1041 return retval;
1042 }
1043
1044 #ifdef _M_AMD64
1045 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1046 {
1047 unsigned __int64 retval;
1048 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1049 return retval;
1050 }
1051 #else
1052 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1053 {
1054 union {
1055 __int64 int64part;
1056 struct {
1057 unsigned long lowpart;
1058 unsigned long hipart;
1059 };
1060 } retval;
1061 retval.int64part = value;
1062 __asm__("bswapl %[lowpart]\n"
1063 "bswapl %[hipart]\n"
1064 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1065 return retval.int64part;
1066 }
1067 #endif
1068
1069 /*** 64-bit math ***/
1070 __INTRIN_INLINE long long __emul(const int a, const int b)
1071 {
1072 long long retval;
1073 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1074 return retval;
1075 }
1076
1077 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
1078 {
1079 unsigned long long retval;
1080 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1081 return retval;
1082 }
1083
1084 #ifdef _M_AMD64
1085
1086 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
1087 {
1088 __int64 retval;
1089 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1090 return retval;
1091 }
1092
1093 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
1094 {
1095 unsigned __int64 retval;
1096 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1097 return retval;
1098 }
1099
1100 #endif
1101
1102 /*** Port I/O ***/
1103 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
1104 {
1105 unsigned char byte;
1106 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1107 return byte;
1108 }
1109
1110 __INTRIN_INLINE unsigned short __inword(const unsigned short Port)
1111 {
1112 unsigned short word;
1113 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1114 return word;
1115 }
1116
1117 __INTRIN_INLINE unsigned long __indword(const unsigned short Port)
1118 {
1119 unsigned long dword;
1120 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1121 return dword;
1122 }
1123
1124 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1125 {
1126 __asm__ __volatile__
1127 (
1128 "rep; insb" :
1129 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1130 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1131 "memory"
1132 );
1133 }
1134
1135 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1136 {
1137 __asm__ __volatile__
1138 (
1139 "rep; insw" :
1140 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1141 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1142 "memory"
1143 );
1144 }
1145
1146 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1147 {
1148 __asm__ __volatile__
1149 (
1150 "rep; insl" :
1151 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1152 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1153 "memory"
1154 );
1155 }
1156
1157 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
1158 {
1159 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1160 }
1161
1162 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
1163 {
1164 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1165 }
1166
1167 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
1168 {
1169 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1170 }
1171
1172 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
1173 {
1174 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1175 }
1176
1177 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
1178 {
1179 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1180 }
1181
1182 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
1183 {
1184 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1185 }
1186
1187 __INTRIN_INLINE int _inp(unsigned short Port)
1188 {
1189 return __inbyte(Port);
1190 }
1191
1192 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1193 {
1194 return __inword(Port);
1195 }
1196
1197 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1198 {
1199 return __indword(Port);
1200 }
1201
1202 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1203 {
1204 __outbyte(Port, databyte);
1205 return databyte;
1206 }
1207
1208 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1209 {
1210 __outword(Port, dataword);
1211 return dataword;
1212 }
1213
1214 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1215 {
1216 __outdword(Port, dataword);
1217 return dataword;
1218 }
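/*
	Usage note: a typical indexed-register device protocol writes the register index
	to one port and transfers data through another. Sketch (illustrative only; the
	CMOS/RTC index/data ports 0x70/0x71 are used purely as an example, and ReadCmos
	is a hypothetical helper):

	static unsigned char ReadCmos(unsigned char reg)
	{
		__outbyte(0x70, reg);
		return __inbyte(0x71);
	}
*/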
1219
1220
1221 /*** System information ***/
1222 __INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
1223 {
1224 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1225 }
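/*
	Usage note: __cpuid stores EAX, EBX, ECX and EDX (in that order) for the
	requested leaf. The 12-byte vendor string from leaf 0 is laid out in EBX, EDX,
	ECX order. Sketch (illustrative only; GetVendorString is a hypothetical helper):

	static void GetVendorString(char vendor[13])
	{
		int info[4];
		__cpuid(info, 0);
		((int *)vendor)[0] = info[1];   // EBX
		((int *)vendor)[1] = info[3];   // EDX
		((int *)vendor)[2] = info[2];   // ECX
		vendor[12] = '\0';
	}
*/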
1226
1227 __INTRIN_INLINE unsigned long long __rdtsc(void)
1228 {
1229 #ifdef _M_AMD64
1230 unsigned long long low, high;
1231 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1232 return low | (high << 32);
1233 #else
1234 unsigned long long retval;
1235 __asm__ __volatile__("rdtsc" : "=A"(retval));
1236 return retval;
1237 #endif
1238 }
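/*
	Usage note: __rdtsc reads the time-stamp counter; it is not a serializing
	instruction, so back-to-back readings only give an approximate cycle count.
	Sketch (illustrative only):

	unsigned long long start = __rdtsc();
	// ... code under measurement ...
	unsigned long long cycles = __rdtsc() - start;
*/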
1239
1240 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1241 {
1242 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1243 }
1244
1245 __INTRIN_INLINE uintptr_t __readeflags(void)
1246 {
1247 uintptr_t retval;
1248 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1249 return retval;
1250 }
1251
1252 /*** Interrupts ***/
1253 #ifdef __clang__
1254 #define __debugbreak() __asm__("int $3")
1255 #else
1256 __INTRIN_INLINE void __debugbreak(void)
1257 {
1258 __asm__("int $3");
1259 }
1260 #endif
1261
1262 __INTRIN_INLINE void __int2c(void)
1263 {
1264 __asm__("int $0x2c");
1265 }
1266
1267 __INTRIN_INLINE void _disable(void)
1268 {
1269 __asm__("cli" : : : "memory");
1270 }
1271
1272 __INTRIN_INLINE void _enable(void)
1273 {
1274 __asm__("sti" : : : "memory");
1275 }
1276
1277 __INTRIN_INLINE void __halt(void)
1278 {
1279 __asm__("hlt\n\t" : : : "memory");
1280 }
1281
1282 /*** Protected memory management ***/
1283
1284 #ifdef _M_AMD64
1285 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
1286 {
1287 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1288 }
1289
1290 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
1291 {
1292 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1293 }
1294
1295 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
1296 {
1297 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1298 }
1299
1300 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
1301 {
1302 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1303 }
1304
1305 __INTRIN_INLINE unsigned __int64 __readcr0(void)
1306 {
1307 unsigned __int64 value;
1308 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1309 return value;
1310 }
1311
1312 __INTRIN_INLINE unsigned __int64 __readcr2(void)
1313 {
1314 unsigned __int64 value;
1315 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1316 return value;
1317 }
1318
1319 __INTRIN_INLINE unsigned __int64 __readcr3(void)
1320 {
1321 unsigned __int64 value;
1322 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1323 return value;
1324 }
1325
1326 __INTRIN_INLINE unsigned __int64 __readcr4(void)
1327 {
1328 unsigned __int64 value;
1329 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1330 return value;
1331 }
1332
1333 __INTRIN_INLINE unsigned __int64 __readcr8(void)
1334 {
1335 unsigned __int64 value;
1336 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1337 return value;
1338 }
1339 #else
1340 __INTRIN_INLINE void __writecr0(const unsigned int Data)
1341 {
1342 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1343 }
1344
1345 __INTRIN_INLINE void __writecr3(const unsigned int Data)
1346 {
1347 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1348 }
1349
1350 __INTRIN_INLINE void __writecr4(const unsigned int Data)
1351 {
1352 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1353 }
1354
1355 __INTRIN_INLINE unsigned long __readcr0(void)
1356 {
1357 unsigned long value;
1358 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1359 return value;
1360 }
1361
1362 __INTRIN_INLINE unsigned long __readcr2(void)
1363 {
1364 unsigned long value;
1365 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1366 return value;
1367 }
1368
1369 __INTRIN_INLINE unsigned long __readcr3(void)
1370 {
1371 unsigned long value;
1372 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1373 return value;
1374 }
1375
1376 __INTRIN_INLINE unsigned long __readcr4(void)
1377 {
1378 unsigned long value;
1379 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1380 return value;
1381 }
1382 #endif
1383
1384 #ifdef _M_AMD64
1385 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
1386 {
1387 unsigned __int64 value;
1388 switch (reg)
1389 {
1390 case 0:
1391 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1392 break;
1393 case 1:
1394 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1395 break;
1396 case 2:
1397 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1398 break;
1399 case 3:
1400 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1401 break;
1402 case 4:
1403 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1404 break;
1405 case 5:
1406 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1407 break;
1408 case 6:
1409 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1410 break;
1411 case 7:
1412 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1413 break;
1414 }
1415 return value;
1416 }
1417
1418 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
1419 {
1420 switch (reg)
1421 {
1422 case 0:
1423 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1424 break;
1425 case 1:
1426 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1427 break;
1428 case 2:
1429 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1430 break;
1431 case 3:
1432 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1433 break;
1434 case 4:
1435 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1436 break;
1437 case 5:
1438 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1439 break;
1440 case 6:
1441 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1442 break;
1443 case 7:
1444 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1445 break;
1446 }
1447 }
1448 #else
1449 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1450 {
1451 unsigned int value;
1452 switch (reg)
1453 {
1454 case 0:
1455 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1456 break;
1457 case 1:
1458 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1459 break;
1460 case 2:
1461 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1462 break;
1463 case 3:
1464 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1465 break;
1466 case 4:
1467 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1468 break;
1469 case 5:
1470 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1471 break;
1472 case 6:
1473 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1474 break;
1475 case 7:
1476 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1477 break;
1478 }
1479 return value;
1480 }
1481
1482 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1483 {
1484 switch (reg)
1485 {
1486 case 0:
1487 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1488 break;
1489 case 1:
1490 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1491 break;
1492 case 2:
1493 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1494 break;
1495 case 3:
1496 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1497 break;
1498 case 4:
1499 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1500 break;
1501 case 5:
1502 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1503 break;
1504 case 6:
1505 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1506 break;
1507 case 7:
1508 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1509 break;
1510 }
1511 }
1512 #endif
1513
1514 __INTRIN_INLINE void __invlpg(void * const Address)
1515 {
1516 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
1517 }
1518
1519
1520 /*** System operations ***/
1521 __INTRIN_INLINE unsigned long long __readmsr(const int reg)
1522 {
1523 #ifdef _M_AMD64
1524 unsigned long low, high;
1525 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1526 return ((unsigned long long)high << 32) | low;
1527 #else
1528 unsigned long long retval;
1529 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1530 return retval;
1531 #endif
1532 }
1533
1534 __INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
1535 {
1536 #ifdef _M_AMD64
1537 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1538 #else
1539 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1540 #endif
1541 }
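/*
	Usage note: __readmsr/__writemsr place the MSR index in ECX and move the value
	through EDX:EAX; they are privileged instructions (kernel mode only). A
	read-modify-write sketch (illustrative only; 0x1B, IA32_APIC_BASE, and its
	global-enable bit 11 are used purely as an example):

	unsigned long long apic_base = __readmsr(0x1B);
	__writemsr(0x1B, apic_base | (1ULL << 11));
*/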
1542
1543 __INTRIN_INLINE unsigned long long __readpmc(const int counter)
1544 {
1545 unsigned long long retval;
1546 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1547 return retval;
1548 }
1549
1550 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1551 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
1552 {
1553 unsigned long retval;
1554 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1555 return retval;
1556 }
1557
1558 __INTRIN_INLINE void __wbinvd(void)
1559 {
1560 __asm__ __volatile__("wbinvd" : : : "memory");
1561 }
1562
1563 __INTRIN_INLINE void __lidt(void *Source)
1564 {
1565 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1566 }
1567
1568 __INTRIN_INLINE void __sidt(void *Destination)
1569 {
1570 	__asm__ __volatile__("sidt %0" : "=m"(*(short*)Destination) : : "memory");
1571 }
1572
1573 /*** Misc operations ***/
1574
1575 __INTRIN_INLINE void _mm_pause(void)
1576 {
1577 __asm__ __volatile__("pause" : : : "memory");
1578 }
1579
1580 __INTRIN_INLINE void __nop(void)
1581 {
1582 __asm__ __volatile__("nop");
1583 }
1584
1585 #ifdef __cplusplus
1586 }
1587 #endif
1588
1589 #endif /* KJK_INTRIN_X86_H_ */
1590
1591 /* EOF */