reactos/include/crt/mingw32/intrin_x86.h
1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
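/*
 Illustration (a sketch of the mapping used below, with placeholder names,
 not part of the Visual C++ API): on GCC newer than 4.1.0 a compatibility
 intrinsic such as

   long Old = _InterlockedCompareExchange(&Shared, NewValue, Expected);

 is implemented in terms of __sync_val_compare_and_swap(&Shared, Expected,
 NewValue), which emits a lock-prefixed cmpxchg and acts as a full memory
 barrier, the same semantics the Visual C++ intrinsic guarantees. Older
 compilers fall back to the hand-written inline assembly further down.
*/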
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** Stack frame juggling ***/
73 #define _ReturnAddress() (__builtin_return_address(0))
74 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
75 /* TODO: __getcallerseflags but how??? */
76
77 /* Maybe the same for x86? */
78 #ifdef __x86_64__
79 #define _alloca(s) __builtin_alloca(s)
80 #endif
81
82 /*** Memory barriers ***/
83
84 __INTRIN_INLINE void _ReadWriteBarrier(void)
85 {
86 __asm__ __volatile__("" : : : "memory");
87 }
88
89 /* GCC only supports full barriers */
90 #define _ReadBarrier _ReadWriteBarrier
91 #define _WriteBarrier _ReadWriteBarrier
92
93 __INTRIN_INLINE void _mm_mfence(void)
94 {
95 __asm__ __volatile__("mfence" : : : "memory");
96 }
97
98 __INTRIN_INLINE void _mm_lfence(void)
99 {
100 _ReadBarrier();
101 __asm__ __volatile__("lfence");
102 _ReadBarrier();
103 }
104
105 __INTRIN_INLINE void _mm_sfence(void)
106 {
107 _WriteBarrier();
108 __asm__ __volatile__("sfence");
109 _WriteBarrier();
110 }
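/*
 Note (sketch): _ReadWriteBarrier and friends above only constrain the
 *compiler*; _mm_mfence/_mm_lfence/_mm_sfence additionally emit the CPU
 fence instruction. For example, code that publishes data with non-temporal
 (movnt) stores would typically call _mm_sfence() before setting a flag
 that another processor polls.
*/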
111
112 #ifdef __x86_64__
113 __INTRIN_INLINE void __faststorefence(void)
114 {
115 long local;
116 __asm__ __volatile__("lock; orl $0, %0;" : "+m"(local));
117 }
118 #endif
119
120
121 /*** Atomic operations ***/
122
123 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
124
125 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
126 {
127 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
128 }
129
130 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
131 {
132 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
133 }
134
135 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
136 {
137 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
138 }
139
140 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
141 {
142 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
143 }
144
145 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
146 {
147 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
148 __sync_synchronize();
149 return __sync_lock_test_and_set(Target, Value);
150 }
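/*
 Usage sketch (illustrative only; LockVariable is a hypothetical volatile
 long of the caller's): a minimal spin lock built on this intrinsic -

   while(_InterlockedExchange(&LockVariable, 1) != 0)
       _mm_pause();
   ... critical section ...
   _InterlockedExchange(&LockVariable, 0);

 The full barrier forced above provides the acquire/release ordering such
 code expects from the Visual C++ intrinsic.
*/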
151
152 #if defined(_M_AMD64)
153 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
154 {
155 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
156 __sync_synchronize();
157 return __sync_lock_test_and_set(Target, Value);
158 }
159 #endif
160
161 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
162 {
163 /* NOTE: ditto */
164 __sync_synchronize();
165 return (void *)__sync_lock_test_and_set(Target, Value);
166 }
167
168 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
169 {
170 return __sync_fetch_and_add(Addend, Value);
171 }
172
173 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
174 {
175 return __sync_fetch_and_add(Addend, Value);
176 }
177
178 #if defined(_M_AMD64)
179 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
180 {
181 return __sync_fetch_and_add(Addend, Value);
182 }
183 #endif
184
185 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
186 {
187 return __sync_fetch_and_and(value, mask);
188 }
189
190 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
191 {
192 return __sync_fetch_and_and(value, mask);
193 }
194
195 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
196 {
197 return __sync_fetch_and_and(value, mask);
198 }
199
200 #if defined(_M_AMD64)
201 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
202 {
203 return __sync_fetch_and_and(value, mask);
204 }
205 #endif
206
207 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
208 {
209 return __sync_fetch_and_or(value, mask);
210 }
211
212 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
213 {
214 return __sync_fetch_and_or(value, mask);
215 }
216
217 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
218 {
219 return __sync_fetch_and_or(value, mask);
220 }
221
222 #if defined(_M_AMD64)
223 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
224 {
225 return __sync_fetch_and_or(value, mask);
226 }
227 #endif
228
229 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
230 {
231 return __sync_fetch_and_xor(value, mask);
232 }
233
234 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
235 {
236 return __sync_fetch_and_xor(value, mask);
237 }
238
239 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
240 {
241 return __sync_fetch_and_xor(value, mask);
242 }
243
244 #if defined(_M_AMD64)
245 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
246 {
247 return __sync_fetch_and_xor(value, mask);
248 }
249 #endif
250
251 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
252 {
253 return __sync_sub_and_fetch(lpAddend, 1);
254 }
255
256 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
257 {
258 return __sync_add_and_fetch(lpAddend, 1);
259 }
260
261 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
262 {
263 return __sync_sub_and_fetch(lpAddend, 1);
264 }
265
266 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
267 {
268 return __sync_add_and_fetch(lpAddend, 1);
269 }
270
271 #if defined(_M_AMD64)
272 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
273 {
274 return __sync_sub_and_fetch(lpAddend, 1);
275 }
276
277 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
278 {
279 return __sync_add_and_fetch(lpAddend, 1);
280 }
281 #endif
282
283 #else
284
285 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
286 {
287 char retval = Comperand;
288 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
289 return retval;
290 }
291
292 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
293 {
294 short retval = Comperand;
295 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
296 return retval;
297 }
298
299 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
300 {
301 long retval = Comperand;
302 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
303 return retval;
304 }
305
306 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
307 {
308 void * retval = (void *)Comperand;
309 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
310 return retval;
311 }
312
313 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
314 {
315 long retval = Value;
316 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
317 return retval;
318 }
319
320 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
321 {
322 void * retval = Value;
323 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
324 return retval;
325 }
326
327 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
328 {
329 short retval = Value;
330 __asm__("lock; xaddw %w[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
331 return retval;
332 }
333
334 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
335 {
336 long retval = Value;
337 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
338 return retval;
339 }
340
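/*
 The And/Or/Xor fallbacks below all use the same compare-and-swap loop:
 read the current value, compute (value OP mask), and retry the cmpxchg
 until no other processor changed the variable in the meantime. Like the
 Visual C++ intrinsics, they return the value the variable held immediately
 before the successful update.
*/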
341 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
342 {
343 char x;
344 char y;
345
346 y = *value;
347
348 do
349 {
350 x = y;
351 y = _InterlockedCompareExchange8(value, x & mask, x);
352 }
353 while(y != x);
354
355 return y;
356 }
357
358 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
359 {
360 short x;
361 short y;
362
363 y = *value;
364
365 do
366 {
367 x = y;
368 y = _InterlockedCompareExchange16(value, x & mask, x);
369 }
370 while(y != x);
371
372 return y;
373 }
374
375 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
376 {
377 long x;
378 long y;
379
380 y = *value;
381
382 do
383 {
384 x = y;
385 y = _InterlockedCompareExchange(value, x & mask, x);
386 }
387 while(y != x);
388
389 return y;
390 }
391
392 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
393 {
394 char x;
395 char y;
396
397 y = *value;
398
399 do
400 {
401 x = y;
402 y = _InterlockedCompareExchange8(value, x | mask, x);
403 }
404 while(y != x);
405
406 return y;
407 }
408
409 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
410 {
411 short x;
412 short y;
413
414 y = *value;
415
416 do
417 {
418 x = y;
419 y = _InterlockedCompareExchange16(value, x | mask, x);
420 }
421 while(y != x);
422
423 return y;
424 }
425
426 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
427 {
428 long x;
429 long y;
430
431 y = *value;
432
433 do
434 {
435 x = y;
436 y = _InterlockedCompareExchange(value, x | mask, x);
437 }
438 while(y != x);
439
440 return y;
441 }
442
443 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
444 {
445 char x;
446 char y;
447
448 y = *value;
449
450 do
451 {
452 x = y;
453 y = _InterlockedCompareExchange8(value, x ^ mask, x);
454 }
455 while(y != x);
456
457 return y;
458 }
459
460 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
461 {
462 short x;
463 short y;
464
465 y = *value;
466
467 do
468 {
469 x = y;
470 y = _InterlockedCompareExchange16(value, x ^ mask, x);
471 }
472 while(y != x);
473
474 return y;
475 }
476
477 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
478 {
479 long x;
480 long y;
481
482 y = *value;
483
484 do
485 {
486 x = y;
487 y = _InterlockedCompareExchange(value, x ^ mask, x);
488 }
489 while(y != x);
490
491 return y;
492 }
493
494 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
495 {
496 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
497 }
498
499 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
500 {
501 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
502 }
503
504 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
505 {
506 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
507 }
508
509 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
510 {
511 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
512 }
513
514 #if defined(_M_AMD64)
515 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
516 {
517 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
518 }
519
520 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
521 {
522 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
523 }
524 #endif
525
526 #endif
527
528 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
529
530 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
531 {
532 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
533 }
534
535 #else
536
537 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
538 {
539 long long retval = Comperand;
540
541 __asm__
542 (
543 "lock; cmpxchg8b %[Destination]" :
544 [retval] "+A" (retval) :
545 [Destination] "m" (*Destination),
546 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
547 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
548 "memory"
549 );
550
551 return retval;
552 }
553
554 #endif
555
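/*
 NOTE (sketch of the semantics): _InterlockedAddLargeStatistic adds a 32-bit
 value to a 64-bit counter. Each half is updated with its own locked
 instruction, so a reader may momentarily observe the low part already
 updated but the carry not yet propagated into the high part, the same
 relaxed guarantee the ExInterlockedAddLargeStatistic macro provides.
*/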
556 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
557 {
558 __asm__
559 (
560 "lock; add %[Value], %[Lo32];"
561 "jae LABEL%=;"
562 "lock; adc $0, %[Hi32];"
563 "LABEL%=:;" :
564 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
565 [Value] "ir" (Value) :
566 "memory"
567 );
568
569 return Value;
570 }
571
572 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
573 {
574 unsigned char retval;
575 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
576 return retval;
577 }
578
579 #if defined(_M_AMD64)
580 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
581 {
582 unsigned char retval;
583 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
584 return retval;
585 }
586 #endif
587
588 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
589 {
590 unsigned char retval;
591 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
592 return retval;
593 }
594
595 #if defined(_M_AMD64)
596 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
597 {
598 unsigned char retval;
599 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
600 return retval;
601 }
602 #endif
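/*
 Example (illustrative; SharedFlags being some volatile long of the
 caller's): claiming bit 5 of a shared flags word -

   if(!_interlockedbittestandset(&SharedFlags, 5))
   {
       ... this thread is the one that set the bit ...
   }

 The return value is the previous state of the tested bit, as with the
 Visual C++ intrinsics.
*/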
603
604 /*** String operations ***/
605 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
606 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
607 {
608 __asm__ __volatile__
609 (
610 "rep; stosb" :
611 [Dest] "=D" (Dest), [Count] "=c" (Count) :
612 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
613 );
614 }
615
616 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
617 {
618 __asm__ __volatile__
619 (
620 "rep; stosw" :
621 [Dest] "=D" (Dest), [Count] "=c" (Count) :
622 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
623 );
624 }
625
626 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
627 {
628 __asm__ __volatile__
629 (
630 "rep; stosl" :
631 [Dest] "=D" (Dest), [Count] "=c" (Count) :
632 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
633 );
634 }
635
636 #ifdef _M_AMD64
637 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
638 {
639 __asm__ __volatile__
640 (
641 "rep; stosq" :
642 [Dest] "=D" (Dest), [Count] "=c" (Count) :
643 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
644 );
645 }
646 #endif
647
648 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
649 {
650 __asm__ __volatile__
651 (
652 "rep; movsb" :
653 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
654 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
655 );
656 }
657
658 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
659 {
660 __asm__ __volatile__
661 (
662 "rep; movsw" :
663 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
664 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
665 );
666 }
667
668 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
669 {
670 __asm__ __volatile__
671 (
672 "rep; movsd" :
673 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
674 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
675 );
676 }
677
678 #ifdef _M_AMD64
679 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
680 {
681 __asm__ __volatile__
682 (
683 "rep; movsq" :
684 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
685 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
686 );
687 }
688 #endif
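/*
 Usage sketch (illustrative only): these behave like memset/memcpy with a
 fixed element size, e.g. __stosd(Buffer, 0, Count) zeroes Count dwords and
 __movsw(Dst, Src, Count) copies Count words. Matching Visual C++, none of
 them declares a "memory" clobber (see the NOTE above).
*/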
689
690 #if defined(_M_AMD64)
691 /*** GS segment addressing ***/
692
693 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
694 {
695 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
696 }
697
698 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
699 {
700 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
701 }
702
703 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
704 {
705 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
706 }
707
708 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
709 {
710 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
711 }
712
713 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
714 {
715 unsigned char value;
716 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
717 return value;
718 }
719
720 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
721 {
722 unsigned short value;
723 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
724 return value;
725 }
726
727 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
728 {
729 unsigned long value;
730 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
731 return value;
732 }
733
734 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
735 {
736 unsigned __int64 value;
737 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
738 return value;
739 }
740
741 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
742 {
743 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
744 }
745
746 __INTRIN_INLINE void __incgsword(const unsigned long Offset)
747 {
748 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
749 }
750
751 __INTRIN_INLINE void __incgsdword(const unsigned long Offset)
752 {
753 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
754 }
755
756 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
757 {
758 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
759 }
760
761 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
762 {
763 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
764 }
765
766 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
767 {
768 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
769 }
770
771 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
772 {
773 __asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
774 }
775
776 #else
777 /*** FS segment addressing ***/
778 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
779 {
780 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
781 }
782
783 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
784 {
785 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
786 }
787
788 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
789 {
790 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
791 }
792
793 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
794 {
795 unsigned char value;
796 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
797 return value;
798 }
799
800 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
801 {
802 unsigned short value;
803 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
804 return value;
805 }
806
807 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
808 {
809 unsigned long value;
810 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
811 return value;
812 }
813
814 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
815 {
816 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
817 }
818
819 __INTRIN_INLINE void __incfsword(const unsigned long Offset)
820 {
821 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
822 }
823
824 __INTRIN_INLINE void __incfsdword(const unsigned long Offset)
825 {
826 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
827 }
828
829 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
830 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
831 {
832 if(!__builtin_constant_p(Offset))
833 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
834 else
835 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
836 }
837
838 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
839 {
840 if(!__builtin_constant_p(Offset))
841 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
842 else
843 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
844 }
845
846 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
847 {
848 if(!__builtin_constant_p(Offset))
849 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
850 else
851 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
852 }
853 #endif
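/*
 Example (illustrative, 32-bit branch): on NT-family systems the TEB is
 reachable through fs, so something like

   void * Teb = (void *)__readfsdword(0x18);

 reads the TEB's self pointer (NT_TIB.Self lives at offset 0x18).
*/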
854
855
856 /*** Bit manipulation ***/
857 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
858 {
859 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
860 return Mask ? 1 : 0;
861 }
862
863 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
864 {
865 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
866 return Mask ? 1 : 0;
867 }
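/*
 Example (illustrative): locating the lowest set bit -

   unsigned long Index;
   if(_BitScanForward(&Index, Mask))
       ... bit 'Index' is the least significant set bit of Mask ...

 As with Visual C++, when Mask is zero the function returns 0 and the Index
 output is undefined.
*/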
868
869 /* NOTE: again, the bizarre implementation follows Visual C++ */
870 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
871 {
872 unsigned char retval;
873
874 if(__builtin_constant_p(b))
875 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
876 else
877 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
878
879 return retval;
880 }
881
882 #ifdef _M_AMD64
883 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
884 {
885 unsigned char retval;
886
887 if(__builtin_constant_p(b))
888 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
889 else
890 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
891
892 return retval;
893 }
894 #endif
895
896 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
897 {
898 unsigned char retval;
899
900 if(__builtin_constant_p(b))
901 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
902 else
903 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
904
905 return retval;
906 }
907
908 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
909 {
910 unsigned char retval;
911
912 if(__builtin_constant_p(b))
913 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
914 else
915 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
916
917 return retval;
918 }
919
920 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
921 {
922 unsigned char retval;
923
924 if(__builtin_constant_p(b))
925 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
926 else
927 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
928
929 return retval;
930 }
931
932 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
933 {
934 unsigned char retval;
935 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
936 return retval;
937 }
938
939 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
940 {
941 unsigned short retval;
942 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
943 return retval;
944 }
945
946 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
947 {
948 unsigned long retval;
949 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
950 return retval;
951 }
952
953 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
954 {
955 unsigned long retval;
956 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
957 return retval;
958 }
959
960 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
961 {
962 unsigned char retval;
963 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
964 return retval;
965 }
966
967 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
968 {
969 unsigned short retval;
970 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
971 return retval;
972 }
973
974 /*
975 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
976 constraint (edx:eax) for the Mask argument, because it's the only way GCC
977 can pass 64-bit operands around - passing the two 32 bit parts separately
978 just confuses it. Also we declare Bit as an int and then truncate it to
979 match Visual C++ behavior
980 */
981 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
982 {
983 unsigned long long retval = Mask;
984
985 __asm__
986 (
987 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
988 "+A" (retval) :
989 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
990 );
991
992 return retval;
993 }
994
995 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
996 {
997 unsigned long long retval = Mask;
998
999 __asm__
1000 (
1001 "shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
1002 "+A" (retval) :
1003 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1004 );
1005
1006 return retval;
1007 }
1008
1009 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
1010 {
1011 unsigned long long retval = Mask;
1012
1013 __asm__
1014 (
1015 "shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
1016 "+A" (retval) :
1017 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1018 );
1019
1020 return retval;
1021 }
1022
1023 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
1024 {
1025 unsigned short retval;
1026 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1027 return retval;
1028 }
1029
1030 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
1031 {
1032 unsigned long retval;
1033 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1034 return retval;
1035 }
1036
1037 #ifdef _M_AMD64
1038 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1039 {
1040 unsigned __int64 retval;
1041 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1042 return retval;
1043 }
1044 #else
1045 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1046 {
1047 union {
1048 __int64 int64part;
1049 struct {
1050 unsigned long lowpart;
1051 unsigned long hipart;
1052 };
1053 } retval;
1054 retval.int64part = value;
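/* The output constraints below are deliberately cross-wired: each 32-bit
   half is byte-swapped in place and the swapped halves are written to each
   other's slot, which exchanges them and completes the 64-bit swap. */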
1055 __asm__("bswapl %[lowpart]\n"
1056 "bswapl %[hipart]\n"
1057 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1058 return retval.int64part;
1059 }
1060 #endif
1061
1062 /*** 64-bit math ***/
1063 __INTRIN_INLINE long long __emul(const int a, const int b)
1064 {
1065 long long retval;
1066 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1067 return retval;
1068 }
1069
1070 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
1071 {
1072 unsigned long long retval;
1073 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1074 return retval;
1075 }
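/*
 Example (illustrative): __emul and __emulu return the full 64-bit product
 of two 32-bit operands, so __emulu(0x80000000, 2) yields 0x100000000
 rather than the truncated 32-bit result 0.
*/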
1076
1077 #ifdef _M_AMD64
1078
1079 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
1080 {
1081 __int64 retval;
1082 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1083 return retval;
1084 }
1085
1086 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
1087 {
1088 unsigned __int64 retval;
1089 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1090 return retval;
1091 }
1092
1093 #endif
1094
1095 /*** Port I/O ***/
1096 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
1097 {
1098 unsigned char byte;
1099 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1100 return byte;
1101 }
1102
1103 __INTRIN_INLINE unsigned short __inword(const unsigned short Port)
1104 {
1105 unsigned short word;
1106 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1107 return word;
1108 }
1109
1110 __INTRIN_INLINE unsigned long __indword(const unsigned short Port)
1111 {
1112 unsigned long dword;
1113 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1114 return dword;
1115 }
1116
1117 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1118 {
1119 __asm__ __volatile__
1120 (
1121 "rep; insb" :
1122 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1123 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1124 "memory"
1125 );
1126 }
1127
1128 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1129 {
1130 __asm__ __volatile__
1131 (
1132 "rep; insw" :
1133 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1134 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1135 "memory"
1136 );
1137 }
1138
1139 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1140 {
1141 __asm__ __volatile__
1142 (
1143 "rep; insl" :
1144 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1145 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1146 "memory"
1147 );
1148 }
1149
1150 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
1151 {
1152 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1153 }
1154
1155 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
1156 {
1157 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1158 }
1159
1160 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
1161 {
1162 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1163 }
1164
1165 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
1166 {
1167 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1168 }
1169
1170 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
1171 {
1172 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1173 }
1174
1175 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
1176 {
1177 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1178 }
1179
1180 __INTRIN_INLINE int _inp(unsigned short Port)
1181 {
1182 return __inbyte(Port);
1183 }
1184
1185 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1186 {
1187 return __inword(Port);
1188 }
1189
1190 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1191 {
1192 return __indword(Port);
1193 }
1194
1195 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1196 {
1197 __outbyte(Port, databyte);
1198 return databyte;
1199 }
1200
1201 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1202 {
1203 __outword(Port, dataword);
1204 return dataword;
1205 }
1206
1207 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1208 {
1209 __outdword(Port, dataword);
1210 return dataword;
1211 }
1212
1213
1214 /*** System information ***/
1215 __INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
1216 {
1217 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1218 }
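/*
 Example (illustrative): querying the vendor identification string -

   int Info[4];
   __cpuid(Info, 0);

 Info[0] then holds the highest supported standard leaf, and Info[1],
 Info[3], Info[2] (EBX, EDX, ECX) spell out the 12-byte vendor string,
 e.g. "GenuineIntel" or "AuthenticAMD".
*/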
1219
1220 __INTRIN_INLINE unsigned long long __rdtsc(void)
1221 {
1222 #ifdef _M_AMD64
1223 unsigned long long low, high;
1224 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1225 return low | (high << 32);
1226 #else
1227 unsigned long long retval;
1228 __asm__ __volatile__("rdtsc" : "=A"(retval));
1229 return retval;
1230 #endif
1231 }
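/*
 Usage sketch (illustrative only): a rough cycle count around a code block -

   unsigned long long Start = __rdtsc();
   ... code under measurement ...
   unsigned long long Cycles = __rdtsc() - Start;

 Note that rdtsc is not a serializing instruction, so the processor may
 reorder it relative to the surrounding code.
*/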
1232
1233 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1234 {
1235 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1236 }
1237
1238 __INTRIN_INLINE uintptr_t __readeflags(void)
1239 {
1240 uintptr_t retval;
1241 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1242 return retval;
1243 }
1244
1245 /*** Interrupts ***/
1246 #ifdef __clang__
1247 #define __debugbreak() __asm__("int $3")
1248 #else
1249 __INTRIN_INLINE void __debugbreak(void)
1250 {
1251 __asm__("int $3");
1252 }
1253 #endif
1254
1255 __INTRIN_INLINE void __int2c(void)
1256 {
1257 __asm__("int $0x2c");
1258 }
1259
1260 __INTRIN_INLINE void _disable(void)
1261 {
1262 __asm__("cli" : : : "memory");
1263 }
1264
1265 __INTRIN_INLINE void _enable(void)
1266 {
1267 __asm__("sti" : : : "memory");
1268 }
1269
1270 __INTRIN_INLINE void __halt(void)
1271 {
1272 __asm__("hlt\n\t" : : : "memory");
1273 }
1274
1275 /*** Protected memory management ***/
1276
1277 #ifdef _M_AMD64
1278 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
1279 {
1280 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1281 }
1282
1283 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
1284 {
1285 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1286 }
1287
1288 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
1289 {
1290 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1291 }
1292
1293 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
1294 {
1295 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1296 }
1297
1298 __INTRIN_INLINE unsigned __int64 __readcr0(void)
1299 {
1300 unsigned __int64 value;
1301 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1302 return value;
1303 }
1304
1305 __INTRIN_INLINE unsigned __int64 __readcr2(void)
1306 {
1307 unsigned __int64 value;
1308 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1309 return value;
1310 }
1311
1312 __INTRIN_INLINE unsigned __int64 __readcr3(void)
1313 {
1314 unsigned __int64 value;
1315 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1316 return value;
1317 }
1318
1319 __INTRIN_INLINE unsigned __int64 __readcr4(void)
1320 {
1321 unsigned __int64 value;
1322 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1323 return value;
1324 }
1325
1326 __INTRIN_INLINE unsigned __int64 __readcr8(void)
1327 {
1328 unsigned __int64 value;
1329 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1330 return value;
1331 }
1332 #else
1333 __INTRIN_INLINE void __writecr0(const unsigned int Data)
1334 {
1335 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1336 }
1337
1338 __INTRIN_INLINE void __writecr3(const unsigned int Data)
1339 {
1340 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1341 }
1342
1343 __INTRIN_INLINE void __writecr4(const unsigned int Data)
1344 {
1345 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1346 }
1347
1348 __INTRIN_INLINE unsigned long __readcr0(void)
1349 {
1350 unsigned long value;
1351 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1352 return value;
1353 }
1354
1355 __INTRIN_INLINE unsigned long __readcr2(void)
1356 {
1357 unsigned long value;
1358 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1359 return value;
1360 }
1361
1362 __INTRIN_INLINE unsigned long __readcr3(void)
1363 {
1364 unsigned long value;
1365 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1366 return value;
1367 }
1368
1369 __INTRIN_INLINE unsigned long __readcr4(void)
1370 {
1371 unsigned long value;
1372 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1373 return value;
1374 }
1375 #endif
1376
1377 #ifdef _M_AMD64
1378 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
1379 {
1380 unsigned __int64 value;
1381 switch (reg)
1382 {
1383 case 0:
1384 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1385 break;
1386 case 1:
1387 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1388 break;
1389 case 2:
1390 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1391 break;
1392 case 3:
1393 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1394 break;
1395 case 4:
1396 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1397 break;
1398 case 5:
1399 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1400 break;
1401 case 6:
1402 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1403 break;
1404 case 7:
1405 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1406 break;
1407 }
1408 return value;
1409 }
1410
1411 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
1412 {
1413 switch (reg)
1414 {
1415 case 0:
1416 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1417 break;
1418 case 1:
1419 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1420 break;
1421 case 2:
1422 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1423 break;
1424 case 3:
1425 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1426 break;
1427 case 4:
1428 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1429 break;
1430 case 5:
1431 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1432 break;
1433 case 6:
1434 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1435 break;
1436 case 7:
1437 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1438 break;
1439 }
1440 }
1441 #else
1442 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1443 {
1444 unsigned int value;
1445 switch (reg)
1446 {
1447 case 0:
1448 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1449 break;
1450 case 1:
1451 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1452 break;
1453 case 2:
1454 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1455 break;
1456 case 3:
1457 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1458 break;
1459 case 4:
1460 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1461 break;
1462 case 5:
1463 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1464 break;
1465 case 6:
1466 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1467 break;
1468 case 7:
1469 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1470 break;
1471 }
1472 return value;
1473 }
1474
1475 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1476 {
1477 switch (reg)
1478 {
1479 case 0:
1480 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1481 break;
1482 case 1:
1483 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1484 break;
1485 case 2:
1486 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1487 break;
1488 case 3:
1489 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1490 break;
1491 case 4:
1492 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1493 break;
1494 case 5:
1495 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1496 break;
1497 case 6:
1498 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1499 break;
1500 case 7:
1501 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1502 break;
1503 }
1504 }
1505 #endif
1506
1507 __INTRIN_INLINE void __invlpg(void * const Address)
1508 {
1509 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
1510 }
1511
1512
1513 /*** System operations ***/
1514 __INTRIN_INLINE unsigned long long __readmsr(const int reg)
1515 {
1516 #ifdef _M_AMD64
1517 unsigned long low, high;
1518 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1519 return ((unsigned long long)high << 32) | low;
1520 #else
1521 unsigned long long retval;
1522 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1523 return retval;
1524 #endif
1525 }
1526
1527 __INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
1528 {
1529 #ifdef _M_AMD64
1530 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1531 #else
1532 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1533 #endif
1534 }
1535
1536 __INTRIN_INLINE unsigned long long __readpmc(const int counter)
1537 {
1538 unsigned long long retval;
1539 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1540 return retval;
1541 }
1542
1543 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1544 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
1545 {
1546 unsigned long retval;
1547 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1548 return retval;
1549 }
1550
1551 __INTRIN_INLINE void __wbinvd(void)
1552 {
1553 __asm__ __volatile__("wbinvd" : : : "memory");
1554 }
1555
1556 __INTRIN_INLINE void __lidt(void *Source)
1557 {
1558 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1559 }
1560
1561 __INTRIN_INLINE void __sidt(void *Destination)
1562 {
1563 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1564 }
1565
1566 /*** Misc operations ***/
1567
1568 __INTRIN_INLINE void _mm_pause(void)
1569 {
1570 __asm__ __volatile__("pause" : : : "memory");
1571 }
1572
1573 __INTRIN_INLINE void __nop(void)
1574 {
1575 __asm__ __volatile__("nop");
1576 }
1577
1578 #ifdef __cplusplus
1579 }
1580 #endif
1581
1582 #endif /* KJK_INTRIN_X86_H_ */
1583
1584 /* EOF */