1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 	NOTE: on GCC > 4.1.0, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
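
/*
	Illustrative sketch (not part of the original header): the version test
	above combined with the "match the barrier type" rule. The helper name
	_example_interlocked_add is hypothetical. MSVC's Interlocked* intrinsics
	act as full barriers, and __sync_fetch_and_add is likewise a full
	barrier, so the two branches match in that respect:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
	static long _example_interlocked_add(volatile long * p, long v)
	{
		return __sync_fetch_and_add(p, v);
	}
	#else
	static long _example_interlocked_add(volatile long * p, long v)
	{
		long r = v;
		__asm__("lock; xaddl %[r], %[p]" : [r] "+r" (r) : [p] "m" (*p) : "memory");
		return r;
	}
	#endif
*/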
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** Stack frame juggling ***/
73 #define _ReturnAddress() (__builtin_return_address(0))
74 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
75 /* TODO: __getcallerseflags but how??? */
76
77 /* Maybe the same for x86? */
78 #ifdef __x86_64__
79 #define _alloca(s) __builtin_alloca(s)
80 #endif
81
82 /*** Memory barriers ***/
83
84 #ifdef __x86_64__
85 __INTRIN_INLINE void __faststorefence(void)
86 {
87 	long local = 0;
88 	__asm__ __volatile__("lock; orl $0, %0;" : "+m" (local) : : "memory");
89 }
90 #endif
91
92 __INTRIN_INLINE void _mm_lfence(void)
93 {
94 __asm__ __volatile__("lfence");
95 }
96
97 __INTRIN_INLINE void _mm_sfence(void)
98 {
99 __asm__ __volatile__("sfence");
100 }
101
102 __INTRIN_INLINE void _ReadWriteBarrier(void)
103 {
104 __asm__ __volatile__("" : : : "memory");
105 }
106
107 /* GCC only supports full barriers */
108 #define _ReadBarrier _ReadWriteBarrier
109 #define _WriteBarrier _ReadWriteBarrier
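
/*
	Usage sketch (illustrative, not part of the original header):
	_ReadWriteBarrier only prevents the *compiler* from moving memory
	accesses across it, while _mm_lfence/_mm_sfence emit real fence
	instructions. A device-register write might combine both (RegisterBase
	and Value are placeholder names):

		*(volatile unsigned long *)RegisterBase = Value;
		_mm_sfence();          // order the store with respect to the hardware
		_ReadWriteBarrier();   // and with respect to the compiler
*/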
110
111 /*** Atomic operations ***/
112
113 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
114
115 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
116 {
117 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
118 }
119
120 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
121 {
122 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
123 }
124
125 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
126 {
127 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
128 }
129
130 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
131 {
132 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
133 }
134
135 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
136 {
137 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
138 __sync_synchronize();
139 return __sync_lock_test_and_set(Target, Value);
140 }
141
142 #if defined(_M_AMD64)
143 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
144 {
145 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
146 __sync_synchronize();
147 return __sync_lock_test_and_set(Target, Value);
148 }
149 #endif
150
151 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
152 {
153 /* NOTE: ditto */
154 __sync_synchronize();
155 return (void *)__sync_lock_test_and_set(Target, Value);
156 }
157
158 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
159 {
160 return __sync_fetch_and_add(Addend, Value);
161 }
162
163 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
164 {
165 return __sync_fetch_and_add(Addend, Value);
166 }
167
168 #if defined(_M_AMD64)
169 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
170 {
171 return __sync_fetch_and_add(Addend, Value);
172 }
173 #endif
174
175 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
176 {
177 return __sync_fetch_and_and(value, mask);
178 }
179
180 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
181 {
182 return __sync_fetch_and_and(value, mask);
183 }
184
185 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
186 {
187 return __sync_fetch_and_and(value, mask);
188 }
189
190 #if defined(_M_AMD64)
191 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
192 {
193 return __sync_fetch_and_and(value, mask);
194 }
195 #endif
196
197 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
198 {
199 return __sync_fetch_and_or(value, mask);
200 }
201
202 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
203 {
204 return __sync_fetch_and_or(value, mask);
205 }
206
207 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
208 {
209 return __sync_fetch_and_or(value, mask);
210 }
211
212 #if defined(_M_AMD64)
213 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
214 {
215 return __sync_fetch_and_or(value, mask);
216 }
217 #endif
218
219 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
220 {
221 return __sync_fetch_and_xor(value, mask);
222 }
223
224 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
225 {
226 return __sync_fetch_and_xor(value, mask);
227 }
228
229 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
230 {
231 return __sync_fetch_and_xor(value, mask);
232 }
233
234 #else
235
236 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
237 {
238 char retval = Comperand;
239 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
240 return retval;
241 }
242
243 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
244 {
245 short retval = Comperand;
246 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
247 return retval;
248 }
249
250 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
251 {
252 long retval = Comperand;
253 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
254 return retval;
255 }
256
257 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
258 {
259 void * retval = (void *)Comperand;
260 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
261 return retval;
262 }
263
264 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
265 {
266 long retval = Value;
267 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
268 return retval;
269 }
270
271 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
272 {
273 void * retval = Value;
274 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
275 return retval;
276 }
277
278 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
279 {
280 	short retval = Value;
281 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
282 return retval;
283 }
284
285 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
286 {
287 long retval = Value;
288 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
289 return retval;
290 }
291
292 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
293 {
294 char x;
295 char y;
296
297 y = *value;
298
299 do
300 {
301 x = y;
302 y = _InterlockedCompareExchange8(value, x & mask, x);
303 }
304 while(y != x);
305
306 return y;
307 }
308
309 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
310 {
311 short x;
312 short y;
313
314 y = *value;
315
316 do
317 {
318 x = y;
319 y = _InterlockedCompareExchange16(value, x & mask, x);
320 }
321 while(y != x);
322
323 return y;
324 }
325
326 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
327 {
328 long x;
329 long y;
330
331 y = *value;
332
333 do
334 {
335 x = y;
336 y = _InterlockedCompareExchange(value, x & mask, x);
337 }
338 while(y != x);
339
340 return y;
341 }
342
343 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
344 {
345 char x;
346 char y;
347
348 y = *value;
349
350 do
351 {
352 x = y;
353 y = _InterlockedCompareExchange8(value, x | mask, x);
354 }
355 while(y != x);
356
357 return y;
358 }
359
360 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
361 {
362 short x;
363 short y;
364
365 y = *value;
366
367 do
368 {
369 x = y;
370 y = _InterlockedCompareExchange16(value, x | mask, x);
371 }
372 while(y != x);
373
374 return y;
375 }
376
377 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
378 {
379 long x;
380 long y;
381
382 y = *value;
383
384 do
385 {
386 x = y;
387 y = _InterlockedCompareExchange(value, x | mask, x);
388 }
389 while(y != x);
390
391 return y;
392 }
393
394 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
395 {
396 char x;
397 char y;
398
399 y = *value;
400
401 do
402 {
403 x = y;
404 y = _InterlockedCompareExchange8(value, x ^ mask, x);
405 }
406 while(y != x);
407
408 return y;
409 }
410
411 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
412 {
413 short x;
414 short y;
415
416 y = *value;
417
418 do
419 {
420 x = y;
421 y = _InterlockedCompareExchange16(value, x ^ mask, x);
422 }
423 while(y != x);
424
425 return y;
426 }
427
428 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
429 {
430 long x;
431 long y;
432
433 y = *value;
434
435 do
436 {
437 x = y;
438 y = _InterlockedCompareExchange(value, x ^ mask, x);
439 }
440 while(y != x);
441
442 return y;
443 }
444
445 #endif
446
447 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
448
449 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
450 {
451 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
452 }
453
454 #else
455
456 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
457 {
458 long long retval = Comperand;
459
460 __asm__
461 (
462 "lock; cmpxchg8b %[Destination]" :
463 [retval] "+A" (retval) :
464 [Destination] "m" (*Destination),
465 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
466 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
467 "memory"
468 );
469
470 return retval;
471 }
472
473 #endif
474
475 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
476 {
477 __asm__
478 (
479 "lock; add %[Value], %[Lo32];"
480 "jae LABEL%=;"
481 "lock; adc $0, %[Hi32];"
482 "LABEL%=:;" :
483 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
484 [Value] "ir" (Value) :
485 "memory"
486 );
487
488 return Value;
489 }
490
491 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
492 {
493 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
494 }
495
496 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
497 {
498 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
499 }
500
501 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
502 {
503 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
504 }
505
506 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
507 {
508 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
509 }
510
511 #if defined(_M_AMD64)
512 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
513 {
514 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
515 }
516
517 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
518 {
519 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
520 }
521 #endif
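
/*
	Semantics example (illustrative): as with the Visual C++ originals, the
	increment/decrement intrinsics above return the *resulting* value, so a
	reference-count release can be written as (Object/FreeObject are
	placeholder names):

		if(_InterlockedDecrement(&Object->RefCount) == 0)
			FreeObject(Object);
*/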
522
523 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
524 {
525 unsigned char retval;
526 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
527 return retval;
528 }
529
530 #if defined(_M_AMD64)
531 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
532 {
533 unsigned char retval;
534 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
535 return retval;
536 }
537 #endif
538
539 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
540 {
541 unsigned char retval;
542 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
543 return retval;
544 }
545
546 #if defined(_M_AMD64)
547 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
548 {
549 unsigned char retval;
550 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
551 return retval;
552 }
553 #endif
554
555 /*** String operations ***/
556 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
557 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
558 {
559 __asm__ __volatile__
560 (
561 "rep; stosb" :
562 [Dest] "=D" (Dest), [Count] "=c" (Count) :
563 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
564 );
565 }
566
567 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
568 {
569 __asm__ __volatile__
570 (
571 "rep; stosw" :
572 [Dest] "=D" (Dest), [Count] "=c" (Count) :
573 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
574 );
575 }
576
577 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
578 {
579 __asm__ __volatile__
580 (
581 "rep; stosl" :
582 [Dest] "=D" (Dest), [Count] "=c" (Count) :
583 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
584 );
585 }
586
587 #ifdef _M_AMD64
588 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
589 {
590 __asm__ __volatile__
591 (
592 "rep; stosq" :
593 [Dest] "=D" (Dest), [Count] "=c" (Count) :
594 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
595 );
596 }
597 #endif
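
/*
	Caveat sketch (illustrative, not part of the original header): since the
	__stosX functions above deliberately omit a "memory" clobber to match
	Visual C++, the compiler is free to keep stale copies of the filled
	buffer in registers. Code that reads the buffer afterwards may want an
	explicit compiler barrier (Buffer/Length are placeholder names):

		__stosb(Buffer, 0, Length);
		_ReadWriteBarrier();
*/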
598
599 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
600 {
601 __asm__ __volatile__
602 (
603 "rep; movsb" :
604 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
605 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
606 );
607 }
608
609 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
610 {
611 __asm__ __volatile__
612 (
613 "rep; movsw" :
614 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
615 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
616 );
617 }
618
619 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
620 {
621 __asm__ __volatile__
622 (
623 "rep; movsd" :
624 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
625 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
626 );
627 }
628
629 #ifdef _M_AMD64
630 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
631 {
632 __asm__ __volatile__
633 (
634 "rep; movsq" :
635 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
636 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
637 );
638 }
639 #endif
640
641 #if defined(_M_AMD64)
642 /*** GS segment addressing ***/
643
644 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
645 {
646 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
647 }
648
649 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
650 {
651 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
652 }
653
654 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
655 {
656 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
657 }
658
659 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
660 {
661 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
662 }
663
664 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
665 {
666 unsigned char value;
667 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
668 return value;
669 }
670
671 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
672 {
673 unsigned short value;
674 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
675 return value;
676 }
677
678 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
679 {
680 unsigned long value;
681 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
682 return value;
683 }
684
685 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
686 {
687 unsigned __int64 value;
688 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
689 return value;
690 }
691
692 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
693 {
694 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
695 }
696
697 __INTRIN_INLINE void __incgsword(const unsigned long Offset)
698 {
699 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
700 }
701
702 __INTRIN_INLINE void __incgsdword(const unsigned long Offset)
703 {
704 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
705 }
706
707 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
708 {
709 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
710 }
711
712 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
713 {
714 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
715 }
716
717 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
718 {
719 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
720 }
721
722 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
723 {
724 __asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
725 }
726
727 #else
728 /*** FS segment addressing ***/
729 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
730 {
731 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
732 }
733
734 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
735 {
736 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
737 }
738
739 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
740 {
741 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
742 }
743
744 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
745 {
746 unsigned char value;
747 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
748 return value;
749 }
750
751 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
752 {
753 unsigned short value;
754 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
755 return value;
756 }
757
758 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
759 {
760 unsigned long value;
761 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
762 return value;
763 }
764
765 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
766 {
767 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
768 }
769
770 __INTRIN_INLINE void __incfsword(const unsigned long Offset)
771 {
772 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
773 }
774
775 __INTRIN_INLINE void __incfsdword(const unsigned long Offset)
776 {
777 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
778 }
779
780 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
781 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
782 {
783 if(!__builtin_constant_p(Offset))
784 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
785 else
786 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
787 }
788
789 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
790 {
791 if(!__builtin_constant_p(Offset))
792 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
793 else
794 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
795 }
796
797 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
798 {
799 if(!__builtin_constant_p(Offset))
800 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
801 else
802 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
803 }
804 #endif
805
806
807 /*** Bit manipulation ***/
808 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
809 {
810 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
811 return Mask ? 1 : 0;
812 }
813
814 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
815 {
816 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
817 return Mask ? 1 : 0;
818 }
819
820 /* NOTE: again, the bizarre implementation follows Visual C++ */
821 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
822 {
823 unsigned char retval;
824
825 if(__builtin_constant_p(b))
826 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
827 else
828 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
829
830 return retval;
831 }
832
833 #ifdef _M_AMD64
834 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
835 {
836 unsigned char retval;
837
838 if(__builtin_constant_p(b))
839 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
840 else
841 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
842
843 return retval;
844 }
845 #endif
846
847 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
848 {
849 unsigned char retval;
850
851 if(__builtin_constant_p(b))
852 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
853 else
854 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
855
856 return retval;
857 }
858
859 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
860 {
861 unsigned char retval;
862
863 if(__builtin_constant_p(b))
864 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
865 else
866 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
867
868 return retval;
869 }
870
871 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
872 {
873 unsigned char retval;
874
875 if(__builtin_constant_p(b))
876 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
877 else
878 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*a), [retval] "=q" (retval) : [b] "r" (b));
879
880 return retval;
881 }
882
883 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
884 {
885 unsigned char retval;
886 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
887 return retval;
888 }
889
890 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
891 {
892 unsigned short retval;
893 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
894 return retval;
895 }
896
897 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
898 {
899 unsigned long retval;
900 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
901 return retval;
902 }
903
904 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
905 {
906 unsigned long retval;
907 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
908 return retval;
909 }
910
911 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
912 {
913 unsigned char retval;
914 __asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
915 return retval;
916 }
917
918 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
919 {
920 unsigned short retval;
921 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
922 return retval;
923 }
924
925 /*
926 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
927 constraint (edx:eax) for the Mask argument, because it's the only way GCC
928 can pass 64-bit operands around - passing the two 32 bit parts separately
929 just confuses it. Also we declare Bit as an int and then truncate it to
930 match Visual C++ behavior
931 */
932 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
933 {
934 unsigned long long retval = Mask;
935
936 __asm__
937 (
938 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
939 "+A" (retval) :
940 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
941 );
942
943 return retval;
944 }
945
946 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
947 {
948 unsigned long long retval = Mask;
949
950 __asm__
951 (
952 "shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
953 "+A" (retval) :
954 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
955 );
956
957 return retval;
958 }
959
960 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
961 {
962 unsigned long long retval = Mask;
963
964 __asm__
965 (
966 "shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
967 "+A" (retval) :
968 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
969 );
970
971 return retval;
972 }
973
974 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
975 {
976 unsigned short retval;
977 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
978 return retval;
979 }
980
981 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
982 {
983 unsigned long retval;
984 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
985 return retval;
986 }
987
988 #ifdef _M_AMD64
989 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
990 {
991 unsigned __int64 retval;
992 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
993 return retval;
994 }
995 #else
996 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
997 {
998 union {
999 __int64 int64part;
1000 struct {
1001 unsigned long lowpart;
1002 unsigned long hipart;
1003 };
1004 } retval;
1005 retval.int64part = value;
1006 __asm__("bswapl %[lowpart]\n"
1007 "bswapl %[hipart]\n"
1008 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1009 return retval.int64part;
1010 }
1011 #endif
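
/*
	Worked example (illustrative): the byte swap intrinsics above reverse
	byte order, e.g.

		_byteswap_ushort(0x1122)                == 0x2211
		_byteswap_ulong (0x11223344UL)          == 0x44332211UL
		_byteswap_uint64(0x1122334455667788ULL) == 0x8877665544332211ULL
*/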
1012
1013 /*** 64-bit math ***/
1014 __INTRIN_INLINE long long __emul(const int a, const int b)
1015 {
1016 long long retval;
1017 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1018 return retval;
1019 }
1020
1021 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
1022 {
1023 unsigned long long retval;
1024 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1025 return retval;
1026 }
1027
1028 #ifdef _M_AMD64
1029
1030 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
1031 {
1032 __int64 retval;
1033 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1034 return retval;
1035 }
1036
1037 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
1038 {
1039 unsigned __int64 retval;
1040 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1041 return retval;
1042 }
1043
1044 #endif
1045
1046 /*** Port I/O ***/
1047 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
1048 {
1049 unsigned char byte;
1050 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1051 return byte;
1052 }
1053
1054 __INTRIN_INLINE unsigned short __inword(const unsigned short Port)
1055 {
1056 unsigned short word;
1057 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1058 return word;
1059 }
1060
1061 __INTRIN_INLINE unsigned long __indword(const unsigned short Port)
1062 {
1063 unsigned long dword;
1064 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1065 return dword;
1066 }
1067
1068 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1069 {
1070 __asm__ __volatile__
1071 (
1072 "rep; insb" :
1073 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1074 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1075 "memory"
1076 );
1077 }
1078
1079 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1080 {
1081 __asm__ __volatile__
1082 (
1083 "rep; insw" :
1084 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1085 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1086 "memory"
1087 );
1088 }
1089
1090 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1091 {
1092 __asm__ __volatile__
1093 (
1094 "rep; insl" :
1095 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1096 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1097 "memory"
1098 );
1099 }
1100
1101 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
1102 {
1103 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1104 }
1105
1106 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
1107 {
1108 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1109 }
1110
1111 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
1112 {
1113 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1114 }
1115
1116 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
1117 {
1118 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1119 }
1120
1121 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
1122 {
1123 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1124 }
1125
1126 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
1127 {
1128 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1129 }
1130
1131 __INTRIN_INLINE int _inp(unsigned short Port)
1132 {
1133 return __inbyte(Port);
1134 }
1135
1136 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1137 {
1138 return __inword(Port);
1139 }
1140
1141 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1142 {
1143 return __indword(Port);
1144 }
1145
1146 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1147 {
1148 __outbyte(Port, databyte);
1149 return databyte;
1150 }
1151
1152 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1153 {
1154 __outword(Port, dataword);
1155 return dataword;
1156 }
1157
1158 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1159 {
1160 __outdword(Port, dataword);
1161 return dataword;
1162 }
1163
1164
1165 /*** System information ***/
1166 __INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
1167 {
1168 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1169 }
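
/*
	Usage sketch (illustrative, not part of the original header): leaf 0
	returns the CPU vendor string in EBX, EDX, ECX order, i.e. in
	CPUInfo[1], CPUInfo[3], CPUInfo[2]:

		int regs[4];
		char vendor[13];
		__cpuid(regs, 0);
		memcpy(vendor + 0, &regs[1], 4);
		memcpy(vendor + 4, &regs[3], 4);
		memcpy(vendor + 8, &regs[2], 4);
		vendor[12] = '\0';
*/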
1170
1171 __INTRIN_INLINE unsigned long long __rdtsc(void)
1172 {
1173 #ifdef _M_AMD64
1174 unsigned long long low, high;
1175 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1176 return low | (high << 32);
1177 #else
1178 unsigned long long retval;
1179 __asm__ __volatile__("rdtsc" : "=A"(retval));
1180 return retval;
1181 #endif
1182 }
1183
1184 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1185 {
1186 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1187 }
1188
1189 __INTRIN_INLINE uintptr_t __readeflags(void)
1190 {
1191 uintptr_t retval;
1192 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1193 return retval;
1194 }
1195
1196 /*** Interrupts ***/
1197 __INTRIN_INLINE void __debugbreak(void)
1198 {
1199 __asm__("int $3");
1200 }
1201
1202 __INTRIN_INLINE void __int2c(void)
1203 {
1204 __asm__("int $0x2c");
1205 }
1206
1207 __INTRIN_INLINE void _disable(void)
1208 {
1209 __asm__("cli");
1210 }
1211
1212 __INTRIN_INLINE void _enable(void)
1213 {
1214 __asm__("sti");
1215 }
1216
1217 __INTRIN_INLINE void __halt(void)
1218 {
1219 __asm__("hlt\n\t");
1220 }
1221
1222 /*** Protected memory management ***/
1223
1224 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
1225 {
1226 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1227 }
1228
1229 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
1230 {
1231 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1232 }
1233
1234 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
1235 {
1236 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1237 }
1238
1239 #ifdef _M_AMD64
1240 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
1241 {
1242 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1243 }
1244
1245 __INTRIN_INLINE unsigned __int64 __readcr0(void)
1246 {
1247 unsigned __int64 value;
1248 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1249 return value;
1250 }
1251
1252 __INTRIN_INLINE unsigned __int64 __readcr2(void)
1253 {
1254 unsigned __int64 value;
1255 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1256 return value;
1257 }
1258
1259 __INTRIN_INLINE unsigned __int64 __readcr3(void)
1260 {
1261 unsigned __int64 value;
1262 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1263 return value;
1264 }
1265
1266 __INTRIN_INLINE unsigned __int64 __readcr4(void)
1267 {
1268 unsigned __int64 value;
1269 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1270 return value;
1271 }
1272
1273 __INTRIN_INLINE unsigned __int64 __readcr8(void)
1274 {
1275 unsigned __int64 value;
1276 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1277 return value;
1278 }
1279 #else
1280 __INTRIN_INLINE unsigned long __readcr0(void)
1281 {
1282 unsigned long value;
1283 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1284 return value;
1285 }
1286
1287 __INTRIN_INLINE unsigned long __readcr2(void)
1288 {
1289 unsigned long value;
1290 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1291 return value;
1292 }
1293
1294 __INTRIN_INLINE unsigned long __readcr3(void)
1295 {
1296 unsigned long value;
1297 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1298 return value;
1299 }
1300
1301 __INTRIN_INLINE unsigned long __readcr4(void)
1302 {
1303 unsigned long value;
1304 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1305 return value;
1306 }
1307 #endif
1308
1309 #ifdef _M_AMD64
1310 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
1311 {
1312 unsigned __int64 value;
1313 switch (reg)
1314 {
1315 case 0:
1316 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1317 break;
1318 case 1:
1319 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1320 break;
1321 case 2:
1322 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1323 break;
1324 case 3:
1325 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1326 break;
1327 case 4:
1328 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1329 break;
1330 case 5:
1331 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1332 break;
1333 case 6:
1334 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1335 break;
1336 case 7:
1337 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1338 break;
1339 }
1340 return value;
1341 }
1342
1343 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
1344 {
1345 switch (reg)
1346 {
1347 case 0:
1348 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1349 break;
1350 case 1:
1351 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1352 break;
1353 case 2:
1354 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1355 break;
1356 case 3:
1357 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1358 break;
1359 case 4:
1360 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1361 break;
1362 case 5:
1363 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1364 break;
1365 case 6:
1366 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1367 break;
1368 case 7:
1369 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1370 break;
1371 }
1372 }
1373 #else
1374 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1375 {
1376 unsigned int value;
1377 switch (reg)
1378 {
1379 case 0:
1380 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1381 break;
1382 case 1:
1383 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1384 break;
1385 case 2:
1386 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1387 break;
1388 case 3:
1389 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1390 break;
1391 case 4:
1392 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1393 break;
1394 case 5:
1395 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1396 break;
1397 case 6:
1398 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1399 break;
1400 case 7:
1401 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1402 break;
1403 }
1404 return value;
1405 }
1406
1407 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1408 {
1409 switch (reg)
1410 {
1411 case 0:
1412 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1413 break;
1414 case 1:
1415 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1416 break;
1417 case 2:
1418 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1419 break;
1420 case 3:
1421 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1422 break;
1423 case 4:
1424 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1425 break;
1426 case 5:
1427 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1428 break;
1429 case 6:
1430 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1431 break;
1432 case 7:
1433 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1434 break;
1435 }
1436 }
1437 #endif
1438
1439 __INTRIN_INLINE void __invlpg(void * const Address)
1440 {
1441 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
1442 }
1443
1444
1445 /*** System operations ***/
1446 __INTRIN_INLINE unsigned long long __readmsr(const int reg)
1447 {
1448 #ifdef _M_AMD64
1449 unsigned long low, high;
1450 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1451 return ((unsigned long long)high << 32) | low;
1452 #else
1453 unsigned long long retval;
1454 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1455 return retval;
1456 #endif
1457 }
1458
1459 __INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
1460 {
1461 #ifdef _M_AMD64
1462 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1463 #else
1464 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1465 #endif
1466 }
1467
1468 __INTRIN_INLINE unsigned long long __readpmc(const int counter)
1469 {
1470 unsigned long long retval;
1471 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1472 return retval;
1473 }
1474
1475 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1476 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
1477 {
1478 unsigned long retval;
1479 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1480 return retval;
1481 }
1482
1483 __INTRIN_INLINE void __wbinvd(void)
1484 {
1485 __asm__ __volatile__("wbinvd");
1486 }
1487
1488 __INTRIN_INLINE void __lidt(void *Source)
1489 {
1490 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1491 }
1492
1493 __INTRIN_INLINE void __sidt(void *Destination)
1494 {
1495 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1496 }
1497
1498 __INTRIN_INLINE void _mm_pause(void)
1499 {
1500 __asm__ __volatile__("pause");
1501 }
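
/*
	Usage sketch (illustrative, not part of the original header): _mm_pause
	is normally used inside spin-wait loops, for example together with the
	interlocked operations above (Lock is a placeholder name):

		while(_InterlockedCompareExchange(&Lock, 1, 0) != 0)
			_mm_pause();
		...critical section...
		_InterlockedExchange(&Lock, 0);
*/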
1502
1503 #ifdef __cplusplus
1504 }
1505 #endif
1506
1507 #endif /* KJK_INTRIN_X86_H_ */
1508
1509 /* EOF */