[CRT/INTRIN_X86]
reactos/include/crt/mingw32/intrin_x86.h
1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins for
59 barriers and atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
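
/*
  A minimal usage sketch of the version test above (kept under "#if 0"; the
  helper name my_atomic_increment is made up for illustration):
*/
#if 0
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
static long my_atomic_increment(volatile long * p)
{
    /* GCC newer than 4.1.0: use the __sync_* built-ins */
    return __sync_add_and_fetch(p, 1);
}
#else
static long my_atomic_increment(volatile long * p)
{
    /* older GCC: fall back to inline assembly; lock xadd returns the old value */
    long old = 1;
    __asm__("lock; xaddl %[old], %[mem]" : [old] "+r" (old) : [mem] "m" (*p) : "memory");
    return old + 1;
}
#endif
#endif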
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 void* memmove(void*, const void*, size_t);
74 __INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
75 {
76 return memmove(dest, source, num);
77 }
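
/*
  A minimal usage sketch (kept under "#if 0"): because memcpy is defined in
  terms of memmove here, overlapping copies behave like memmove, whereas plain
  ISO C memcpy would leave this undefined.
*/
#if 0
static void shift_left_one(char * buf, size_t len)
{
    /* dest and source overlap; with this header's memcpy this is well-defined */
    memcpy(buf, buf + 1, len - 1);
}
#endif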
78
79
80 /*** Stack frame juggling ***/
81 #define _ReturnAddress() (__builtin_return_address(0))
82 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
83 /* TODO: __getcallerseflags but how??? */
84
85 /* Maybe the same for x86? */
86 #ifdef __x86_64__
87 #define _alloca(s) __builtin_alloca(s)
88 #endif
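
/*
  A minimal usage sketch for _ReturnAddress() (kept under "#if 0"; the
  g_last_caller variable is made up for illustration):
*/
#if 0
static void * g_last_caller;

static void remember_caller(void)
{
    /* records the address this call will return to, i.e. the call site */
    g_last_caller = _ReturnAddress();
}
#endif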
89
90 /*** Memory barriers ***/
91
92 __INTRIN_INLINE void _ReadWriteBarrier(void);
93 __INTRIN_INLINE void _mm_mfence(void);
94 __INTRIN_INLINE void _mm_lfence(void);
95 __INTRIN_INLINE void _mm_sfence(void);
96 #ifdef __x86_64__
97 __INTRIN_INLINE void __faststorefence(void);
98 #endif
99
100 __INTRIN_INLINE void _ReadWriteBarrier(void)
101 {
102 __asm__ __volatile__("" : : : "memory");
103 }
104
105 /* GCC only supports full barriers */
106 #define _ReadBarrier _ReadWriteBarrier
107 #define _WriteBarrier _ReadWriteBarrier
108
109 __INTRIN_INLINE void _mm_mfence(void)
110 {
111 __asm__ __volatile__("mfence" : : : "memory");
112 }
113
114 __INTRIN_INLINE void _mm_lfence(void)
115 {
116 _ReadBarrier();
117 __asm__ __volatile__("lfence");
118 _ReadBarrier();
119 }
120
121 __INTRIN_INLINE void _mm_sfence(void)
122 {
123 _WriteBarrier();
124 __asm__ __volatile__("sfence");
125 _WriteBarrier();
126 }
127
128 #ifdef __x86_64__
129 __INTRIN_INLINE void __faststorefence(void)
130 {
131 long local;
132 __asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
133 }
134 #endif
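
/*
  A simplified sketch of how these differ (kept under "#if 0"):
  _ReadWriteBarrier() only restrains the compiler, while the _mm_*fence()
  intrinsics also emit CPU fence instructions.
*/
#if 0
static int g_data;
static volatile int g_ready;

static void producer(void)
{
    g_data = 42;
    _mm_sfence();       /* order the data store before the flag store */
    g_ready = 1;
}

static void consumer(void)
{
    while (!g_ready)
        ;               /* spin until the flag is set */
    _mm_lfence();       /* keep the data load from moving above the flag load */
    /* g_data now reads 42 */
}
#endif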
135
136
137 /*** Atomic operations ***/
138
139 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value);
140 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b);
141 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b);
142 #if defined(_M_AMD64)
143 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b);
144 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b);
145 #endif
146
147 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
148
149 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
150 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
151 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
152 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
153 __INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value);
154 __INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value);
155 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
156 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
157 __INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
158 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
159 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
160 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
161 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
162 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
163 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
164 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
165 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
166 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
167 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
168 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
169 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
170 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
171 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
172 #if defined(_M_AMD64)
173 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value);
174 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value);
175 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask);
176 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask);
177 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask);
178 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
179 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
180 #endif
181
182 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
183 {
184 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
185 }
186
187 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
188 {
189 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
190 }
191
192 #ifndef __clang__
193 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
194 {
195 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
196 }
197 #endif
198
199 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
200 {
201 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
202 }
203
204 __INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value)
205 {
206 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
207 __sync_synchronize();
208 return __sync_lock_test_and_set(Target, Value);
209 }
210
211 __INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value)
212 {
213 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
214 __sync_synchronize();
215 return __sync_lock_test_and_set(Target, Value);
216 }
217
218 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
219 {
220 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
221 __sync_synchronize();
222 return __sync_lock_test_and_set(Target, Value);
223 }
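
/*
  A minimal sketch of why the full barrier matters (kept under "#if 0"):
  _InterlockedExchange can implement a trivial spinlock, and the exchange must
  not be reordered with the accesses it protects.
*/
#if 0
static volatile long g_lock;    /* 0 = free, 1 = held */

static void acquire(void)
{
    while (_InterlockedExchange(&g_lock, 1) != 0)
        ;                       /* spin until the previous value was 0 */
}

static void release(void)
{
    _InterlockedExchange(&g_lock, 0);
}
#endif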
224
225 #if defined(_M_AMD64)
226 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
227 {
228 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
229 __sync_synchronize();
230 return __sync_lock_test_and_set(Target, Value);
231 }
232 #endif
233
234 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
235 {
236 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
237 __sync_synchronize();
238 return (void *)__sync_lock_test_and_set(Target, Value);
239 }
240
241 __INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
242 {
243 return __sync_fetch_and_add(Addend, Value);
244 }
245
246 #ifndef __clang__
247 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
248 {
249 return __sync_fetch_and_add(Addend, Value);
250 }
251 #endif
252
253 #if defined(_M_AMD64)
254 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
255 {
256 return __sync_fetch_and_add(Addend, Value);
257 }
258 #endif
259
260 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
261 {
262 return __sync_fetch_and_and(value, mask);
263 }
264
265 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
266 {
267 return __sync_fetch_and_and(value, mask);
268 }
269
270 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
271 {
272 return __sync_fetch_and_and(value, mask);
273 }
274
275 #if defined(_M_AMD64)
276 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
277 {
278 return __sync_fetch_and_and(value, mask);
279 }
280 #endif
281
282 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
283 {
284 return __sync_fetch_and_or(value, mask);
285 }
286
287 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
288 {
289 return __sync_fetch_and_or(value, mask);
290 }
291
292 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
293 {
294 return __sync_fetch_and_or(value, mask);
295 }
296
297 #if defined(_M_AMD64)
298 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
299 {
300 return __sync_fetch_and_or(value, mask);
301 }
302 #endif
303
304 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
305 {
306 return __sync_fetch_and_xor(value, mask);
307 }
308
309 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
310 {
311 return __sync_fetch_and_xor(value, mask);
312 }
313
314 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
315 {
316 return __sync_fetch_and_xor(value, mask);
317 }
318
319 #if defined(_M_AMD64)
320 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
321 {
322 return __sync_fetch_and_xor(value, mask);
323 }
324 #endif
325
326 #ifndef __clang__
327 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
328 {
329 return __sync_sub_and_fetch(lpAddend, 1);
330 }
331
332 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
333 {
334 return __sync_add_and_fetch(lpAddend, 1);
335 }
336 #endif
337
338 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
339 {
340 return __sync_sub_and_fetch(lpAddend, 1);
341 }
342
343 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
344 {
345 return __sync_add_and_fetch(lpAddend, 1);
346 }
347
348 #if defined(_M_AMD64)
349 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
350 {
351 return __sync_sub_and_fetch(lpAddend, 1);
352 }
353
354 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
355 {
356 return __sync_add_and_fetch(lpAddend, 1);
357 }
358 #endif
359
360 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
361
362 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
363 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
364 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
365 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
366 __INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value);
367 __INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value);
368 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
369 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
370 __INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
371 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
372 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
373 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
374 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
375 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
376 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
377 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
378 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
379 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
380 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
381 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
382 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
383 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
384 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
385 #if defined(_M_AMD64)
386 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
387 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
388 #endif
389
390 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
391 {
392 char retval = Comperand;
393 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
394 return retval;
395 }
396
397 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
398 {
399 short retval = Comperand;
400 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
401 return retval;
402 }
403
404 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
405 {
406 long retval = Comperand;
407 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
408 return retval;
409 }
410
411 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
412 {
413 void * retval = (void *)Comperand;
414 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
415 return retval;
416 }
417
418 __INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value)
419 {
420 char retval = Value;
421 __asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
422 return retval;
423 }
424
425 __INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value)
426 {
427 short retval = Value;
428 __asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
429 return retval;
430 }
431
432 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
433 {
434 long retval = Value;
435 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
436 return retval;
437 }
438
439 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
440 {
441 void * retval = Value;
442 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
443 return retval;
444 }
445
446 __INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
447 {
448 long retval = Value;
449 __asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
450 return retval;
451 }
452
453 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
454 {
455 long retval = Value;
456 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
457 return retval;
458 }
459
460 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
461 {
462 char x;
463 char y;
464
465 y = *value;
466
467 do
468 {
469 x = y;
470 y = _InterlockedCompareExchange8(value, x & mask, x);
471 }
472 while(y != x);
473
474 return y;
475 }
476
477 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
478 {
479 short x;
480 short y;
481
482 y = *value;
483
484 do
485 {
486 x = y;
487 y = _InterlockedCompareExchange16(value, x & mask, x);
488 }
489 while(y != x);
490
491 return y;
492 }
493
494 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
495 {
496 long x;
497 long y;
498
499 y = *value;
500
501 do
502 {
503 x = y;
504 y = _InterlockedCompareExchange(value, x & mask, x);
505 }
506 while(y != x);
507
508 return y;
509 }
510
511 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
512 {
513 char x;
514 char y;
515
516 y = *value;
517
518 do
519 {
520 x = y;
521 y = _InterlockedCompareExchange8(value, x | mask, x);
522 }
523 while(y != x);
524
525 return y;
526 }
527
528 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
529 {
530 short x;
531 short y;
532
533 y = *value;
534
535 do
536 {
537 x = y;
538 y = _InterlockedCompareExchange16(value, x | mask, x);
539 }
540 while(y != x);
541
542 return y;
543 }
544
545 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
546 {
547 long x;
548 long y;
549
550 y = *value;
551
552 do
553 {
554 x = y;
555 y = _InterlockedCompareExchange(value, x | mask, x);
556 }
557 while(y != x);
558
559 return y;
560 }
561
562 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
563 {
564 char x;
565 char y;
566
567 y = *value;
568
569 do
570 {
571 x = y;
572 y = _InterlockedCompareExchange8(value, x ^ mask, x);
573 }
574 while(y != x);
575
576 return y;
577 }
578
579 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
580 {
581 short x;
582 short y;
583
584 y = *value;
585
586 do
587 {
588 x = y;
589 y = _InterlockedCompareExchange16(value, x ^ mask, x);
590 }
591 while(y != x);
592
593 return y;
594 }
595
596 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
597 {
598 long x;
599 long y;
600
601 y = *value;
602
603 do
604 {
605 x = y;
606 y = _InterlockedCompareExchange(value, x ^ mask, x);
607 }
608 while(y != x);
609
610 return y;
611 }
612
613 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
614 {
615 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
616 }
617
618 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
619 {
620 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
621 }
622
623 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
624 {
625 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
626 }
627
628 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
629 {
630 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
631 }
632
633 #if defined(_M_AMD64)
634 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
635 {
636 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
637 }
638
639 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
640 {
641 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
642 }
643 #endif
644
645 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
646
647 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
648
649 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
650 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
651 {
652 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
653 }
654
655 #else
656
657 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
658 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
659 {
660 long long retval = Comperand;
661
662 __asm__
663 (
664 "lock; cmpxchg8b %[Destination]" :
665 [retval] "+A" (retval) :
666 [Destination] "m" (*Destination),
667 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
668 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
669 "memory"
670 );
671
672 return retval;
673 }
674
675 #endif
676
677 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
678 {
679 __asm__
680 (
681 "lock; add %[Value], %[Lo32];"
682 "jae LABEL%=;"
683 "lock; adc $0, %[Hi32];"
684 "LABEL%=:;" :
685 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
686 [Value] "ir" (Value) :
687 "memory"
688 );
689
690 return Value;
691 }
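
/*
  A minimal usage sketch (kept under "#if 0"). As with the Visual C++
  intrinsic, the low and high halves are updated by separate locked
  operations, so readers of the whole 64-bit value need their own
  synchronization; this is intended for statistics counters.
*/
#if 0
static volatile long long g_bytes_transferred;

static void account(const long bytes)
{
    _InterlockedAddLargeStatistic(&g_bytes_transferred, bytes);
}
#endif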
692
693 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
694 {
695 unsigned char retval;
696 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
697 return retval;
698 }
699
700 #if defined(_M_AMD64)
701 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
702 {
703 unsigned char retval;
704 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
705 return retval;
706 }
707 #endif
708
709 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
710 {
711 unsigned char retval;
712 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
713 return retval;
714 }
715
716 #if defined(_M_AMD64)
717 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
718 {
719 unsigned char retval;
720 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
721 return retval;
722 }
723 #endif
724
725 /*** String operations ***/
726
727 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count);
728 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count);
729 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count);
730 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count);
731 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count);
732 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count);
733 #ifdef _M_AMD64
734 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count);
735 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count);
736 #endif
737
738
739 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
740 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
741 {
742 __asm__ __volatile__
743 (
744 "rep; stosb" :
745 [Dest] "=D" (Dest), [Count] "=c" (Count) :
746 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
747 );
748 }
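
/*
  A minimal usage sketch (kept under "#if 0"): __stosb fills Count bytes at
  Dest with Data, much like memset (and, per the note above, without a
  "memory" clobber).
*/
#if 0
static void zero_page(void * page)
{
    __stosb((unsigned char *)page, 0, 4096);
}
#endif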
749
750 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
751 {
752 __asm__ __volatile__
753 (
754 "rep; stosw" :
755 [Dest] "=D" (Dest), [Count] "=c" (Count) :
756 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
757 );
758 }
759
760 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
761 {
762 __asm__ __volatile__
763 (
764 "rep; stosl" :
765 [Dest] "=D" (Dest), [Count] "=c" (Count) :
766 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
767 );
768 }
769
770 #ifdef _M_AMD64
771 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
772 {
773 __asm__ __volatile__
774 (
775 "rep; stosq" :
776 [Dest] "=D" (Dest), [Count] "=c" (Count) :
777 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
778 );
779 }
780 #endif
781
782 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
783 {
784 __asm__ __volatile__
785 (
786 "rep; movsb" :
787 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
788 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
789 );
790 }
791
792 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
793 {
794 __asm__ __volatile__
795 (
796 "rep; movsw" :
797 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
798 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
799 );
800 }
801
802 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
803 {
804 __asm__ __volatile__
805 (
806 "rep; movsd" :
807 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
808 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
809 );
810 }
811
812 #ifdef _M_AMD64
813 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
814 {
815 __asm__ __volatile__
816 (
817 "rep; movsq" :
818 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
819 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
820 );
821 }
822 #endif
823
824 #if defined(_M_AMD64)
825
826 /*** GS segment addressing ***/
827
828 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data);
829 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data);
830 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data);
831 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data);
832 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset);
833 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset);
834 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset);
835 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset);
836 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset);
837 __INTRIN_INLINE void __incgsword(const unsigned long Offset);
838 __INTRIN_INLINE void __incgsdword(const unsigned long Offset);
839 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data);
840 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data);
841 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data);
842 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data);
843
844
845 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
846 {
847 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
848 }
849
850 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
851 {
852 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
853 }
854
855 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
856 {
857 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
858 }
859
860 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
861 {
862 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
863 }
864
865 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
866 {
867 unsigned char value;
868 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
869 return value;
870 }
871
872 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
873 {
874 unsigned short value;
875 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
876 return value;
877 }
878
879 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
880 {
881 unsigned long value;
882 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
883 return value;
884 }
885
886 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
887 {
888 unsigned __int64 value;
889 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
890 return value;
891 }
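
/*
  A minimal usage sketch (kept under "#if 0"), assuming the usual x64 Windows
  layout where the GS base is the current thread's TEB and NT_TIB.Self lives
  at offset 0x30:
*/
#if 0
static void * current_teb(void)
{
    return (void *)__readgsqword(0x30);
}
#endif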
892
893 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
894 {
895 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
896 }
897
898 __INTRIN_INLINE void __incgsword(const unsigned long Offset)
899 {
900 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
901 }
902
903 __INTRIN_INLINE void __incgsdword(const unsigned long Offset)
904 {
905 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
906 }
907
908 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
909 {
910 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
911 }
912
913 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
914 {
915 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
916 }
917
918 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
919 {
920 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
921 }
922
923 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
924 {
925 __asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
926 }
927
928 #else /* defined(_M_AMD64) */
929
930 /*** FS segment addressing ***/
931
932 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data);
933 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data);
934 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data);
935 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset);
936 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset);
937 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset);
938 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset);
939 __INTRIN_INLINE void __incfsword(const unsigned long Offset);
940 __INTRIN_INLINE void __incfsdword(const unsigned long Offset);
941 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data);
942 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data);
943 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data);
944
945
946 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
947 {
948 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
949 }
950
951 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
952 {
953 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
954 }
955
956 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
957 {
958 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
959 }
960
961 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
962 {
963 unsigned char value;
964 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
965 return value;
966 }
967
968 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
969 {
970 unsigned short value;
971 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
972 return value;
973 }
974
975 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
976 {
977 unsigned long value;
978 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
979 return value;
980 }
981
982 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
983 {
984 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
985 }
986
987 __INTRIN_INLINE void __incfsword(const unsigned long Offset)
988 {
989 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
990 }
991
992 __INTRIN_INLINE void __incfsdword(const unsigned long Offset)
993 {
994 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
995 }
996
997 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
998 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
999 {
1000 if(!__builtin_constant_p(Offset))
1001 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1002 else
1003 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1004 }
1005
1006 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
1007 {
1008 if(!__builtin_constant_p(Offset))
1009 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1010 else
1011 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1012 }
1013
1014 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
1015 {
1016 if(!__builtin_constant_p(Offset))
1017 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1018 else
1019 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1020 }
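
/*
  A minimal sketch of the quirk described above (kept under "#if 0"): Data is
  only added when Offset is a compile-time constant; with a run-time Offset
  the emulated Visual C++ behavior adds Offset to fs:[Offset] and ignores Data.
*/
#if 0
static void example(const unsigned long off)
{
    __addfsdword(0x34, 1);   /* constant offset: adds 1 to fs:[0x34] */
    __addfsdword(off, 1);    /* run-time offset: Data is effectively ignored */
}
#endif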
1021
1022 #endif /* defined(_M_AMD64) */
1023
1024
1025 /*** Bit manipulation ***/
1026
1027 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask);
1028 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask);
1029 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b);
1030 #ifdef _M_AMD64
1031 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b);
1032 #endif
1033 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b);
1034 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b);
1035 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b);
1036 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift);
1037 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift);
1038 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift);
1039 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift);
1040 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift);
1041 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift);
1042 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift);
1043 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit);
1044 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit);
1045 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit);
1046 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value);
1047 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value);
1048 #ifdef _M_AMD64
1049 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1050 #else
1051 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1052 #endif
1053
1054
1055 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
1056 {
1057 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1058 return Mask ? 1 : 0;
1059 }
1060
1061 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
1062 {
1063 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1064 return Mask ? 1 : 0;
1065 }
1066
1067 /* NOTE: again, the bizarre implementation follows Visual C++ */
1068 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
1069 {
1070 unsigned char retval;
1071
1072 if(__builtin_constant_p(b))
1073 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
1074 else
1075 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1076
1077 return retval;
1078 }
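
/*
  A minimal usage sketch (kept under "#if 0"): 'a' is treated as a bit string,
  so bit 37 of a two-element array is bit 5 of element 1.
*/
#if 0
static unsigned char example(void)
{
    long bits[2] = { 0, 1L << 5 };
    return _bittest(bits, 37);   /* returns 1 */
}
#endif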
1079
1080 #ifdef _M_AMD64
1081 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
1082 {
1083 unsigned char retval;
1084
1085 if(__builtin_constant_p(b))
1086 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
1087 else
1088 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1089
1090 return retval;
1091 }
1092 #endif
1093
1094 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
1095 {
1096 unsigned char retval;
1097
1098 if(__builtin_constant_p(b))
1099 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1100 else
1101 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1102
1103 return retval;
1104 }
1105
1106 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
1107 {
1108 unsigned char retval;
1109
1110 if(__builtin_constant_p(b))
1111 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1112 else
1113 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1114
1115 return retval;
1116 }
1117
1118 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
1119 {
1120 unsigned char retval;
1121
1122 if(__builtin_constant_p(b))
1123 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1124 else
1125 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1126
1127 return retval;
1128 }
1129
1130 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
1131 {
1132 unsigned char retval;
1133 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1134 return retval;
1135 }
1136
1137 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
1138 {
1139 unsigned short retval;
1140 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1141 return retval;
1142 }
1143
1144 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
1145 {
1146 unsigned long retval;
1147 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1148 return retval;
1149 }
1150
1151 #ifdef _M_AMD64
1152 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift)
1153 {
1154 unsigned __int64 retval;
1155 __asm__("rolq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1156 return retval;
1157 }
1158 #else
1159 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift)
1160 {
1161 /* FIXME: this is probably not optimal; mask the count so a rotation by 0 (or 64) stays well-defined */
1162 return (shift &= 63) ? (value << shift) | (value >> (64 - shift)) : value;
1163 }
1164 #endif
1165
1166 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
1167 {
1168 unsigned long retval;
1169 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1170 return retval;
1171 }
1172
1173 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
1174 {
1175 unsigned char retval;
1176 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1177 return retval;
1178 }
1179
1180 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
1181 {
1182 unsigned short retval;
1183 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1184 return retval;
1185 }
1186
1187 /*
1188 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1189 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1190 can pass 64-bit operands around - passing the two 32 bit parts separately
1191 just confuses it. Also we declare Bit as an int and then truncate it to
1192 match Visual C++ behavior
1193 */
1194 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
1195 {
1196 unsigned long long retval = Mask;
1197
1198 __asm__
1199 (
1200 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1201 "+A" (retval) :
1202 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1203 );
1204
1205 return retval;
1206 }
1207
1208 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
1209 {
1210 long long retval = Mask;
1211
1212 __asm__
1213 (
1214 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1215 "+A" (retval) :
1216 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1217 );
1218
1219 return retval;
1220 }
1221
1222 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
1223 {
1224 unsigned long long retval = Mask;
1225
1226 __asm__
1227 (
1228 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1229 "+A" (retval) :
1230 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1231 );
1232
1233 return retval;
1234 }
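
/*
  A minimal usage sketch (kept under "#if 0"): the value is shifted as one
  edx:eax pair, so set bits move between the two halves (for counts below 32).
*/
#if 0
static unsigned long long example(void)
{
    return __ll_lshift(0x80000000ULL, 4);   /* 0x0000000800000000 */
}
#endif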
1235
1236 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
1237 {
1238 unsigned short retval;
1239 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1240 return retval;
1241 }
1242
1243 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
1244 {
1245 unsigned long retval;
1246 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1247 return retval;
1248 }
1249
1250 #ifdef _M_AMD64
1251 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1252 {
1253 unsigned __int64 retval;
1254 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1255 return retval;
1256 }
1257 #else
1258 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1259 {
1260 union {
1261 unsigned __int64 int64part;
1262 struct {
1263 unsigned long lowpart;
1264 unsigned long hipart;
1265 };
1266 } retval;
1267 retval.int64part = value;
1268 __asm__("bswapl %[lowpart]\n"
1269 "bswapl %[hipart]\n"
1270 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1271 return retval.int64part;
1272 }
1273 #endif
1274
1275 /*** 64-bit math ***/
1276
1277 __INTRIN_INLINE long long __emul(const int a, const int b);
1278 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b);
1279 #ifdef _M_AMD64
1280 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b);
1281 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
1282 #endif
1283
1284
1285 __INTRIN_INLINE long long __emul(const int a, const int b)
1286 {
1287 long long retval;
1288 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1289 return retval;
1290 }
1291
1292 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
1293 {
1294 unsigned long long retval;
1295 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1296 return retval;
1297 }
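
/*
  A minimal usage sketch (kept under "#if 0"): __emulu is a full 32x32 -> 64
  bit multiply, whereas a plain multiplication of two unsigned ints truncates
  to 32 bits.
*/
#if 0
static unsigned long long example(void)
{
    return __emulu(0xFFFFFFFFu, 2u);   /* 0x1FFFFFFFE, not 0xFFFFFFFE */
}
#endif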
1298
1299 #ifdef _M_AMD64
1300
1301 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
1302 {
1303 __int64 retval;
1304 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1305 return retval;
1306 }
1307
1308 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
1309 {
1310 unsigned __int64 retval;
1311 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1312 return retval;
1313 }
1314
1315 #endif
1316
1317 /*** Port I/O ***/
1318
1319 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port);
1320 __INTRIN_INLINE unsigned short __inword(const unsigned short Port);
1321 __INTRIN_INLINE unsigned long __indword(const unsigned short Port);
1322 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count);
1323 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count);
1324 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count);
1325 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data);
1326 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data);
1327 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data);
1328 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count);
1329 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count);
1330 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count);
1331 __INTRIN_INLINE int _inp(unsigned short Port);
1332 __INTRIN_INLINE unsigned short _inpw(unsigned short Port);
1333 __INTRIN_INLINE unsigned long _inpd(unsigned short Port);
1334 __INTRIN_INLINE int _outp(unsigned short Port, int databyte);
1335 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword);
1336 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword);
1337
1338
1339 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
1340 {
1341 unsigned char byte;
1342 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1343 return byte;
1344 }
1345
1346 __INTRIN_INLINE unsigned short __inword(const unsigned short Port)
1347 {
1348 unsigned short word;
1349 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1350 return word;
1351 }
1352
1353 __INTRIN_INLINE unsigned long __indword(const unsigned short Port)
1354 {
1355 unsigned long dword;
1356 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1357 return dword;
1358 }
1359
1360 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1361 {
1362 __asm__ __volatile__
1363 (
1364 "rep; insb" :
1365 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1366 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1367 "memory"
1368 );
1369 }
1370
1371 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1372 {
1373 __asm__ __volatile__
1374 (
1375 "rep; insw" :
1376 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1377 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1378 "memory"
1379 );
1380 }
1381
1382 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1383 {
1384 __asm__ __volatile__
1385 (
1386 "rep; insl" :
1387 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1388 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1389 "memory"
1390 );
1391 }
1392
1393 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
1394 {
1395 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1396 }
1397
1398 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
1399 {
1400 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1401 }
1402
1403 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
1404 {
1405 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1406 }
1407
1408 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
1409 {
1410 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1411 }
1412
1413 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
1414 {
1415 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1416 }
1417
1418 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
1419 {
1420 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1421 }
1422
1423 __INTRIN_INLINE int _inp(unsigned short Port)
1424 {
1425 return __inbyte(Port);
1426 }
1427
1428 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1429 {
1430 return __inword(Port);
1431 }
1432
1433 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1434 {
1435 return __indword(Port);
1436 }
1437
1438 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1439 {
1440 __outbyte(Port, (unsigned char)databyte);
1441 return databyte;
1442 }
1443
1444 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1445 {
1446 __outword(Port, dataword);
1447 return dataword;
1448 }
1449
1450 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1451 {
1452 __outdword(Port, dataword);
1453 return dataword;
1454 }
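
/*
  A minimal usage sketch (kept under "#if 0"): classic PC port I/O, reading
  the CMOS RTC seconds register via index port 0x70 and data port 0x71
  (requires I/O privilege, e.g. kernel mode).
*/
#if 0
static unsigned char cmos_seconds(void)
{
    __outbyte(0x70, 0x00);   /* select CMOS register 0 (seconds) */
    return __inbyte(0x71);
}
#endif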
1455
1456
1457 /*** System information ***/
1458
1459 __INTRIN_INLINE void __cpuid(int CPUInfo [], const int InfoType);
1460 __INTRIN_INLINE unsigned long long __rdtsc(void);
1461 __INTRIN_INLINE void __writeeflags(uintptr_t Value);
1462 __INTRIN_INLINE uintptr_t __readeflags(void);
1463
1464
1465 __INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
1466 {
1467 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1468 }
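
/*
  A minimal usage sketch (kept under "#if 0"): leaf 0 returns the CPU vendor
  string in EBX, EDX, ECX, i.e. CPUInfo[1], CPUInfo[3], CPUInfo[2] with this
  array layout.
*/
#if 0
static void get_vendor(char vendor[13])
{
    int info[4];
    __cpuid(info, 0);
    memcpy(vendor + 0, &info[1], 4);   /* e.g. "Genu" */
    memcpy(vendor + 4, &info[3], 4);   /* e.g. "ineI" */
    memcpy(vendor + 8, &info[2], 4);   /* e.g. "ntel" */
    vendor[12] = '\0';
}
#endif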
1469
1470 __INTRIN_INLINE unsigned long long __rdtsc(void)
1471 {
1472 #ifdef _M_AMD64
1473 unsigned long long low, high;
1474 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1475 return low | (high << 32);
1476 #else
1477 unsigned long long retval;
1478 __asm__ __volatile__("rdtsc" : "=A"(retval));
1479 return retval;
1480 #endif
1481 }
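
/*
  A minimal usage sketch (kept under "#if 0"): a crude cycle count around a
  function call.
*/
#if 0
static unsigned long long cycles_for(void (*fn)(void))
{
    const unsigned long long start = __rdtsc();
    fn();
    return __rdtsc() - start;
}
#endif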
1482
1483 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1484 {
1485 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1486 }
1487
1488 __INTRIN_INLINE uintptr_t __readeflags(void)
1489 {
1490 uintptr_t retval;
1491 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1492 return retval;
1493 }
1494
1495 /*** Interrupts ***/
1496
1497 __INTRIN_INLINE void __int2c(void);
1498 __INTRIN_INLINE void _disable(void);
1499 __INTRIN_INLINE void _enable(void);
1500 __INTRIN_INLINE void __halt(void);
1501
1502 #ifdef __clang__
1503 #define __debugbreak() __asm__("int $3")
1504 #else
1505 __INTRIN_INLINE void __debugbreak(void);
1506 __INTRIN_INLINE void __debugbreak(void)
1507 {
1508 __asm__("int $3");
1509 }
1510 #endif
1511
1512 __INTRIN_INLINE void __int2c(void)
1513 {
1514 __asm__("int $0x2c");
1515 }
1516
1517 __INTRIN_INLINE void _disable(void)
1518 {
1519 __asm__("cli" : : : "memory");
1520 }
1521
1522 __INTRIN_INLINE void _enable(void)
1523 {
1524 __asm__("sti" : : : "memory");
1525 }
1526
1527 __INTRIN_INLINE void __halt(void)
1528 {
1529 __asm__("hlt\n\t" : : : "memory");
1530 }
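
/*
  A minimal usage sketch (kept under "#if 0"): the classic "mask interrupts,
  do the work, unmask" pattern (kernel mode only).
*/
#if 0
static volatile int g_counter;

static void critical_increment(void)
{
    _disable();     /* cli */
    ++g_counter;    /* work that must not be interrupted on this CPU */
    _enable();      /* sti */
}
#endif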
1531
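/*
  Example (a sketch, not prescriptive): the classic "disable interrupts and
  halt" sequence used on fatal error paths. _disable executes CLI and __halt
  executes HLT, so with interrupts masked the loop never resumes.

    _disable();
    for (;;)
        __halt();
*/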
/*** Protected memory management ***/

__INTRIN_INLINE void __invlpg(void * const Address);
#ifdef _M_AMD64
__INTRIN_INLINE void __writecr0(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr3(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr4(const unsigned __int64 Data);
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data);
__INTRIN_INLINE unsigned __int64 __readcr0(void);
__INTRIN_INLINE unsigned __int64 __readcr2(void);
__INTRIN_INLINE unsigned __int64 __readcr3(void);
__INTRIN_INLINE unsigned __int64 __readcr4(void);
__INTRIN_INLINE unsigned __int64 __readcr8(void);
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg);
__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value);
#else /* _M_AMD64 */
__INTRIN_INLINE void __writecr0(const unsigned int Data);
__INTRIN_INLINE void __writecr3(const unsigned int Data);
__INTRIN_INLINE void __writecr4(const unsigned int Data);
__INTRIN_INLINE unsigned long __readcr0(void);
__INTRIN_INLINE unsigned long __readcr2(void);
__INTRIN_INLINE unsigned long __readcr3(void);
__INTRIN_INLINE unsigned long __readcr4(void);
__INTRIN_INLINE unsigned int __readdr(unsigned int reg);
__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value);
#endif /* _M_AMD64 */


#ifdef _M_AMD64

__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
    __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
    unsigned __int64 value;
    __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
    return value;
}

#else /* _M_AMD64 */

__INTRIN_INLINE void __writecr0(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr3(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE void __writecr4(const unsigned int Data)
{
    __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

__INTRIN_INLINE unsigned long __readcr0(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr2(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr3(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
    return value;
}

__INTRIN_INLINE unsigned long __readcr4(void)
{
    unsigned long value;
    __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
    return value;
}

#endif /* _M_AMD64 */

#ifdef _M_AMD64

__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
    unsigned __int64 value;
    switch (reg)
    {
        case 0:
            __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
            break;
        case 1:
            __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
            break;
        case 2:
            __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
            break;
        case 3:
            __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
            break;
        case 4:
            __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
            break;
        case 5:
            __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
            break;
        case 6:
            __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
            break;
        case 7:
            __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
            break;
    }
    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
    switch (reg)
    {
        case 0:
            __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
            break;
        case 1:
            __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
            break;
        case 2:
            __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
            break;
        case 3:
            __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
            break;
        case 4:
            __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
            break;
        case 5:
            __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
            break;
        case 6:
            __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
            break;
        case 7:
            __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
            break;
    }
}

#else /* _M_AMD64 */

__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
    unsigned int value;
    switch (reg)
    {
        case 0:
            __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
            break;
        case 1:
            __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
            break;
        case 2:
            __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
            break;
        case 3:
            __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
            break;
        case 4:
            __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
            break;
        case 5:
            __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
            break;
        case 6:
            __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
            break;
        case 7:
            __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
            break;
    }
    return value;
}

__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
    switch (reg)
    {
        case 0:
            __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
            break;
        case 1:
            __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
            break;
        case 2:
            __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
            break;
        case 3:
            __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
            break;
        case 4:
            __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
            break;
        case 5:
            __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
            break;
        case 6:
            __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
            break;
        case 7:
            __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
            break;
    }
}

#endif /* _M_AMD64 */

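/*
  NOTE: __readdr and __writedr only accept debug register numbers 0-7; for
  any other value the switch above falls through and the value returned by
  __readdr is indeterminate. Illustrative use:

    __writedr(7, __readdr(7));
*/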
__INTRIN_INLINE void __invlpg(void * const Address)
{
    __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
}

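/*
  Example (informational sketch): after changing a page-table entry, the
  stale TLB entry for that page is typically flushed with __invlpg. The
  address below is illustrative only.

    void *va = (void *)0x401000;
    __invlpg(va);
*/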

/*** System operations ***/

__INTRIN_INLINE unsigned long long __readmsr(const int reg);
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value);
__INTRIN_INLINE unsigned long long __readpmc(const int counter);
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a);
__INTRIN_INLINE void __wbinvd(void);
__INTRIN_INLINE void __lidt(void *Source);
__INTRIN_INLINE void __sidt(void *Destination);


__INTRIN_INLINE unsigned long long __readmsr(const int reg)
{
#ifdef _M_AMD64
    unsigned long low, high;
    __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
    return ((unsigned long long)high << 32) | low;
#else
    unsigned long long retval;
    __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
    return retval;
#endif
}

__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
    __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
    __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}

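/*
  Example (a minimal sketch): reading a model-specific register and writing
  it back. 0x1B is the architectural IA32_APIC_BASE MSR, used here purely as
  an illustration; RDMSR and WRMSR require ring 0.

    unsigned long long apic_base = __readmsr(0x1B);
    __writemsr(0x1B, apic_base);
*/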
__INTRIN_INLINE unsigned long long __readpmc(const int counter)
{
#ifdef _M_AMD64
    unsigned long low, high;
    __asm__ __volatile__("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
    return ((unsigned long long)high << 32) | low;
#else
    unsigned long long retval;
    __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
    return retval;
#endif
}

/* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
{
    unsigned long retval;
    __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
    return retval;
}

__INTRIN_INLINE void __wbinvd(void)
{
    __asm__ __volatile__("wbinvd" : : : "memory");
}

__INTRIN_INLINE void __lidt(void *Source)
{
    __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}

__INTRIN_INLINE void __sidt(void *Destination)
{
    __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
}

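/*
  NOTE: __sidt and __lidt take a pointer to the 6-byte (10-byte on x86-64)
  pseudo-descriptor used by the SIDT/LIDT instructions; the 'short' in the
  constraints above only names the first two bytes. A sketch of the usual
  caller-side layout (the struct name is illustrative):

    #pragma pack(push, 1)
    struct idt_descriptor { unsigned short Limit; void *Base; };
    #pragma pack(pop)

    struct idt_descriptor idtr;
    __sidt(&idtr);
*/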
/*** Misc operations ***/

__INTRIN_INLINE void _mm_pause(void);
__INTRIN_INLINE void __nop(void);

__INTRIN_INLINE void _mm_pause(void)
{
    __asm__ __volatile__("pause" : : : "memory");
}

__INTRIN_INLINE void __nop(void)
{
    __asm__ __volatile__("nop");
}

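/*
  Example (illustrative sketch): _mm_pause in a spin-wait loop, the pattern
  the PAUSE instruction is intended for. 'lock_taken' is a hypothetical
  flag, not part of this header.

    while (lock_taken)
        _mm_pause();
*/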
#ifdef __cplusplus
}
#endif

#endif /* KJK_INTRIN_X86_H_ */

/* EOF */