reactos.git: reactos/include/crt/mingw32/intrin_x86.h
1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 	NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins
59 	for barriers and atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
72 /*** memcpy must be memmove ***/
73 __INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
74 {
75 return memmove(dest, source, num);
76 }
77
78
79 /*** Stack frame juggling ***/
80 #define _ReturnAddress() (__builtin_return_address(0))
81 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
82 /* TODO: __getcallerseflags but how??? */
83
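/*
   Illustrative example (not part of the original header): _ReturnAddress()
   evaluates to the address the current function will return to, i.e. its
   call site, which is handy for simple tracing hooks. 'LogPointer' and
   'TraceCaller' below are hypothetical names.

     void LogPointer(const char * tag, const void * p);   // hypothetical logger

     void TraceCaller(void)
     {
         void * caller = _ReturnAddress();   // address of our call site
         LogPointer("called from", caller);
     }
*/
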
84 /* Maybe the same for x86? */
85 #ifdef __x86_64__
86 #define _alloca(s) __builtin_alloca(s)
87 #endif
88
89 /*** Memory barriers ***/
90
91 __INTRIN_INLINE void _ReadWriteBarrier(void);
92 __INTRIN_INLINE void _mm_mfence(void);
93 __INTRIN_INLINE void _mm_lfence(void);
94 __INTRIN_INLINE void _mm_sfence(void);
95 #ifdef __x86_64__
96 __INTRIN_INLINE void __faststorefence(void);
97 #endif
98
99 __INTRIN_INLINE void _ReadWriteBarrier(void)
100 {
101 __asm__ __volatile__("" : : : "memory");
102 }
103
104 /* GCC only supports full barriers */
105 #define _ReadBarrier _ReadWriteBarrier
106 #define _WriteBarrier _ReadWriteBarrier
107
108 __INTRIN_INLINE void _mm_mfence(void)
109 {
110 __asm__ __volatile__("mfence" : : : "memory");
111 }
112
113 __INTRIN_INLINE void _mm_lfence(void)
114 {
115 _ReadBarrier();
116 __asm__ __volatile__("lfence");
117 _ReadBarrier();
118 }
119
120 __INTRIN_INLINE void _mm_sfence(void)
121 {
122 _WriteBarrier();
123 __asm__ __volatile__("sfence");
124 _WriteBarrier();
125 }
126
127 #ifdef __x86_64__
128 __INTRIN_INLINE void __faststorefence(void)
129 {
130 long local;
131 __asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
132 }
133 #endif
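
/*
   Illustrative example (a sketch, not part of the original header):
   _ReadWriteBarrier/_ReadBarrier/_WriteBarrier only restrain the compiler,
   while _mm_mfence/_mm_lfence/_mm_sfence emit real fence instructions.
   A typical use is publishing data through a flag, where the barrier stops
   the compiler from moving the flag store ahead of the data store. The
   names 'g_data' and 'g_ready' are hypothetical.

     static int g_data;
     static volatile long g_ready;

     void Publish(int value)
     {
         g_data = value;
         _ReadWriteBarrier();   // compiler barrier: keep the stores in order
         g_ready = 1;           // a consumer polls g_ready, then reads g_data
     }
*/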
134
135
136 /*** Atomic operations ***/
137
138 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value);
139 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b);
140 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b);
141 #if defined(_M_AMD64)
142 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b);
143 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b);
144 #endif
145
146 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
147
148 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
149 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
150 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
151 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
152 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
153 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
154 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
155 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
156 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
157 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
158 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
159 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
160 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
161 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
162 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
163 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
164 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
165 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
166 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
167 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
168 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
169 #if defined(_M_AMD64)
170 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value);
171 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value);
172 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask);
173 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask);
174 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask);
175 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
176 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
177 #endif
178
179 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
180 {
181 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
182 }
183
184 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
185 {
186 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
187 }
188
189 #ifndef __clang__
190 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
191 {
192 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
193 }
194 #endif
195
196 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
197 {
198 return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
199 }
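
/*
   Illustrative example (not part of the original header): the compare and
   exchange intrinsics return the value observed at Destination, so the usual
   pattern is a retry loop that recomputes the desired value until the swap
   succeeds. 'IncrementCapped' is a hypothetical helper.

     long IncrementCapped(volatile long * value, long cap)
     {
         long old, desired;
         do
         {
             old = *value;
             if (old >= cap)
                 return old;          // already at the cap, nothing to do
             desired = old + 1;
         }
         while (_InterlockedCompareExchange(value, desired, old) != old);
         return desired;              // the value we successfully stored
     }
*/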
200
201 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
202 {
203 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
204 __sync_synchronize();
205 return __sync_lock_test_and_set(Target, Value);
206 }
207
208 #if defined(_M_AMD64)
209 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
210 {
211 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
212 __sync_synchronize();
213 return __sync_lock_test_and_set(Target, Value);
214 }
215 #endif
216
217 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
218 {
219 /* NOTE: ditto */
220 __sync_synchronize();
221 return (void *)__sync_lock_test_and_set(Target, Value);
222 }
223
224 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
225 {
226 return __sync_fetch_and_add(Addend, Value);
227 }
228
229 #ifndef __clang__
230 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
231 {
232 return __sync_fetch_and_add(Addend, Value);
233 }
234 #endif
235
236 #if defined(_M_AMD64)
237 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
238 {
239 return __sync_fetch_and_add(Addend, Value);
240 }
241 #endif
242
243 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
244 {
245 return __sync_fetch_and_and(value, mask);
246 }
247
248 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
249 {
250 return __sync_fetch_and_and(value, mask);
251 }
252
253 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
254 {
255 return __sync_fetch_and_and(value, mask);
256 }
257
258 #if defined(_M_AMD64)
259 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
260 {
261 return __sync_fetch_and_and(value, mask);
262 }
263 #endif
264
265 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
266 {
267 return __sync_fetch_and_or(value, mask);
268 }
269
270 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
271 {
272 return __sync_fetch_and_or(value, mask);
273 }
274
275 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
276 {
277 return __sync_fetch_and_or(value, mask);
278 }
279
280 #if defined(_M_AMD64)
281 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
282 {
283 return __sync_fetch_and_or(value, mask);
284 }
285 #endif
286
287 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
288 {
289 return __sync_fetch_and_xor(value, mask);
290 }
291
292 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
293 {
294 return __sync_fetch_and_xor(value, mask);
295 }
296
297 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
298 {
299 return __sync_fetch_and_xor(value, mask);
300 }
301
302 #if defined(_M_AMD64)
303 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
304 {
305 return __sync_fetch_and_xor(value, mask);
306 }
307 #endif
308
309 #ifndef __clang__
310 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
311 {
312 return __sync_sub_and_fetch(lpAddend, 1);
313 }
314
315 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
316 {
317 return __sync_add_and_fetch(lpAddend, 1);
318 }
319 #endif
320
321 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
322 {
323 return __sync_sub_and_fetch(lpAddend, 1);
324 }
325
326 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
327 {
328 return __sync_add_and_fetch(lpAddend, 1);
329 }
330
331 #if defined(_M_AMD64)
332 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
333 {
334 return __sync_sub_and_fetch(lpAddend, 1);
335 }
336
337 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
338 {
339 return __sync_add_and_fetch(lpAddend, 1);
340 }
341 #endif
342
343 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
344
345 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
346 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
347 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
348 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
349 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
350 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
351 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
352 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
353 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
354 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
355 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
356 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
357 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
358 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
359 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
360 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
361 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
362 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
363 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
364 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
365 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
366 #if defined(_M_AMD64)
367 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
368 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
369 #endif
370
371 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
372 {
373 char retval = Comperand;
374 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
375 return retval;
376 }
377
378 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
379 {
380 short retval = Comperand;
381 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
382 return retval;
383 }
384
385 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
386 {
387 long retval = Comperand;
388 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
389 return retval;
390 }
391
392 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
393 {
394 void * retval = (void *)Comperand;
395 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
396 return retval;
397 }
398
399 __INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
400 {
401 long retval = Value;
402 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
403 return retval;
404 }
405
406 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
407 {
408 void * retval = Value;
409 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
410 return retval;
411 }
412
413 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
414 {
415 short retval = Value;
416 __asm__("lock; xaddw %w[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
417 return retval;
418 }
419
420 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
421 {
422 long retval = Value;
423 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
424 return retval;
425 }
426
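/* The And/Or/Xor fallbacks below all use the same pattern: read the current
   value, compute the new value, and retry the compare-and-exchange until no
   other thread modified the variable in between. As with the Visual C++
   intrinsics, the value returned is the one observed before the operation. */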
427 __INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
428 {
429 char x;
430 char y;
431
432 y = *value;
433
434 do
435 {
436 x = y;
437 y = _InterlockedCompareExchange8(value, x & mask, x);
438 }
439 while(y != x);
440
441 return y;
442 }
443
444 __INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
445 {
446 short x;
447 short y;
448
449 y = *value;
450
451 do
452 {
453 x = y;
454 y = _InterlockedCompareExchange16(value, x & mask, x);
455 }
456 while(y != x);
457
458 return y;
459 }
460
461 __INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
462 {
463 long x;
464 long y;
465
466 y = *value;
467
468 do
469 {
470 x = y;
471 y = _InterlockedCompareExchange(value, x & mask, x);
472 }
473 while(y != x);
474
475 return y;
476 }
477
478 __INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
479 {
480 char x;
481 char y;
482
483 y = *value;
484
485 do
486 {
487 x = y;
488 y = _InterlockedCompareExchange8(value, x | mask, x);
489 }
490 while(y != x);
491
492 return y;
493 }
494
495 __INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
496 {
497 short x;
498 short y;
499
500 y = *value;
501
502 do
503 {
504 x = y;
505 y = _InterlockedCompareExchange16(value, x | mask, x);
506 }
507 while(y != x);
508
509 return y;
510 }
511
512 __INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
513 {
514 long x;
515 long y;
516
517 y = *value;
518
519 do
520 {
521 x = y;
522 y = _InterlockedCompareExchange(value, x | mask, x);
523 }
524 while(y != x);
525
526 return y;
527 }
528
529 __INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
530 {
531 char x;
532 char y;
533
534 y = *value;
535
536 do
537 {
538 x = y;
539 y = _InterlockedCompareExchange8(value, x ^ mask, x);
540 }
541 while(y != x);
542
543 return y;
544 }
545
546 __INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
547 {
548 short x;
549 short y;
550
551 y = *value;
552
553 do
554 {
555 x = y;
556 y = _InterlockedCompareExchange16(value, x ^ mask, x);
557 }
558 while(y != x);
559
560 return y;
561 }
562
563 __INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
564 {
565 long x;
566 long y;
567
568 y = *value;
569
570 do
571 {
572 x = y;
573 y = _InterlockedCompareExchange(value, x ^ mask, x);
574 }
575 while(y != x);
576
577 return y;
578 }
579
580 __INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
581 {
582 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
583 }
584
585 __INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
586 {
587 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
588 }
589
590 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
591 {
592 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
593 }
594
595 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
596 {
597 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
598 }
599
600 #if defined(_M_AMD64)
601 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
602 {
603 return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
604 }
605
606 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
607 {
608 return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
609 }
610 #endif
611
612 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
613
614 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
615
616 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
617 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
618 {
619 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
620 }
621
622 #else
623
624 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
625 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
626 {
627 long long retval = Comperand;
628
629 __asm__
630 (
631 "lock; cmpxchg8b %[Destination]" :
632 [retval] "+A" (retval) :
633 [Destination] "m" (*Destination),
634 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
635 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
636 "memory"
637 );
638
639 return retval;
640 }
641
642 #endif
643
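/* _InterlockedAddLargeStatistic adds a 32-bit value to a 64-bit counter as
   two locked 32-bit operations (the low dword, then the carry into the high
   dword). The update is not atomic as a whole; like the Visual C++
   intrinsic, it is only intended for statistics counters where a torn read
   by another processor is acceptable. */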
644 __INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
645 {
646 __asm__
647 (
648 "lock; add %[Value], %[Lo32];"
649 "jae LABEL%=;"
650 "lock; adc $0, %[Hi32];"
651 "LABEL%=:;" :
652 [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
653 [Value] "ir" (Value) :
654 "memory"
655 );
656
657 return Value;
658 }
659
660 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
661 {
662 unsigned char retval;
663 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
664 return retval;
665 }
666
667 #if defined(_M_AMD64)
668 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
669 {
670 unsigned char retval;
671 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
672 return retval;
673 }
674 #endif
675
676 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
677 {
678 unsigned char retval;
679 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
680 return retval;
681 }
682
683 #if defined(_M_AMD64)
684 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
685 {
686 unsigned char retval;
687 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
688 return retval;
689 }
690 #endif
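
/*
   Illustrative example (not part of the original header): the interlocked
   bit-test intrinsics return the previous value of the bit, so they can be
   used to claim a slot in a shared bitmap. 'AllocateSlot' and 'g_slots' are
   hypothetical.

     static volatile long g_slots;          // 32 slots, one bit per slot

     long AllocateSlot(void)
     {
         long i;
         for (i = 0; i < 32; i++)
         {
             // Bit was clear before the operation: this caller owns slot i.
             if (!_interlockedbittestandset(&g_slots, i))
                 return i;
         }
         return -1;                         // all slots are taken
     }
*/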
691
692 /*** String operations ***/
693
694 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count);
695 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count);
696 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count);
697 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count);
698 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count);
699 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count);
700 #ifdef _M_AMD64
701 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count);
702 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count);
703 #endif
704
705
706 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
707 __INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
708 {
709 __asm__ __volatile__
710 (
711 "rep; stosb" :
712 [Dest] "=D" (Dest), [Count] "=c" (Count) :
713 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
714 );
715 }
716
717 __INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
718 {
719 __asm__ __volatile__
720 (
721 "rep; stosw" :
722 [Dest] "=D" (Dest), [Count] "=c" (Count) :
723 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
724 );
725 }
726
727 __INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
728 {
729 __asm__ __volatile__
730 (
731 "rep; stosl" :
732 [Dest] "=D" (Dest), [Count] "=c" (Count) :
733 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
734 );
735 }
736
737 #ifdef _M_AMD64
738 __INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
739 {
740 __asm__ __volatile__
741 (
742 "rep; stosq" :
743 [Dest] "=D" (Dest), [Count] "=c" (Count) :
744 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
745 );
746 }
747 #endif
748
749 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
750 {
751 __asm__ __volatile__
752 (
753 "rep; movsb" :
754 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
755 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
756 );
757 }
758
759 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
760 {
761 __asm__ __volatile__
762 (
763 "rep; movsw" :
764 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
765 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
766 );
767 }
768
769 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
770 {
771 __asm__ __volatile__
772 (
773 "rep; movsd" :
774 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
775 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
776 );
777 }
778
779 #ifdef _M_AMD64
780 __INTRIN_INLINE void __movsq(unsigned __int64 * Destination, const unsigned __int64 * Source, size_t Count)
781 {
782 __asm__ __volatile__
783 (
784 "rep; movsq" :
785 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
786 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
787 );
788 }
789 #endif
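
/*
   Illustrative example (not part of the original header): the __stosX and
   __movsX intrinsics wrap REP STOS/MOVS, i.e. forward-only fills and copies
   counted in elements, roughly memset/memcpy for element-sized,
   non-overlapping buffers. 'ZeroAndCopy' is a hypothetical helper.

     void ZeroAndCopy(unsigned long * dst, const unsigned long * src, size_t count)
     {
         __stosd(dst, 0, count);    // fill 'count' dwords with zero
         __movsd(dst, src, count);  // then copy 'count' dwords from src
     }
*/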
790
791 #if defined(_M_AMD64)
792
793 /*** GS segment addressing ***/
794
795 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data);
796 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data);
797 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data);
798 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data);
799 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset);
800 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset);
801 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset);
802 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset);
803 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset);
804 __INTRIN_INLINE void __incgsword(const unsigned long Offset);
805 __INTRIN_INLINE void __incgsdword(const unsigned long Offset);
806 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data);
807 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data);
808 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data);
809 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data);
810
811
812 __INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
813 {
814 __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
815 }
816
817 __INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
818 {
819 __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
820 }
821
822 __INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
823 {
824 __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
825 }
826
827 __INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
828 {
829 __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
830 }
831
832 __INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
833 {
834 unsigned char value;
835 __asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
836 return value;
837 }
838
839 __INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
840 {
841 unsigned short value;
842 __asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
843 return value;
844 }
845
846 __INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
847 {
848 unsigned long value;
849 __asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
850 return value;
851 }
852
853 __INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
854 {
855 unsigned __int64 value;
856 __asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
857 return value;
858 }
859
860 __INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
861 {
862 __asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
863 }
864
865 __INTRIN_INLINE void __incgsword(const unsigned long Offset)
866 {
867 __asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
868 }
869
870 __INTRIN_INLINE void __incgsdword(const unsigned long Offset)
871 {
872 __asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
873 }
874
875 __INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
876 {
877 __asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
878 }
879
880 __INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
881 {
882 __asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
883 }
884
885 __INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
886 {
887 __asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
888 }
889
890 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
891 {
892 __asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
893 }
894
895 #else /* defined(_M_AMD64) */
896
897 /*** FS segment addressing ***/
898
899 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data);
900 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data);
901 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data);
902 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset);
903 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset);
904 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset);
905 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset);
906 __INTRIN_INLINE void __incfsword(const unsigned long Offset);
907 __INTRIN_INLINE void __incfsdword(const unsigned long Offset);
908 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data);
909 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data);
910 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data);
911
912
913 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
914 {
915 __asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
916 }
917
918 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
919 {
920 __asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
921 }
922
923 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
924 {
925 __asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
926 }
927
928 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
929 {
930 unsigned char value;
931 __asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
932 return value;
933 }
934
935 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
936 {
937 unsigned short value;
938 __asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
939 return value;
940 }
941
942 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
943 {
944 unsigned long value;
945 __asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
946 return value;
947 }
948
949 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
950 {
951 __asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
952 }
953
954 __INTRIN_INLINE void __incfsword(const unsigned long Offset)
955 {
956 __asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
957 }
958
959 __INTRIN_INLINE void __incfsdword(const unsigned long Offset)
960 {
961 __asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
962 }
963
964 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
965 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
966 {
967 if(!__builtin_constant_p(Offset))
968 __asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
969 else
970 __asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
971 }
972
973 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
974 {
975 if(!__builtin_constant_p(Offset))
976 __asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
977 else
978 __asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
979 }
980
981 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
982 {
983 if(!__builtin_constant_p(Offset))
984 __asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
985 else
986 __asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
987 }
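
/*
   Illustrative example (user-mode Win32, a sketch only): the FS segment
   points at the current thread's TEB, and offset 0x18 of the NT_TIB holds
   the TEB's own linear address, so it can be read without knowing the
   segment base. 'GetCurrentTebAddress' is a hypothetical helper.

     unsigned long GetCurrentTebAddress(void)
     {
         return __readfsdword(0x18);   // NT_TIB.Self
     }
*/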
988
989 #endif /* defined(_M_AMD64) */
990
991
992 /*** Bit manipulation ***/
993
994 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask);
995 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask);
996 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b);
997 #ifdef _M_AMD64
998 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b);
999 #endif
1000 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b);
1001 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b);
1002 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b);
1003 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift);
1004 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift);
1005 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift);
1006 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift);
1007 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift);
1008 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift);
1009 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit);
1010 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit);
1011 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit);
1012 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value);
1013 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value);
1014 #ifdef _M_AMD64
1015 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1016 #else
1017 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1018 #endif
1019
1020
1021 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
1022 {
1023 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1024 return Mask ? 1 : 0;
1025 }
1026
1027 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
1028 {
1029 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1030 return Mask ? 1 : 0;
1031 }
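
/*
   Illustrative example (not part of the original header): _BitScanForward
   stores the index of the lowest set bit and returns zero when the mask is
   empty, which makes it convenient for "for each set bit" loops.
   'ForEachSetBit' and 'Visit' are hypothetical.

     void ForEachSetBit(unsigned long mask, void (*Visit)(unsigned long index))
     {
         unsigned long index;
         while (_BitScanForward(&index, mask))
         {
             Visit(index);
             mask &= mask - 1;   // clear the lowest set bit
         }
     }
*/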
1032
1033 /* NOTE: again, the bizarre implementation follows Visual C++ */
1034 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
1035 {
1036 unsigned char retval;
1037
1038 if(__builtin_constant_p(b))
1039 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
1040 else
1041 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1042
1043 return retval;
1044 }
1045
1046 #ifdef _M_AMD64
1047 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
1048 {
1049 unsigned char retval;
1050
1051 if(__builtin_constant_p(b))
1052 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
1053 else
1054 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1055
1056 return retval;
1057 }
1058 #endif
1059
1060 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
1061 {
1062 unsigned char retval;
1063
1064 if(__builtin_constant_p(b))
1065 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1066 else
1067 __asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1068
1069 return retval;
1070 }
1071
1072 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
1073 {
1074 unsigned char retval;
1075
1076 if(__builtin_constant_p(b))
1077 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1078 else
1079 __asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1080
1081 return retval;
1082 }
1083
1084 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
1085 {
1086 unsigned char retval;
1087
1088 if(__builtin_constant_p(b))
1089 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1090 else
1091 __asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1092
1093 return retval;
1094 }
1095
1096 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
1097 {
1098 unsigned char retval;
1099 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1100 return retval;
1101 }
1102
1103 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
1104 {
1105 unsigned short retval;
1106 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1107 return retval;
1108 }
1109
1110 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
1111 {
1112 unsigned long retval;
1113 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1114 return retval;
1115 }
1116
1117 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
1118 {
1119 unsigned long retval;
1120 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1121 return retval;
1122 }
1123
1124 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
1125 {
1126 unsigned char retval;
1127 __asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1128 return retval;
1129 }
1130
1131 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
1132 {
1133 unsigned short retval;
1134 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1135 return retval;
1136 }
1137
1138 /*
1139 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1140 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1141 can pass 64-bit operands around - passing the two 32 bit parts separately
1142 just confuses it. Also we declare Bit as an int and then truncate it to
1143 match Visual C++ behavior
1144 */
1145 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
1146 {
1147 unsigned long long retval = Mask;
1148
1149 __asm__
1150 (
1151 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1152 "+A" (retval) :
1153 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1154 );
1155
1156 return retval;
1157 }
1158
1159 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
1160 {
1161 long long retval = Mask;
1162
1163 __asm__
1164 (
1165 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1166 "+A" (retval) :
1167 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1168 );
1169
1170 return retval;
1171 }
1172
1173 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
1174 {
1175 unsigned long long retval = Mask;
1176
1177 __asm__
1178 (
1179 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1180 "+A" (retval) :
1181 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1182 );
1183
1184 return retval;
1185 }
1186
1187 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
1188 {
1189 unsigned short retval;
1190 __asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1191 return retval;
1192 }
1193
1194 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
1195 {
1196 unsigned long retval;
1197 __asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1198 return retval;
1199 }
1200
1201 #ifdef _M_AMD64
1202 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1203 {
1204 unsigned __int64 retval;
1205 __asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1206 return retval;
1207 }
1208 #else
1209 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
1210 {
1211 union {
1212 unsigned __int64 int64part;
1213 struct {
1214 unsigned long lowpart;
1215 unsigned long hipart;
1216 };
1217 } retval;
1218 retval.int64part = value;
1219 __asm__("bswapl %[lowpart]\n"
1220 "bswapl %[hipart]\n"
1221 : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1222 return retval.int64part;
1223 }
1224 #endif
1225
1226 /*** 64-bit math ***/
1227
1228 __INTRIN_INLINE long long __emul(const int a, const int b);
1229 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b);
1230 #ifdef _M_AMD64
1231 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b);
1232 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
1233 #endif
1234
1235
1236 __INTRIN_INLINE long long __emul(const int a, const int b)
1237 {
1238 long long retval;
1239 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1240 return retval;
1241 }
1242
1243 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
1244 {
1245 unsigned long long retval;
1246 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1247 return retval;
1248 }
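
/*
   Illustrative example (not part of the original header): __emul and __emulu
   return the full 64-bit product of two 32-bit operands, whereas a plain C
   multiplication of two ints is performed in int and can overflow before any
   widening takes place. 'ScaleToQuad' is a hypothetical helper.

     long long ScaleToQuad(int value, int factor)
     {
         // (long long)(value * factor) may already have overflowed in int;
         // __emul multiplies directly into 64 bits.
         return __emul(value, factor);
     }
*/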
1249
1250 #ifdef _M_AMD64
1251
1252 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
1253 {
1254 __int64 retval;
1255 __asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1256 return retval;
1257 }
1258
1259 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
1260 {
1261 unsigned __int64 retval;
1262 __asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1263 return retval;
1264 }
1265
1266 #endif
1267
1268 /*** Port I/O ***/
1269
1270 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port);
1271 __INTRIN_INLINE unsigned short __inword(const unsigned short Port);
1272 __INTRIN_INLINE unsigned long __indword(const unsigned short Port);
1273 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count);
1274 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count);
1275 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count);
1276 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data);
1277 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data);
1278 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data);
1279 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count);
1280 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count);
1281 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count);
1282 __INTRIN_INLINE int _inp(unsigned short Port);
1283 __INTRIN_INLINE unsigned short _inpw(unsigned short Port);
1284 __INTRIN_INLINE unsigned long _inpd(unsigned short Port);
1285 __INTRIN_INLINE int _outp(unsigned short Port, int databyte);
1286 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword);
1287 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword);
1288
1289
1290 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
1291 {
1292 unsigned char byte;
1293 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1294 return byte;
1295 }
1296
1297 __INTRIN_INLINE unsigned short __inword(const unsigned short Port)
1298 {
1299 unsigned short word;
1300 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1301 return word;
1302 }
1303
1304 __INTRIN_INLINE unsigned long __indword(const unsigned short Port)
1305 {
1306 unsigned long dword;
1307 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1308 return dword;
1309 }
1310
1311 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1312 {
1313 __asm__ __volatile__
1314 (
1315 "rep; insb" :
1316 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1317 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1318 "memory"
1319 );
1320 }
1321
1322 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1323 {
1324 __asm__ __volatile__
1325 (
1326 "rep; insw" :
1327 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1328 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1329 "memory"
1330 );
1331 }
1332
1333 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1334 {
1335 __asm__ __volatile__
1336 (
1337 "rep; insl" :
1338 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1339 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1340 "memory"
1341 );
1342 }
1343
1344 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
1345 {
1346 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1347 }
1348
1349 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
1350 {
1351 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1352 }
1353
1354 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
1355 {
1356 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1357 }
1358
1359 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
1360 {
1361 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1362 }
1363
1364 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
1365 {
1366 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1367 }
1368
1369 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
1370 {
1371 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1372 }
1373
1374 __INTRIN_INLINE int _inp(unsigned short Port)
1375 {
1376 return __inbyte(Port);
1377 }
1378
1379 __INTRIN_INLINE unsigned short _inpw(unsigned short Port)
1380 {
1381 return __inword(Port);
1382 }
1383
1384 __INTRIN_INLINE unsigned long _inpd(unsigned short Port)
1385 {
1386 return __indword(Port);
1387 }
1388
1389 __INTRIN_INLINE int _outp(unsigned short Port, int databyte)
1390 {
1391 __outbyte(Port, (unsigned char)databyte);
1392 return databyte;
1393 }
1394
1395 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
1396 {
1397 __outword(Port, dataword);
1398 return dataword;
1399 }
1400
1401 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
1402 {
1403 __outdword(Port, dataword);
1404 return dataword;
1405 }
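
/*
   Illustrative example (requires ring 0 or suitable I/O privileges, a sketch
   only): the port intrinsics map directly to IN/OUT instructions. A classic
   use is writing POST codes to the conventional diagnostic port 0x80, which
   is a platform convention rather than something defined by this header.

     void WritePostCode(unsigned char code)
     {
         __outbyte(0x80, code);   // visible on a POST debug card, if present
     }
*/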
1406
1407
1408 /*** System information ***/
1409
1410 __INTRIN_INLINE void __cpuid(int CPUInfo [], const int InfoType);
1411 __INTRIN_INLINE unsigned long long __rdtsc(void);
1412 __INTRIN_INLINE void __writeeflags(uintptr_t Value);
1413 __INTRIN_INLINE uintptr_t __readeflags(void);
1414
1415
1416 __INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
1417 {
1418 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1419 }
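
/*
   Illustrative example (not part of the original header): CPUID leaf 0
   returns the vendor string in EBX, EDX, ECX, which __cpuid exposes as
   CPUInfo[1], CPUInfo[3] and CPUInfo[2]. 'GetCpuVendor' is a hypothetical
   helper; a production version might use memcpy instead of the casts.

     void GetCpuVendor(char vendor[13])
     {
         int regs[4];
         __cpuid(regs, 0);
         ((int *)vendor)[0] = regs[1];   // EBX
         ((int *)vendor)[1] = regs[3];   // EDX
         ((int *)vendor)[2] = regs[2];   // ECX
         vendor[12] = '\0';
     }
*/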
1420
1421 __INTRIN_INLINE unsigned long long __rdtsc(void)
1422 {
1423 #ifdef _M_AMD64
1424 unsigned long long low, high;
1425 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1426 return low | (high << 32);
1427 #else
1428 unsigned long long retval;
1429 __asm__ __volatile__("rdtsc" : "=A"(retval));
1430 return retval;
1431 #endif
1432 }
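
/*
   Illustrative example (a sketch only): the difference of two __rdtsc
   readings gives an approximate cycle count. RDTSC is not a serializing
   instruction, so very short measurements can be skewed by out-of-order
   execution. 'MeasureCycles' is a hypothetical helper.

     unsigned long long MeasureCycles(void (*fn)(void))
     {
         unsigned long long start = __rdtsc();
         fn();
         return __rdtsc() - start;
     }
*/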
1433
1434 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1435 {
1436 __asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1437 }
1438
1439 __INTRIN_INLINE uintptr_t __readeflags(void)
1440 {
1441 uintptr_t retval;
1442 __asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1443 return retval;
1444 }
1445
1446 /*** Interrupts ***/
1447
1448 __INTRIN_INLINE void __int2c(void);
1449 __INTRIN_INLINE void _disable(void);
1450 __INTRIN_INLINE void _enable(void);
1451 __INTRIN_INLINE void __halt(void);
1452
1453 #ifdef __clang__
1454 #define __debugbreak() __asm__("int $3")
1455 #else
1456 __INTRIN_INLINE void __debugbreak(void);
1457 __INTRIN_INLINE void __debugbreak(void)
1458 {
1459 __asm__("int $3");
1460 }
1461 #endif
1462
1463 __INTRIN_INLINE void __int2c(void)
1464 {
1465 __asm__("int $0x2c");
1466 }
1467
1468 __INTRIN_INLINE void _disable(void)
1469 {
1470 __asm__("cli" : : : "memory");
1471 }
1472
1473 __INTRIN_INLINE void _enable(void)
1474 {
1475 __asm__("sti" : : : "memory");
1476 }
1477
1478 __INTRIN_INLINE void __halt(void)
1479 {
1480 __asm__("hlt\n\t" : : : "memory");
1481 }
1482
1483 /*** Protected memory management ***/
1484
1485 __INTRIN_INLINE void __invlpg(void * const Address);
1486 #ifdef _M_AMD64
1487 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data);
1488 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data);
1489 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data);
1490 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data);
1491 __INTRIN_INLINE unsigned __int64 __readcr0(void);
1492 __INTRIN_INLINE unsigned __int64 __readcr2(void);
1493 __INTRIN_INLINE unsigned __int64 __readcr3(void);
1494 __INTRIN_INLINE unsigned __int64 __readcr4(void);
1495 __INTRIN_INLINE unsigned __int64 __readcr8(void);
1496 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg);
1497 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value);
1498 #else /* _M_AMD64 */
1499 __INTRIN_INLINE void __writecr0(const unsigned int Data);
1500 __INTRIN_INLINE void __writecr3(const unsigned int Data);
1501 __INTRIN_INLINE void __writecr4(const unsigned int Data);
1502 __INTRIN_INLINE unsigned long __readcr0(void);
1503 __INTRIN_INLINE unsigned long __readcr2(void);
1504 __INTRIN_INLINE unsigned long __readcr3(void);
1505 __INTRIN_INLINE unsigned long __readcr4(void);
1506 __INTRIN_INLINE unsigned int __readdr(unsigned int reg);
1507 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value);
1508 #endif /* _M_AMD64 */
1509
1510
1511 #ifdef _M_AMD64
1512
1513 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
1514 {
1515 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1516 }
1517
1518 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
1519 {
1520 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1521 }
1522
1523 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
1524 {
1525 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1526 }
1527
1528 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
1529 {
1530 __asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1531 }
1532
1533 __INTRIN_INLINE unsigned __int64 __readcr0(void)
1534 {
1535 unsigned __int64 value;
1536 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1537 return value;
1538 }
1539
1540 __INTRIN_INLINE unsigned __int64 __readcr2(void)
1541 {
1542 unsigned __int64 value;
1543 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1544 return value;
1545 }
1546
1547 __INTRIN_INLINE unsigned __int64 __readcr3(void)
1548 {
1549 unsigned __int64 value;
1550 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1551 return value;
1552 }
1553
1554 __INTRIN_INLINE unsigned __int64 __readcr4(void)
1555 {
1556 unsigned __int64 value;
1557 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1558 return value;
1559 }
1560
1561 __INTRIN_INLINE unsigned __int64 __readcr8(void)
1562 {
1563 unsigned __int64 value;
1564 __asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1565 return value;
1566 }
1567
1568 #else /* _M_AMD64 */
1569
1570 __INTRIN_INLINE void __writecr0(const unsigned int Data)
1571 {
1572 __asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1573 }
1574
1575 __INTRIN_INLINE void __writecr3(const unsigned int Data)
1576 {
1577 __asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1578 }
1579
1580 __INTRIN_INLINE void __writecr4(const unsigned int Data)
1581 {
1582 __asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1583 }
1584
1585 __INTRIN_INLINE unsigned long __readcr0(void)
1586 {
1587 unsigned long value;
1588 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1589 return value;
1590 }
1591
1592 __INTRIN_INLINE unsigned long __readcr2(void)
1593 {
1594 unsigned long value;
1595 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1596 return value;
1597 }
1598
1599 __INTRIN_INLINE unsigned long __readcr3(void)
1600 {
1601 unsigned long value;
1602 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1603 return value;
1604 }
1605
1606 __INTRIN_INLINE unsigned long __readcr4(void)
1607 {
1608 unsigned long value;
1609 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1610 return value;
1611 }
1612
1613 #endif /* _M_AMD64 */
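/* Usage sketch: a ring-0 read-modify-write of CR0, here temporarily clearing
   the write-protect bit (CR0.WP, bit 16). uintptr_t is used so the same
   sketch fits both the 32-bit and the 64-bit variants declared above.

   uintptr_t cr0 = __readcr0();
   __writecr0(cr0 & ~((uintptr_t)1 << 16));
   ... patch an otherwise read-only page ...
   __writecr0(cr0);
*/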
1614
1615 #ifdef _M_AMD64
1616
1617 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
1618 {
1619 unsigned __int64 value = 0;
1620 switch (reg)
1621 {
1622 case 0:
1623 __asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1624 break;
1625 case 1:
1626 __asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1627 break;
1628 case 2:
1629 __asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1630 break;
1631 case 3:
1632 __asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1633 break;
1634 case 4:
1635 __asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1636 break;
1637 case 5:
1638 __asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1639 break;
1640 case 6:
1641 __asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1642 break;
1643 case 7:
1644 __asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1645 break;
1646 }
1647 return value;
1648 }
1649
1650 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
1651 {
1652 switch (reg)
1653 {
1654 case 0:
1655 __asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1656 break;
1657 case 1:
1658 __asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1659 break;
1660 case 2:
1661 __asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1662 break;
1663 case 3:
1664 __asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1665 break;
1666 case 4:
1667 __asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1668 break;
1669 case 5:
1670 __asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1671 break;
1672 case 6:
1673 __asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1674 break;
1675 case 7:
1676 __asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1677 break;
1678 }
1679 }
1680
1681 #else /* _M_AMD64 */
1682
1683 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1684 {
1685 unsigned int value = 0;
1686 switch (reg)
1687 {
1688 case 0:
1689 __asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1690 break;
1691 case 1:
1692 __asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1693 break;
1694 case 2:
1695 __asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1696 break;
1697 case 3:
1698 __asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1699 break;
1700 case 4:
1701 __asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1702 break;
1703 case 5:
1704 __asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1705 break;
1706 case 6:
1707 __asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1708 break;
1709 case 7:
1710 __asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1711 break;
1712 }
1713 return value;
1714 }
1715
1716 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1717 {
1718 switch (reg)
1719 {
1720 case 0:
1721 __asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1722 break;
1723 case 1:
1724 __asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1725 break;
1726 case 2:
1727 __asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1728 break;
1729 case 3:
1730 __asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1731 break;
1732 case 4:
1733 __asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1734 break;
1735 case 5:
1736 __asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1737 break;
1738 case 6:
1739 __asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1740 break;
1741 case 7:
1742 __asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1743 break;
1744 }
1745 }
1746
1747 #endif /* _M_AMD64 */
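/* Usage sketch: arming a local execute breakpoint on a hypothetical address
   'addr' with debug register 0. Setting bit 0 of DR7 (L0) enables DR0, and
   leaving the RW0/LEN0 fields at zero selects "break on execution".

   __writedr(0, (uintptr_t)addr);
   __writedr(7, __readdr(7) | 1);
*/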
1748
1749 __INTRIN_INLINE void __invlpg(void * const Address)
1750 {
1751 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
1752 }
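/* Usage sketch: after changing the page-table entry for a hypothetical
   virtual address 'va', flush only the stale translation for that page
   instead of reloading CR3.

   ... update the PTE mapping 'va' ...
   __invlpg(va);
*/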
1753
1754
1755 /*** System operations ***/
1756
1757 __INTRIN_INLINE unsigned long long __readmsr(const int reg);
1758 __INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value);
1759 __INTRIN_INLINE unsigned long long __readpmc(const int counter);
1760 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a);
1761 __INTRIN_INLINE void __wbinvd(void);
1762 __INTRIN_INLINE void __lidt(void *Source);
1763 __INTRIN_INLINE void __sidt(void *Destination);
1764
1765
1766 __INTRIN_INLINE unsigned long long __readmsr(const int reg)
1767 {
1768 #ifdef _M_AMD64
1769 unsigned long low, high;
1770 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1771 return ((unsigned long long)high << 32) | low;
1772 #else
1773 unsigned long long retval;
1774 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1775 return retval;
1776 #endif
1777 }
1778
1779 __INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
1780 {
1781 #ifdef _M_AMD64
1782 __asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1783 #else
1784 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1785 #endif
1786 }
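/* Usage sketch: a ring-0 read-modify-write of a model-specific register,
   here IA32_APIC_BASE (MSR 0x1B); bit 11 is the APIC global-enable flag.

   unsigned long long apic_base = __readmsr(0x1B);
   __writemsr(0x1B, apic_base | (1ULL << 11));
*/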
1787
1788 __INTRIN_INLINE unsigned long long __readpmc(const int counter)
1789 {
1790 unsigned long low, high;
1791 __asm__ __volatile__("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
1792 return ((unsigned long long)high << 32) | low;
1793 }
1794
1795 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1796 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
1797 {
1798 unsigned long retval;
1799 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1800 return retval;
1801 }
1802
1803 __INTRIN_INLINE void __wbinvd(void)
1804 {
1805 __asm__ __volatile__("wbinvd" : : : "memory");
1806 }
1807
1808 __INTRIN_INLINE void __lidt(void *Source)
1809 {
1810 __asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
1811 }
1812
1813 __INTRIN_INLINE void __sidt(void *Destination)
1814 {
1815 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1816 }
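/* Usage sketch: the pointer passed to __sidt/__lidt addresses a packed
   descriptor-table register image, a 16-bit limit followed by the base
   address (6 bytes on x86, 10 bytes on x86-64). 'struct idtr' below is a
   hypothetical layout for illustration.

   #pragma pack(push, 1)
   struct idtr { unsigned short limit; uintptr_t base; };
   #pragma pack(pop)

   struct idtr idtr;
   __sidt(&idtr);
*/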
1817
1818 /*** Misc operations ***/
1819
1820 __INTRIN_INLINE void _mm_pause(void);
1821 __INTRIN_INLINE void __nop(void);
1822
1823 __INTRIN_INLINE void _mm_pause(void)
1824 {
1825 __asm__ __volatile__("pause" : : : "memory");
1826 }
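/* Usage sketch: a polite spin-wait on a hypothetical volatile flag; pause
   hints to the CPU that this is a spin loop, reducing power use and the
   memory-order mis-speculation penalty when the flag finally changes.

   while (*lock_taken)
       _mm_pause();
*/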
1827
1828 __INTRIN_INLINE void __nop(void)
1829 {
1830 __asm__ __volatile__("nop");
1831 }
1832
1833 #ifdef __cplusplus
1834 }
1835 #endif
1836
1837 #endif /* KJK_INTRIN_X86_H_ */
1838
1839 /* EOF */