[CRT/INTRIN_X86]
[reactos.git] / reactos / include / crt / mingw32 / intrin_x86.h
1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 will only optimize out non-volatile asm blocks with outputs, so input-only
55 blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 and follow Visual C++ behavior
57
58 NOTE: on GCC 4.1.0, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
67
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71
/*** memcopy must be memmove ***/
void* memmove(void*, const void*, size_t);

/* memcpy is routed through memmove so overlapping buffers are always
   handled safely (matches the CRT behavior this header emulates). */
__INTRIN_INLINE void* memcpy(void* dest, const void* source, size_t num)
{
    return memmove(dest, source, num);
}
78
79
/*** Stack frame juggling ***/
/* Return address of the current function's caller. */
#define _ReturnAddress() (__builtin_return_address(0))
/* Address of the slot holding the return address; assumes the return
   address sits one slot above __builtin_frame_address(0), which holds for
   standard x86 frame layouts -- TODO(review) confirm with frame pointers omitted. */
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/* Maybe the same for x86? */
#ifdef __x86_64__
#define _alloca(s) __builtin_alloca(s)
#endif
89
/*** Memory barriers ***/

/* Forward declarations so the definitions below may appear in any order. */
__INTRIN_INLINE void _ReadWriteBarrier(void);
__INTRIN_INLINE void _mm_mfence(void);
__INTRIN_INLINE void _mm_lfence(void);
__INTRIN_INLINE void _mm_sfence(void);
#ifdef __x86_64__
__INTRIN_INLINE void __faststorefence(void);
#endif
99
/* Compiler-only barrier: stops the compiler from reordering memory
   accesses across this point; emits no machine instruction. */
__INTRIN_INLINE void _ReadWriteBarrier(void)
{
    __asm__ __volatile__("" : : : "memory");
}

/* GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier
108
/* Full hardware fence (MFENCE); the "memory" clobber also makes it a
   compiler barrier. */
__INTRIN_INLINE void _mm_mfence(void)
{
    __asm__ __volatile__("mfence" : : : "memory");
}

/* Load fence bracketed by compiler barriers so the compiler cannot move
   loads across the LFENCE either. */
__INTRIN_INLINE void _mm_lfence(void)
{
    _ReadBarrier();
    __asm__ __volatile__("lfence");
    _ReadBarrier();
}

/* Store fence bracketed by compiler barriers (same pattern as _mm_lfence). */
__INTRIN_INLINE void _mm_sfence(void)
{
    _WriteBarrier();
    __asm__ __volatile__("sfence");
    _WriteBarrier();
}
127
#ifdef __x86_64__
/* Serializes prior stores with a locked read-modify-write on a dummy stack
   slot, which is what Visual C++ emits for __faststorefence (cheaper than
   MFENCE for store ordering). */
__INTRIN_INLINE void __faststorefence(void)
{
    /* The slot must be initialized (reading an uninitialized object is UB),
       must be a "+m" output since "lock or" writes it, and the "memory"
       clobber is required so the intrinsic is a compiler barrier as well --
       the original "m" input with no clobber let the compiler reorder
       memory accesses straight across the fence. */
    long local = 0;
    __asm__ __volatile__("lock; orl $0, %0;" : "+m" (local) : : "memory");
}
#endif
135
136
/*** Atomic operations ***/

/* Forward declarations for the intrinsics that are always implemented with
   inline assembly, regardless of the GCC version. */
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value);
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b);
__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b);
#if defined(_M_AMD64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b);
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b);
#endif
146
/* GCC newer than 4.1.0 provides the __sync_* atomic built-ins; prefer them
   over hand-written assembly (see the version-check note at the top). */
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

/* Forward declarations for the __sync_*-based implementations below. */
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
__INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value);
__INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value);
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value);
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value);
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask);
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
#endif
181
/* 8-bit atomic compare-and-swap: stores Exchange into *Destination only if
   it currently equals Comperand; always returns the prior contents. */
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
    const char initial = __sync_val_compare_and_swap(Destination, Comperand, Exchange);
    return initial;
}

/* 16-bit atomic compare-and-swap; same contract as the 8-bit variant. */
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
    const short initial = __sync_val_compare_and_swap(Destination, Comperand, Exchange);
    return initial;
}
191
/* Guarded out under Clang -- presumably Clang supplies these itself;
   verify before removing the guard. */
#ifndef __clang__

/* 32-bit atomic compare-and-swap; returns the prior contents. */
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

/* Pointer-sized atomic compare-and-swap; returns the prior pointer. */
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
    return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#endif
205
/* 8-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}

/* 16-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
219
/* Guarded out under Clang (see the matching guard above). */
#ifndef __clang__

/* 32-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}

/* Pointer-sized atomic exchange; returns the previous pointer. */
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return (void *)__sync_lock_test_and_set(Target, Value);
}

#endif

#if defined(_M_AMD64)
/* 64-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
{
    /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
    __sync_synchronize();
    return __sync_lock_test_and_set(Target, Value);
}
#endif
246
/* Atomically add Value to *Addend; returns the previous value.
   NOTE(review): declared as returning long even though this is the 16-bit
   variant (MSVC declares _InterlockedExchangeAdd16 as returning short) --
   kept as-is for source compatibility; verify callers before changing. */
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
    return __sync_fetch_and_add(Addend, Value);
}

#ifndef __clang__
/* 32-bit atomic fetch-and-add; returns the previous value. */
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}
#endif

#if defined(_M_AMD64)
/* 64-bit atomic fetch-and-add; returns the previous value. */
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * const Addend, const long long Value)
{
    return __sync_fetch_and_add(Addend, Value);
}
#endif
265
/* 8-bit atomic fetch-and-AND: ANDs mask into *value and returns the value
   held before the operation. */
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
    const char previous = __sync_fetch_and_and(value, mask);
    return previous;
}

/* 16-bit atomic fetch-and-AND; returns the previous value. */
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
    const short previous = __sync_fetch_and_and(value, mask);
    return previous;
}

/* 32-bit atomic fetch-and-AND; returns the previous value. */
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
    const long previous = __sync_fetch_and_and(value, mask);
    return previous;
}

#if defined(_M_AMD64)
/* 64-bit atomic fetch-and-AND; returns the previous value. */
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * const value, const long long mask)
{
    const long long previous = __sync_fetch_and_and(value, mask);
    return previous;
}
#endif
287
/* 8-bit atomic fetch-and-OR: ORs mask into *value and returns the value
   held before the operation. */
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
    const char previous = __sync_fetch_and_or(value, mask);
    return previous;
}

/* 16-bit atomic fetch-and-OR; returns the previous value. */
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
    const short previous = __sync_fetch_and_or(value, mask);
    return previous;
}

/* 32-bit atomic fetch-and-OR; returns the previous value. */
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
    const long previous = __sync_fetch_and_or(value, mask);
    return previous;
}

#if defined(_M_AMD64)
/* 64-bit atomic fetch-and-OR; returns the previous value. */
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * const value, const long long mask)
{
    const long long previous = __sync_fetch_and_or(value, mask);
    return previous;
}
#endif
309
/* 8-bit atomic fetch-and-XOR: XORs mask into *value and returns the value
   held before the operation. */
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
    const char previous = __sync_fetch_and_xor(value, mask);
    return previous;
}

/* 16-bit atomic fetch-and-XOR; returns the previous value. */
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
    const short previous = __sync_fetch_and_xor(value, mask);
    return previous;
}

/* 32-bit atomic fetch-and-XOR; returns the previous value. */
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
    const long previous = __sync_fetch_and_xor(value, mask);
    return previous;
}

#if defined(_M_AMD64)
/* 64-bit atomic fetch-and-XOR; returns the previous value. */
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * const value, const long long mask)
{
    const long long previous = __sync_fetch_and_xor(value, mask);
    return previous;
}
#endif
331
/* Guarded out under Clang (see the matching guards above). */
#ifndef __clang__
/* Atomically decrement *lpAddend; returns the NEW (decremented) value. */
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

/* Atomically increment *lpAddend; returns the NEW (incremented) value. */
__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}
#endif

/* 16-bit atomic decrement; returns the new value. */
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

/* 16-bit atomic increment; returns the new value. */
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}

#if defined(_M_AMD64)
/* 64-bit atomic decrement; returns the new value. */
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
    return __sync_sub_and_fetch(lpAddend, 1);
}

/* 64-bit atomic increment; returns the new value. */
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
    return __sync_add_and_fetch(lpAddend, 1);
}
#endif
365
#else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */

/* Pre-4.1 GCC fallback: no __sync_* built-ins, so everything below is
   implemented with inline assembly or compare-exchange loops. */
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand);
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand);
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand);
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand);
__INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value);
__INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value);
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value);
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value);
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value);
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value);
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask);
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask);
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask);
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask);
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend);
__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend);
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend);
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend);
#if defined(_M_AMD64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend);
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend);
#endif
395
/* 8-bit CAS: cmpxchg compares AL (retval, preloaded with Comperand) with
   *Destination; on match it stores Exchange, otherwise it loads the current
   value into AL.  Either way retval ends up holding the value *Destination
   had before the operation. */
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
{
    char retval = Comperand;
    __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}

/* 16-bit CAS (same protocol, via AX). */
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
{
    short retval = Comperand;
    __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
    return retval;
}

/* 32-bit CAS (via EAX). */
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
{
    long retval = Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
    return retval;
}

/* Pointer-sized CAS; the "=a" output with the matching "[retval]" input is
   the spelled-out equivalent of the "+a" read-write constraint used above. */
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
{
    void * retval = (void *)Comperand;
    __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
    return retval;
}
423
/* Atomic exchange via xchg; no lock prefix is needed because xchg with a
   memory operand is implicitly locked. */
__INTRIN_INLINE char _InterlockedExchange8(volatile char * const Target, const char Value)
{
    char retval = Value;
    __asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

/* 16-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE short _InterlockedExchange16(volatile short * const Target, const short Value)
{
    short retval = Value;
    __asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

/* 32-bit atomic exchange; returns the previous value. */
__INTRIN_INLINE long _InterlockedExchange(volatile long * const Target, const long Value)
{
    long retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}

/* Pointer-sized atomic exchange.
   NOTE(review): xchgl is a 32-bit operation -- this fallback would truncate
   pointers on a 64-bit target; verify this path is only built for x86. */
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
{
    void * retval = Value;
    __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
    return retval;
}
451
/* Atomically add Value to *Addend via xadd; xadd exchanges retval with the
   old value, so the previous content of *Addend is returned (widened to
   long, matching the declaration above). */
__INTRIN_INLINE long _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
{
    /* retval must be a 16-bit object printed with the %w (16-bit register
       name) modifier: the original used a plain long operand, which made
       GCC substitute a 32/64-bit register name into the 16-bit "xaddw"
       and the assembler rejected the operand-size mismatch. */
    short retval = Value;
    __asm__("lock; xaddw %w[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}
458
/* Atomically add Value to *Addend via xadd; xadd exchanges retval with the
   old value, so the previous content of *Addend is returned. */
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
{
    long retval = Value;
    __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
    return retval;
}
465
/* CAS-loop fallback for 8-bit atomic AND: retry the compare-exchange until
   no other writer races us; returns the value observed immediately before
   the successful update. */
__INTRIN_INLINE char _InterlockedAnd8(volatile char * const value, const char mask)
{
    char expected = *value;

    for(;;)
    {
        const char observed = _InterlockedCompareExchange8(value, expected & mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 16-bit atomic AND; returns the previous value. */
__INTRIN_INLINE short _InterlockedAnd16(volatile short * const value, const short mask)
{
    short expected = *value;

    for(;;)
    {
        const short observed = _InterlockedCompareExchange16(value, expected & mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 32-bit atomic AND; returns the previous value. */
__INTRIN_INLINE long _InterlockedAnd(volatile long * const value, const long mask)
{
    long expected = *value;

    for(;;)
    {
        const long observed = _InterlockedCompareExchange(value, expected & mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}
516
/* CAS-loop fallback for 8-bit atomic OR: retry the compare-exchange until
   no other writer races us; returns the value observed immediately before
   the successful update. */
__INTRIN_INLINE char _InterlockedOr8(volatile char * const value, const char mask)
{
    char expected = *value;

    for(;;)
    {
        const char observed = _InterlockedCompareExchange8(value, expected | mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 16-bit atomic OR; returns the previous value. */
__INTRIN_INLINE short _InterlockedOr16(volatile short * const value, const short mask)
{
    short expected = *value;

    for(;;)
    {
        const short observed = _InterlockedCompareExchange16(value, expected | mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 32-bit atomic OR; returns the previous value. */
__INTRIN_INLINE long _InterlockedOr(volatile long * const value, const long mask)
{
    long expected = *value;

    for(;;)
    {
        const long observed = _InterlockedCompareExchange(value, expected | mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}
567
/* CAS-loop fallback for 8-bit atomic XOR: retry the compare-exchange until
   no other writer races us; returns the value observed immediately before
   the successful update. */
__INTRIN_INLINE char _InterlockedXor8(volatile char * const value, const char mask)
{
    char expected = *value;

    for(;;)
    {
        const char observed = _InterlockedCompareExchange8(value, expected ^ mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 16-bit atomic XOR; returns the previous value. */
__INTRIN_INLINE short _InterlockedXor16(volatile short * const value, const short mask)
{
    short expected = *value;

    for(;;)
    {
        const short observed = _InterlockedCompareExchange16(value, expected ^ mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}

/* CAS-loop fallback for 32-bit atomic XOR; returns the previous value. */
__INTRIN_INLINE long _InterlockedXor(volatile long * const value, const long mask)
{
    long expected = *value;

    for(;;)
    {
        const long observed = _InterlockedCompareExchange(value, expected ^ mask, expected);

        if(observed == expected)
            return observed;

        expected = observed;
    }
}
618
/* Decrement/increment built on the xadd-based exchange-add; each returns
   the NEW value after the update. */
__INTRIN_INLINE long _InterlockedDecrement(volatile long * const lpAddend)
{
    const long original = _InterlockedExchangeAdd(lpAddend, -1);
    return original - 1;
}

__INTRIN_INLINE long _InterlockedIncrement(volatile long * const lpAddend)
{
    const long original = _InterlockedExchangeAdd(lpAddend, 1);
    return original + 1;
}

__INTRIN_INLINE short _InterlockedDecrement16(volatile short * const lpAddend)
{
    const long original = _InterlockedExchangeAdd16(lpAddend, -1);
    return (short)(original - 1);
}

__INTRIN_INLINE short _InterlockedIncrement16(volatile short * const lpAddend)
{
    const long original = _InterlockedExchangeAdd16(lpAddend, 1);
    return (short)(original + 1);
}
638
#if defined(_M_AMD64)
/* 64-bit decrement; returns the NEW value. */
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * const lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}

/* 64-bit increment; returns the NEW value. */
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * const lpAddend)
{
    return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif

#endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
652
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

/* 64-bit CAS via the compiler built-in (natively supported on x86-64). */
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
    return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else

/* 64-bit CAS on 32-bit x86: cmpxchg8b compares EDX:EAX (the "A" constraint,
   preloaded with Comperand) with *Destination and, on match, stores ECX:EBX
   ("c" = high dword, "b" = low dword of Exchange).  retval always ends up
   with the value *Destination held before the operation. */
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand);
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
{
    long long retval = Comperand;

    __asm__
    (
        "lock; cmpxchg8b %[Destination]" :
        [retval] "+A" (retval) :
        [Destination] "m" (*Destination),
        "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
        "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
        "memory"
    );

    return retval;
}

#endif
682
/* Adds Value to the low dword of a 64-bit counter and, only if that add
   carried (jae = jump if CF==0, i.e. skip when no carry), propagates the
   carry into the high dword with a second locked add.  Because the halves
   are updated by two separate locked operations, the 64-bit update is NOT
   one atomic step and readers can observe the intermediate state --
   presumably matching the Visual C++ statistics intrinsic (this is a
   compatibility header; see the notes at the top of the file).  Returns
   Value, not the counter. */
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
{
    __asm__
    (
        "lock; add %[Value], %[Lo32];"
        "jae LABEL%=;"
        "lock; adc $0, %[Hi32];"
        "LABEL%=:;" :
        [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
        [Value] "ir" (Value) :
        "memory"
    );

    return Value;
}
698
/* Atomically clear bit b of *a; returns the bit's previous value (btr puts
   the original bit in CF, captured here with setb, i.e. CF==1). */
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
/* 64-bit variant of _interlockedbittestandreset. */
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif

/* Atomically set bit b of *a; returns the bit's previous value (CF after
   bts, captured with setc). */
__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, const long b)
{
    unsigned char retval;
    __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}

#if defined(_M_AMD64)
/* 64-bit variant of _interlockedbittestandset. */
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
{
    unsigned char retval;
    __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
    return retval;
}
#endif
730
/*** String operations ***/

/* Forward declarations for the rep stos/movs wrappers below. */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count);
__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count);
__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count);
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count);
__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count);
__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count);
#ifdef _M_AMD64
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count);
/* NOTE(review): __movsq is declared with unsigned long * parameters while
   it moves 64-bit elements and __stosq uses unsigned __int64 * -- this
   prototype looks wrong for LLP64 targets; verify against the definition
   below and the MS prototype before relying on it. */
__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count);
#endif
743
744
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */

/* rep stosb: fill Count bytes at Dest with Data.  The CPU consumes and
   updates (E/R)DI and (E/R)CX, hence Dest/Count are tied as in-out
   operands. */
__INTRIN_INLINE void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosb" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

/* rep stosw: fill Count 16-bit words. */
__INTRIN_INLINE void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosw" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

/* rep stosl: fill Count 32-bit dwords. */
__INTRIN_INLINE void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosl" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}

#ifdef _M_AMD64
/* rep stosq: fill Count 64-bit qwords. */
__INTRIN_INLINE void __stosq(unsigned __int64 * Dest, const unsigned __int64 Data, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; stosq" :
        [Dest] "=D" (Dest), [Count] "=c" (Count) :
        "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
    );
}
#endif
787
/* rep movsb: copy Count bytes from Source to Destination, ascending
   (assumes the direction flag is clear, as the ABI guarantees). */
__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsb" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

/* rep movsw: copy Count 16-bit words. */
__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsw" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

/* rep movsd: copy Count 32-bit dwords. */
__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsd" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}

#ifdef _M_AMD64
/* rep movsq: copy Count 64-bit qwords.
   NOTE(review): parameters are declared unsigned long * although the
   instruction moves 64-bit elements (compare __stosq above); on LLP64 that
   type is 32 bits wide -- verify against the MS prototype
   (unsigned __int64 *). */
__INTRIN_INLINE void __movsq(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
    __asm__ __volatile__
    (
        "rep; movsq" :
        [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
        "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
    );
}
#endif
829
#if defined(_M_AMD64)

/*** GS segment addressing ***/

/* Forward declarations for the GS-relative load/store/arithmetic helpers
   defined below (x64 only). */
__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data);
__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data);
__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset);
__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset);
__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset);
__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset);
__INTRIN_INLINE void __incgsbyte(const unsigned long Offset);
__INTRIN_INLINE void __incgsword(const unsigned long Offset);
__INTRIN_INLINE void __incgsdword(const unsigned long Offset);
__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data);
__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data);
__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data);
__INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data);
850
/* Store a byte at gs:Offset.  The store is not visible to the compiler as
   an operand, hence the "memory" clobber. */
__INTRIN_INLINE void __writegsbyte(const unsigned long Offset, const unsigned char Data)
{
    __asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* 16-bit GS-relative store. */
__INTRIN_INLINE void __writegsword(const unsigned long Offset, const unsigned short Data)
{
    __asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* 32-bit GS-relative store. */
__INTRIN_INLINE void __writegsdword(const unsigned long Offset, const unsigned long Data)
{
    __asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* 64-bit GS-relative store. */
__INTRIN_INLINE void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
{
    __asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
870
/* Reads an 8-bit value from gs:[Offset]. */
__INTRIN_INLINE unsigned char __readgsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

/* Reads a 16-bit value from gs:[Offset]. */
__INTRIN_INLINE unsigned short __readgsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

/* Reads a 32-bit value from gs:[Offset]. */
__INTRIN_INLINE unsigned long __readgsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

/* Reads a 64-bit value from gs:[Offset]. */
__INTRIN_INLINE unsigned __int64 __readgsqword(const unsigned long Offset)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
898
/* Increments the 8-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __incgsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* Increments the 16-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __incgsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* Increments the 32-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __incgsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
913
/* Adds Data to the 8-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __addgsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* Adds Data to the 16-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __addgsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* Adds Data to the 32-bit value at gs:[Offset] in place. */
__INTRIN_INLINE void __addgsdword(const unsigned long Offset, const unsigned int Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
928
929 __INTRIN_INLINE void __addgsqword(const unsigned long Offset, const unsigned __int64 Data)
930 {
931 __asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
932 }
933
934 #else /* defined(_M_AMD64) */
935
936 /*** FS segment addressing ***/
937
938 __INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data);
939 __INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data);
940 __INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data);
941 __INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset);
942 __INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset);
943 __INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset);
944 __INTRIN_INLINE void __incfsbyte(const unsigned long Offset);
945 __INTRIN_INLINE void __incfsword(const unsigned long Offset);
946 __INTRIN_INLINE void __incfsdword(const unsigned long Offset);
947 __INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data);
948 __INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data);
949 __INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data);
950
951
/* Stores an 8-bit value at fs:[Offset] ("iq": byte-addressable register required on x86-32). */
__INTRIN_INLINE void __writefsbyte(const unsigned long Offset, const unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* Stores a 16-bit value at fs:[Offset]. */
__INTRIN_INLINE void __writefsword(const unsigned long Offset, const unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

/* Stores a 32-bit value at fs:[Offset]. */
__INTRIN_INLINE void __writefsdword(const unsigned long Offset, const unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}
966
/* Reads an 8-bit value from fs:[Offset] ("=q": byte-addressable register required on x86-32). */
__INTRIN_INLINE unsigned char __readfsbyte(const unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
	return value;
}

/* Reads a 16-bit value from fs:[Offset]. */
__INTRIN_INLINE unsigned short __readfsword(const unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}

/* Reads a 32-bit value from fs:[Offset]. */
__INTRIN_INLINE unsigned long __readfsdword(const unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
987
/* Increments the 8-bit value at fs:[Offset] in place. */
__INTRIN_INLINE void __incfsbyte(const unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* Increments the 16-bit value at fs:[Offset] in place. */
__INTRIN_INLINE void __incfsword(const unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* Increments the 32-bit value at fs:[Offset] in place. */
__INTRIN_INLINE void __incfsdword(const unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}
1002
/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
/* When Offset is not a compile-time constant, the first asm alternative adds the low
   bits of Offset itself (not Data!) to fs:[Offset] — deliberately reproducing the
   Visual C++ bug described above. Only the constant-Offset path adds Data. */
__INTRIN_INLINE void __addfsbyte(const unsigned long Offset, const unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* Same intentionally-broken pattern as __addfsbyte, 16-bit variant. */
__INTRIN_INLINE void __addfsword(const unsigned long Offset, const unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

/* Same intentionally-broken pattern as __addfsbyte, 32-bit variant. */
__INTRIN_INLINE void __addfsdword(const unsigned long Offset, const unsigned int Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}
1027
1028 #endif /* defined(_M_AMD64) */
1029
1030
1031 /*** Bit manipulation ***/
1032
1033 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask);
1034 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask);
1035 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b);
1036 #ifdef _M_AMD64
1037 __INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b);
1038 #endif
1039 __INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b);
1040 __INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b);
1041 __INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b);
1042 __INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift);
1043 __INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift);
1044 __INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift);
1045 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift);
1046 __INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift);
1047 __INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift);
1048 __INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift);
1049 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit);
1050 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit);
1051 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit);
1052 __INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value);
1053 __INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value);
1054 #ifdef _M_AMD64
1055 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1056 #else
1057 __INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value);
1058 #endif
1059
1060
/* Finds the index of the lowest set bit in Mask. Returns nonzero if a bit was
   found; *Index is left with whatever bsf produced (undefined when Mask == 0),
   matching the Visual C++ contract. */
__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}

/* Finds the index of the highest set bit in Mask; same contract as _BitScanForward. */
__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
1072
1073 /* NOTE: again, the bizarre implementation follows Visual C++ */
1074 __INTRIN_INLINE unsigned char _bittest(const long * const a, const long b)
1075 {
1076 unsigned char retval;
1077
1078 if(__builtin_constant_p(b))
1079 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
1080 else
1081 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1082
1083 return retval;
1084 }
1085
1086 #ifdef _M_AMD64
/* 64-bit variant of _bittest: tests bit b of the bit string at a (see _bittest). */
__INTRIN_INLINE unsigned char _bittest64(const __int64 * const a, const __int64 b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));

	return retval;
}
1098 #endif
1099
/* Tests bit b of *a and toggles it; returns the bit's previous value.
   Constant/variable b addressing mirrors _bittest. Not atomic. */
__INTRIN_INLINE unsigned char _bittestandcomplement(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

/* Tests bit b of *a and clears it; returns the bit's previous value. Not atomic. */
__INTRIN_INLINE unsigned char _bittestandreset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}

/* Tests bit b of *a and sets it; returns the bit's previous value. Not atomic. */
__INTRIN_INLINE unsigned char _bittestandset(long * const a, const long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
1135
/* Rotates an 8-bit value left by shift ("N": allows an immediate count, "c": cl). */
__INTRIN_INLINE unsigned char _rotl8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

/* Rotates a 16-bit value left by shift. */
__INTRIN_INLINE unsigned short _rotl16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

/* Rotates a 32-bit value left by shift. */
__INTRIN_INLINE unsigned int _rotl(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
1156
1157 #ifdef _M_AMD64
1158 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift)
1159 {
1160 unsigned __int64 retval;
1161 __asm__("rolq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1162 return retval;
1163 }
1164 #else
1165 __INTRIN_INLINE unsigned __int64 _rotl64(unsigned __int64 value, int shift)
1166 {
1167 /* FIXME: this is probably not optimal */
1168 return (value << shift) | (value >> (64 - shift));
1169 }
1170 #endif
1171
/* Rotates a 32-bit value right by shift. */
__INTRIN_INLINE unsigned int _rotr(unsigned int value, int shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

/* Rotates an 8-bit value right by shift ("=qm": byte-addressable register or memory). */
__INTRIN_INLINE unsigned char _rotr8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}

/* Rotates a 16-bit value right by shift. */
__INTRIN_INLINE unsigned short _rotr16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
1192
1193 /*
1194 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1195 constraint (edx:eax) for the Mask argument, because it's the only way GCC
1196 can pass 64-bit operands around - passing the two 32 bit parts separately
1197 just confuses it. Also we declare Bit as an int and then truncate it to
1198 match Visual C++ behavior
1199 */
1200 __INTRIN_INLINE unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
1201 {
1202 unsigned long long retval = Mask;
1203
1204 __asm__
1205 (
1206 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1207 "+A" (retval) :
1208 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1209 );
1210
1211 return retval;
1212 }
1213
1214 __INTRIN_INLINE long long __ll_rshift(const long long Mask, const int Bit)
1215 {
1216 long long retval = Mask;
1217
1218 __asm__
1219 (
1220 "shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1221 "+A" (retval) :
1222 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1223 );
1224
1225 return retval;
1226 }
1227
1228 __INTRIN_INLINE unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
1229 {
1230 unsigned long long retval = Mask;
1231
1232 __asm__
1233 (
1234 "shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1235 "+A" (retval) :
1236 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1237 );
1238
1239 return retval;
1240 }
1241
/* Swaps the two bytes of a 16-bit value (rotate by 8 == byte swap for 16 bits). */
__INTRIN_INLINE unsigned short _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}

/* Reverses the byte order of a 32-bit value. */
__INTRIN_INLINE unsigned long _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
1255
1256 #ifdef _M_AMD64
/* Reverses the byte order of a 64-bit value with a single bswapq. */
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	unsigned __int64 retval;
	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
1263 #else
/* x86-32 fallback: byte-swaps a 64-bit value by bswap-ing each 32-bit half and
   exchanging the halves. The exchange is done via the deliberately crossed
   operand bindings below: the output named [lowpart] is written to hipart and
   vice versa, so the swapped low half lands in the high half of the result. */
__INTRIN_INLINE unsigned __int64 _byteswap_uint64(unsigned __int64 value)
{
	union {
		unsigned __int64 int64part;
		struct {
			unsigned long lowpart;
			unsigned long hipart;
		};
	} retval;
	retval.int64part = value;
	__asm__("bswapl %[lowpart]\n"
	        "bswapl %[hipart]\n"
	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
	return retval.int64part;
}
1279 #endif
1280
1281 /*** 64-bit math ***/
1282
1283 __INTRIN_INLINE long long __emul(const int a, const int b);
1284 __INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b);
1285 #ifdef _M_AMD64
1286 __INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b);
1287 __INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
1288 #endif
1289
1290
/* Signed 32x32 -> 64-bit multiply (one-operand imul: result in edx:eax, "=A"). */
__INTRIN_INLINE long long __emul(const int a, const int b)
{
	long long retval;
	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

/* Unsigned 32x32 -> 64-bit multiply (one-operand mul: result in edx:eax). */
__INTRIN_INLINE unsigned long long __emulu(const unsigned int a, const unsigned int b)
{
	unsigned long long retval;
	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
1304
1305 #ifdef _M_AMD64
1306
/* Returns the high 64 bits of the signed 64x64 -> 128-bit product (rdx after imulq). */
__INTRIN_INLINE __int64 __mulh(__int64 a, __int64 b)
{
	__int64 retval;
	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}

/* Returns the high 64 bits of the unsigned 64x64 -> 128-bit product (rdx after mulq). */
__INTRIN_INLINE unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b)
{
	unsigned __int64 retval;
	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
	return retval;
}
1320
1321 #endif
1322
1323 /*** Port I/O ***/
1324
1325 __INTRIN_INLINE unsigned char __inbyte(const unsigned short Port);
1326 __INTRIN_INLINE unsigned short __inword(const unsigned short Port);
1327 __INTRIN_INLINE unsigned long __indword(const unsigned short Port);
1328 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count);
1329 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count);
1330 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count);
1331 __INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data);
1332 __INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data);
1333 __INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data);
1334 __INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count);
1335 __INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count);
1336 __INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count);
1337 __INTRIN_INLINE int _inp(unsigned short Port);
1338 __INTRIN_INLINE unsigned short _inpw(unsigned short Port);
1339 __INTRIN_INLINE unsigned long _inpd(unsigned short Port);
1340 __INTRIN_INLINE int _outp(unsigned short Port, int databyte);
1341 __INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword);
1342 __INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword);
1343
1344
/* Reads one byte from the given I/O port. */
__INTRIN_INLINE unsigned char __inbyte(const unsigned short Port)
{
	unsigned char byte;
	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
	return byte;
}

/* Reads one 16-bit word from the given I/O port. */
__INTRIN_INLINE unsigned short __inword(const unsigned short Port)
{
	unsigned short word;
	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
	return word;
}

/* Reads one 32-bit dword from the given I/O port. */
__INTRIN_INLINE unsigned long __indword(const unsigned short Port)
{
	unsigned long dword;
	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
	return dword;
}
1365
/* Reads Count bytes from Port into Buffer (rep insb; edi/ecx advance, hence the
   dummy outputs). */
__INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insb" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

/* Reads Count 16-bit words from Port into Buffer (rep insw). */
__INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insw" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}

/* Reads Count 32-bit dwords from Port into Buffer (rep insl). */
__INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
{
	__asm__ __volatile__
	(
		"rep; insl" :
		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
		"memory"
	);
}
1398
/* Writes one byte to the given I/O port. */
__INTRIN_INLINE void __outbyte(unsigned short const Port, const unsigned char Data)
{
	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

/* Writes one 16-bit word to the given I/O port. */
__INTRIN_INLINE void __outword(unsigned short const Port, const unsigned short Data)
{
	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}

/* Writes one 32-bit dword to the given I/O port. */
__INTRIN_INLINE void __outdword(unsigned short const Port, const unsigned long Data)
{
	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
}
1413
/* Writes Count bytes from Buffer to Port (rep outsb). */
__INTRIN_INLINE void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

/* Writes Count 16-bit words from Buffer to Port (rep outsw). */
__INTRIN_INLINE void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}

/* Writes Count 32-bit dwords from Buffer to Port (rep outsl). */
__INTRIN_INLINE void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
{
	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
}
1428
/* CRT-style alias for __inbyte. */
__INTRIN_INLINE int _inp(unsigned short Port)
{
	return __inbyte(Port);
}

/* CRT-style alias for __inword. */
__INTRIN_INLINE unsigned short _inpw(unsigned short Port)
{
	return __inword(Port);
}

/* CRT-style alias for __indword. */
__INTRIN_INLINE unsigned long _inpd(unsigned short Port)
{
	return __indword(Port);
}
1443
/* CRT-style alias for __outbyte; returns the value written, per MSVC. */
__INTRIN_INLINE int _outp(unsigned short Port, int databyte)
{
	__outbyte(Port, (unsigned char)databyte);
	return databyte;
}

/* CRT-style alias for __outword; returns the value written. */
__INTRIN_INLINE unsigned short _outpw(unsigned short Port, unsigned short dataword)
{
	__outword(Port, dataword);
	return dataword;
}

/* CRT-style alias for __outdword; returns the value written. */
__INTRIN_INLINE unsigned long _outpd(unsigned short Port, unsigned long dataword)
{
	__outdword(Port, dataword);
	return dataword;
}
1461
1462
1463 /*** System information ***/
1464
1465 __INTRIN_INLINE void __cpuid(int CPUInfo [], const int InfoType);
1466 __INTRIN_INLINE unsigned long long __rdtsc(void);
1467 __INTRIN_INLINE void __writeeflags(uintptr_t Value);
1468 __INTRIN_INLINE uintptr_t __readeflags(void);
1469
1470
/* Executes cpuid for leaf InfoType; stores eax/ebx/ecx/edx into CPUInfo[0..3]. */
__INTRIN_INLINE void __cpuid(int CPUInfo[], const int InfoType)
{
	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
}
1475
/* Reads the time-stamp counter. On AMD64 rdtsc splits the result across
   eax/edx, so the halves are recombined; on x86 the "=A" constraint maps
   edx:eax directly onto the 64-bit result. */
__INTRIN_INLINE unsigned long long __rdtsc(void)
{
#ifdef _M_AMD64
	unsigned long long low, high;
	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
	return low | (high << 32);
#else
	unsigned long long retval;
	__asm__ __volatile__("rdtsc" : "=A"(retval));
	return retval;
#endif
}
1488
/* Loads Value into the (e/r)flags register via push + popf. */
__INTRIN_INLINE void __writeeflags(uintptr_t Value)
{
	__asm__ __volatile__("push %0\n popf" : : "rim"(Value));
}

/* Returns the current (e/r)flags register via pushf + pop. */
__INTRIN_INLINE uintptr_t __readeflags(void)
{
	uintptr_t retval;
	__asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
	return retval;
}
1500
1501 /*** Interrupts ***/
1502
1503 __INTRIN_INLINE void __int2c(void);
1504 __INTRIN_INLINE void _disable(void);
1505 __INTRIN_INLINE void _enable(void);
1506 __INTRIN_INLINE void __halt(void);
1507
1508 #ifdef __clang__
1509 #define __debugbreak() __asm__("int $3")
1510 #else
__INTRIN_INLINE void __debugbreak(void);
/* Raises a breakpoint exception (int 3). */
__INTRIN_INLINE void __debugbreak(void)
{
	__asm__("int $3");
}
1516 #endif
1517
/* Raises interrupt 0x2C (the NT debug-service interrupt). */
__INTRIN_INLINE void __int2c(void)
{
	__asm__("int $0x2c");
}

/* Disables maskable hardware interrupts (cli). */
__INTRIN_INLINE void _disable(void)
{
	__asm__("cli" : : : "memory");
}

/* Enables maskable hardware interrupts (sti). */
__INTRIN_INLINE void _enable(void)
{
	__asm__("sti" : : : "memory");
}

/* Halts the processor until the next interrupt (hlt). */
__INTRIN_INLINE void __halt(void)
{
	__asm__("hlt\n\t" : : : "memory");
}
1537
1538 /*** Protected memory management ***/
1539
1540 __INTRIN_INLINE void __invlpg(void * const Address);
1541 #ifdef _M_AMD64
1542 __INTRIN_INLINE void __writecr0(const unsigned __int64 Data);
1543 __INTRIN_INLINE void __writecr3(const unsigned __int64 Data);
1544 __INTRIN_INLINE void __writecr4(const unsigned __int64 Data);
1545 __INTRIN_INLINE void __writecr8(const unsigned __int64 Data);
1546 __INTRIN_INLINE unsigned __int64 __readcr0(void);
1547 __INTRIN_INLINE unsigned __int64 __readcr2(void);
1548 __INTRIN_INLINE unsigned __int64 __readcr3(void);
1549 __INTRIN_INLINE unsigned __int64 __readcr4(void);
1550 __INTRIN_INLINE unsigned __int64 __readcr8(void);
1551 __INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg);
1552 __INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value);
1553 #else /* _M_AMD64 */
1554 __INTRIN_INLINE void __writecr0(const unsigned int Data);
1555 __INTRIN_INLINE void __writecr3(const unsigned int Data);
1556 __INTRIN_INLINE void __writecr4(const unsigned int Data);
1557 __INTRIN_INLINE unsigned long __readcr0(void);
1558 __INTRIN_INLINE unsigned long __readcr2(void);
1559 __INTRIN_INLINE unsigned long __readcr3(void);
1560 __INTRIN_INLINE unsigned long __readcr4(void);
1561 __INTRIN_INLINE unsigned int __readdr(unsigned int reg);
1562 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value);
1563 #endif /* _M_AMD64 */
1564
1565
1566 #ifdef _M_AMD64
1567
/* Writes Data to control register CR0. */
__INTRIN_INLINE void __writecr0(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

/* Writes Data to CR3 (page-table base; implicitly flushes non-global TLB entries). */
__INTRIN_INLINE void __writecr3(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

/* Writes Data to control register CR4. */
__INTRIN_INLINE void __writecr4(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

/* Writes Data to CR8 (task-priority register, AMD64 only). */
__INTRIN_INLINE void __writecr8(const unsigned __int64 Data)
{
	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
}

/* Reads control register CR0. */
__INTRIN_INLINE unsigned __int64 __readcr0(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads CR2 (faulting linear address of the last page fault). */
__INTRIN_INLINE unsigned __int64 __readcr2(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads CR3 (page-table base). */
__INTRIN_INLINE unsigned __int64 __readcr3(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads control register CR4. */
__INTRIN_INLINE unsigned __int64 __readcr4(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads CR8 (task-priority register). Uses an explicit movq/%q spelling,
   unlike its siblings above; the effect is the same 64-bit move. */
__INTRIN_INLINE unsigned __int64 __readcr8(void)
{
	unsigned __int64 value;
	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
	return value;
}
1622
1623 #else /* _M_AMD64 */
1624
/* Writes Data to control register CR0 (x86-32). */
__INTRIN_INLINE void __writecr0(const unsigned int Data)
{
	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
}

/* Writes Data to CR3 (page-table base; implicitly flushes non-global TLB entries). */
__INTRIN_INLINE void __writecr3(const unsigned int Data)
{
	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
}

/* Writes Data to control register CR4. */
__INTRIN_INLINE void __writecr4(const unsigned int Data)
{
	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
}

/* Reads control register CR0. */
__INTRIN_INLINE unsigned long __readcr0(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads CR2 (faulting linear address of the last page fault). */
__INTRIN_INLINE unsigned long __readcr2(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads CR3 (page-table base). */
__INTRIN_INLINE unsigned long __readcr3(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
	return value;
}

/* Reads control register CR4. */
__INTRIN_INLINE unsigned long __readcr4(void)
{
	unsigned long value;
	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
	return value;
}
1667
1668 #endif /* _M_AMD64 */
1669
1670 #ifdef _M_AMD64
1671
/* Reads debug register dr0-dr7 selected by reg. The register operand of a
   mov-from-DR must be encoded in the instruction, hence the switch.
   NOTE(review): for reg > 7 'value' is returned uninitialized — callers are
   expected to pass a valid index, matching the MSVC intrinsic's contract. */
__INTRIN_INLINE unsigned __int64 __readdr(unsigned int reg)
{
	unsigned __int64 value;
	switch (reg)
	{
		case 0:
			__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
			break;
	}
	return value;
}
1704
/* Writes value to debug register dr0-dr7 selected by reg; a no-op for reg > 7. */
__INTRIN_INLINE void __writedr(unsigned reg, unsigned __int64 value)
{
	switch (reg)
	{
		case 0:
			__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
1735
1736 #else /* _M_AMD64 */
1737
/* x86-32 variant: reads debug register dr0-dr7 selected by reg.
   NOTE(review): for reg > 7 'value' is returned uninitialized — callers are
   expected to pass a valid index, matching the MSVC intrinsic's contract. */
__INTRIN_INLINE unsigned int __readdr(unsigned int reg)
{
	unsigned int value;
	switch (reg)
	{
		case 0:
			__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
			break;
		case 1:
			__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
			break;
		case 2:
			__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
			break;
		case 3:
			__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
			break;
		case 4:
			__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
			break;
		case 5:
			__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
			break;
		case 6:
			__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
			break;
		case 7:
			__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
			break;
	}
	return value;
}
1770
/*
 * Writes 'value' into the x86 debug register selected by 'reg'
 * (dr0-dr7). Out-of-range register numbers are silently ignored,
 * matching the loose contract of the Visual C++ intrinsic.
 * The "memory" clobber orders the store against surrounding memory
 * accesses; an asm with no outputs is implicitly volatile, so the
 * write itself cannot be optimized away.
 * NOTE(review): dr4/dr5 alias dr6/dr7 only while CR4.DE is clear;
 * accessing them otherwise raises #UD -- confirm callers' intent.
 */
__INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
{
	switch (reg)
	{
		case 0:
			__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
			break;
		case 1:
			__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
			break;
		case 2:
			__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
			break;
		case 3:
			__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
			break;
		case 4:
			__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
			break;
		case 5:
			__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
			break;
		case 6:
			__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
			break;
		case 7:
			__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
			break;
	}
}
1801
1802 #endif /* _M_AMD64 */
1803
/*
 * Invalidates the TLB entry for the page containing 'Address' (invlpg,
 * privileged). Dereferencing through an "m" operand gives the
 * instruction a genuine memory reference; the "memory" clobber keeps
 * the compiler from caching memory values across the invalidation.
 */
__INTRIN_INLINE void __invlpg(void * const Address)
{
	__asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))) : "memory");
}
1808
1809
1810 /*** System operations ***/
1811
/* Forward declarations for the system-operation intrinsics defined below. */
__INTRIN_INLINE unsigned long long __readmsr(const int reg);
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value);
__INTRIN_INLINE unsigned long long __readpmc(const int counter);
__INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a);
__INTRIN_INLINE void __wbinvd(void);
__INTRIN_INLINE void __lidt(void *Source);
__INTRIN_INLINE void __sidt(void *Destination);
1819
1820
1821 __INTRIN_INLINE unsigned long long __readmsr(const int reg)
1822 {
1823 #ifdef _M_AMD64
1824 unsigned long low, high;
1825 __asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1826 return ((unsigned long long)high << 32) | low;
1827 #else
1828 unsigned long long retval;
1829 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1830 return retval;
1831 #endif
1832 }
1833
/*
 * Writes the 64-bit 'Value' into the model-specific register selected
 * by 'Register' (wrmsr, privileged). On x86-64 the value is split
 * manually into eax (low half) and edx (high half); on x86 the "A"
 * constraint binds the edx:eax pair directly.
 */
__INTRIN_INLINE void __writemsr(const unsigned long Register, const unsigned long long Value)
{
#ifdef _M_AMD64
	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
#else
	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
#endif
}
1842
1843 __INTRIN_INLINE unsigned long long __readpmc(const int counter)
1844 {
1845 unsigned long long retval;
1846 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1847 return retval;
1848 }
1849
1850 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1851 __INTRIN_INLINE unsigned long __segmentlimit(const unsigned long a)
1852 {
1853 unsigned long retval;
1854 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1855 return retval;
1856 }
1857
/*
 * Writes back and invalidates all CPU caches (wbinvd, privileged).
 * The "memory" clobber forces pending compiler-held stores to memory
 * before the flush.
 */
__INTRIN_INLINE void __wbinvd(void)
{
	__asm__ __volatile__("wbinvd" : : : "memory");
}
1862
/*
 * Loads the interrupt descriptor table register from the pseudo-
 * descriptor at 'Source' (lidt, privileged). The operand is typed as
 * short only to anchor a memory reference for the instruction; the CPU
 * reads the full 6-byte (10-byte on x86-64) descriptor.
 */
__INTRIN_INLINE void __lidt(void *Source)
{
	__asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
}
1867
1868 __INTRIN_INLINE void __sidt(void *Destination)
1869 {
1870 __asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
1871 }
1872
1873 /*** Misc operations ***/
1874
/* Forward declarations for the miscellaneous intrinsics defined below. */
__INTRIN_INLINE void _mm_pause(void);
__INTRIN_INLINE void __nop(void);
1877
/*
 * Emits the pause instruction: a spin-wait hint that reduces power
 * consumption and memory-order mis-speculation penalties in busy
 * loops. The "memory" clobber acts as a compiler barrier so loop
 * conditions are re-read from memory.
 */
__INTRIN_INLINE void _mm_pause(void)
{
	__asm__ __volatile__("pause" : : : "memory");
}
1882
/*
 * Emits a single nop instruction. (An asm statement with no output
 * operands is implicitly volatile, so GCC does not remove it.)
 */
__INTRIN_INLINE void __nop(void)
{
	__asm__ __volatile__("nop");
}
1887
1888 #ifdef __cplusplus
1889 }
1890 #endif
1891
1892 #endif /* KJK_INTRIN_X86_H_ */
1893
1894 /* EOF */