1 /*
2 Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 Microsoft Visual C++ functions. Originally developed for the ReactOS
4 (<http://www.reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 projects.
6
7 Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8
9 Permission is hereby granted, free of charge, to any person obtaining a
10 copy of this software and associated documentation files (the "Software"),
11 to deal in the Software without restriction, including without limitation
12 the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 and/or sell copies of the Software, and to permit persons to whom the
14 Software is furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30
31 /*
32 FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 behavior: some "obvious" memory barriers are not present in the Visual C++
34 implementation - e.g. __stosX; on the other hand, some memory barriers that
35 *are* present could have been missed
36 */
37
38 /*
39 NOTE: this is a *compatibility* header. Some functions may look wrong at
40 first, but they're only "as wrong" as they would be on Visual C++. Our
41 priority is compatibility
42
43 NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 constraints and the uses of __volatile__ out of my... hat. Do not touch
45 them. I hate cargo cult programming
46
47 NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 barriers aren't there in Visual C++ (e.g. __stosX)
49
50 NOTE: review all intrinsics with a return value, add/remove __volatile__
51 where necessary. If an intrinsic whose value is ignored generates a no-op
52 under Visual C++, __volatile__ must be omitted; if it always generates code
53 (for example, if it has side effects), __volatile__ must be specified. GCC
54 only optimizes away non-volatile asm blocks that have output operands;
55 blocks without outputs are implicitly volatile, so they are safe either way.
56 Oddities such as the non-volatile 'rdmsr' are intentional and follow Visual C++ behavior
57
58 NOTE: on GCC 4.1 and newer, please use the __sync_* built-ins for barriers and
59 atomic operations. Test the version like this:
60
61 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 ...
63
64 Pay attention to the type of barrier. Make it match with what Visual C++
65 would use in the same case
66 */
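/*
 Worked example (added for illustration): for GCC 4.1.2 the version test
 below evaluates to 4 * 10000 + 1 * 100 + 2 = 40102, which is greater than
 40100, so the __sync_* paths are taken; a GCC 4.0.x compiler evaluates to
 400xx and falls back to the inline assembly implementations instead.
*/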
67
68 /*** Stack frame juggling ***/
69 #define _ReturnAddress() (__builtin_return_address(0))
70 #define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
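/*
 Note (added for clarity): _AddressOfReturnAddress() assumes a frame-pointer
 based x86 frame, where __builtin_frame_address(0) yields the address of the
 saved frame pointer and the return address lives in the next slot up.
 Illustrative sketch:

	void ** slot = (void **)_AddressOfReturnAddress();
	// *slot holds the same value that _ReturnAddress() returns
*/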
71 /* TODO: __getcallerseflags but how??? */
72
73
74 /*** Atomic operations ***/
75
76 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
77 #define _ReadWriteBarrier() __sync_synchronize()
78 #else
79 static void __inline__ __attribute__((always_inline)) _MemoryBarrier(void)
80 {
81 __asm__ __volatile__("" : : : "memory");
82 }
83 #define _ReadWriteBarrier() _MemoryBarrier()
84 #endif
85
86 /* BUGBUG: GCC only supports full barriers */
87 #define _ReadBarrier _ReadWriteBarrier
88 #define _WriteBarrier _ReadWriteBarrier
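/*
 Usage sketch (illustrative only): publish data before a flag, with the
 barrier keeping the two stores in order:

	shared_data = 42;          // hypothetical shared variables
	_WriteBarrier();
	shared_ready = 1;

 Since _ReadBarrier and _WriteBarrier both alias _ReadWriteBarrier here, every
 use pays for a full barrier - stronger than Visual C++ requires, never weaker.
*/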
89
90 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
91
92 static __inline__ __attribute__((always_inline)) char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
93 {
94 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
95 }
96
97 static __inline__ __attribute__((always_inline)) short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
98 {
99 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
100 }
101
102 static __inline__ __attribute__((always_inline)) long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
103 {
104 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
105 }
106
107 static __inline__ __attribute__((always_inline)) long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
108 {
109 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
110 }
111
112 static __inline__ __attribute__((always_inline)) void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
113 {
114 return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
115 }
116
117 static __inline__ __attribute__((always_inline)) long _InterlockedExchange(volatile long * const Target, const long Value)
118 {
119 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
120 __sync_synchronize();
121 return __sync_lock_test_and_set(Target, Value);
122 }
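/*
 Usage sketch (illustrative only): because of the full barrier above,
 _InterlockedExchange can serve as a naive spinlock:

	static volatile long busy = 0;            // hypothetical lock word

	while(_InterlockedExchange(&busy, 1) != 0)
		;                                 // spin until the old value was 0
	// ... critical section ...
	_InterlockedExchange(&busy, 0);           // release
*/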
123
124 #if defined(_M_AMD64)
125 static __inline__ __attribute__((always_inline)) long long _InterlockedExchange64(volatile long long * const Target, const long long Value)
126 {
127 /* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
128 __sync_synchronize();
129 return __sync_lock_test_and_set(Target, Value);
130 }
131 #endif
132
133 static __inline__ __attribute__((always_inline)) void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
134 {
135 /* NOTE: ditto */
136 __sync_synchronize();
137 return __sync_lock_test_and_set(Target, Value);
138 }
139
140 static __inline__ __attribute__((always_inline)) short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
141 {
142 return __sync_fetch_and_add(Addend, Value);
143 }
144
145 static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
146 {
147 return __sync_fetch_and_add(Addend, Value);
148 }
149
150 static __inline__ __attribute__((always_inline)) char _InterlockedAnd8(volatile char * const value, const char mask)
151 {
152 return __sync_fetch_and_and(value, mask);
153 }
154
155 static __inline__ __attribute__((always_inline)) short _InterlockedAnd16(volatile short * const value, const short mask)
156 {
157 return __sync_fetch_and_and(value, mask);
158 }
159
160 static __inline__ __attribute__((always_inline)) long _InterlockedAnd(volatile long * const value, const long mask)
161 {
162 return __sync_fetch_and_and(value, mask);
163 }
164
165 #if defined(_M_AMD64)
166 static __inline__ __attribute__((always_inline)) long long _InterlockedAnd64(volatile long long * const value, const long long mask)
167 {
168 return __sync_fetch_and_and(value, mask);
169 }
170 #endif
171
172 static __inline__ __attribute__((always_inline)) char _InterlockedOr8(volatile char * const value, const char mask)
173 {
174 return __sync_fetch_and_or(value, mask);
175 }
176
177 static __inline__ __attribute__((always_inline)) short _InterlockedOr16(volatile short * const value, const short mask)
178 {
179 return __sync_fetch_and_or(value, mask);
180 }
181
182 static __inline__ __attribute__((always_inline)) long _InterlockedOr(volatile long * const value, const long mask)
183 {
184 return __sync_fetch_and_or(value, mask);
185 }
186
187 #if defined(_M_AMD64)
188 static __inline__ __attribute__((always_inline)) long long _InterlockedOr64(volatile long long * const value, const long long mask)
189 {
190 return __sync_fetch_and_or(value, mask);
191 }
192 #endif
193
194 static __inline__ __attribute__((always_inline)) char _InterlockedXor8(volatile char * const value, const char mask)
195 {
196 return __sync_fetch_and_xor(value, mask);
197 }
198
199 static __inline__ __attribute__((always_inline)) short _InterlockedXor16(volatile short * const value, const short mask)
200 {
201 return __sync_fetch_and_xor(value, mask);
202 }
203
204 static __inline__ __attribute__((always_inline)) long _InterlockedXor(volatile long * const value, const long mask)
205 {
206 return __sync_fetch_and_xor(value, mask);
207 }
208
209 #else
210
211 static __inline__ __attribute__((always_inline)) char _InterlockedCompareExchange8(volatile char * const Destination, const char Exchange, const char Comperand)
212 {
213 char retval = Comperand;
214 __asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
215 return retval;
216 }
217
218 static __inline__ __attribute__((always_inline)) short _InterlockedCompareExchange16(volatile short * const Destination, const short Exchange, const short Comperand)
219 {
220 short retval = Comperand;
221 __asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
222 return retval;
223 }
224
225 static __inline__ __attribute__((always_inline)) long _InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand)
226 {
227 long retval = Comperand;
228 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
229 return retval;
230 }
231
232 static __inline__ __attribute__((always_inline)) long long _InterlockedCompareExchange64(volatile long long * const Destination, const long long Exchange, const long long Comperand)
233 {
234 long long retval = Comperand;
235
236 __asm__
237 (
238 "cmpxchg8b %[Destination]" :
239 [retval] "+A" (retval) :
240 [Destination] "m" (*Destination),
241 "b" ((unsigned long)((Exchange >> 0) & 0xFFFFFFFF)),
242 "c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
243 "memory"
244 );
245
246 return retval;
247 }
248
249 static __inline__ __attribute__((always_inline)) void * _InterlockedCompareExchangePointer(void * volatile * const Destination, void * const Exchange, void * const Comperand)
250 {
251 void * retval = (void *)Comperand;
252 __asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
253 return retval;
254 }
255
256 static __inline__ __attribute__((always_inline)) long _InterlockedExchange(volatile long * const Target, const long Value)
257 {
258 long retval = Value;
259 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
260 return retval;
261 }
262
263 static __inline__ __attribute__((always_inline)) void * _InterlockedExchangePointer(void * volatile * const Target, void * const Value)
264 {
265 void * retval = Value;
266 __asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
267 return retval;
268 }
269
270 static __inline__ __attribute__((always_inline)) short _InterlockedExchangeAdd16(volatile short * const Addend, const short Value)
271 {
272 short retval = Value;
273 __asm__("lock; xaddw %w[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
274 return retval;
275 }
276
277 static __inline__ __attribute__((always_inline)) long _InterlockedExchangeAdd(volatile long * const Addend, const long Value)
278 {
279 long retval = Value;
280 __asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
281 return retval;
282 }
283
284 static __inline__ __attribute__((always_inline)) char _InterlockedAnd8(volatile char * const value, const char mask)
285 {
286 char x;
287 char y;
288
289 y = *value;
290
291 do
292 {
293 x = y;
294 y = _InterlockedCompareExchange8(value, x & mask, x);
295 }
296 while(y != x);
297
298 return y;
299 }
300
301 static __inline__ __attribute__((always_inline)) short _InterlockedAnd16(volatile short * const value, const short mask)
302 {
303 short x;
304 short y;
305
306 y = *value;
307
308 do
309 {
310 x = y;
311 y = _InterlockedCompareExchange16(value, x & mask, x);
312 }
313 while(y != x);
314
315 return y;
316 }
317
318 static __inline__ __attribute__((always_inline)) long _InterlockedAnd(volatile long * const value, const long mask)
319 {
320 long x;
321 long y;
322
323 y = *value;
324
325 do
326 {
327 x = y;
328 y = _InterlockedCompareExchange(value, x & mask, x);
329 }
330 while(y != x);
331
332 return y;
333 }
334
335 static __inline__ __attribute__((always_inline)) char _InterlockedOr8(volatile char * const value, const char mask)
336 {
337 char x;
338 char y;
339
340 y = *value;
341
342 do
343 {
344 x = y;
345 y = _InterlockedCompareExchange8(value, x | mask, x);
346 }
347 while(y != x);
348
349 return y;
350 }
351
352 static __inline__ __attribute__((always_inline)) short _InterlockedOr16(volatile short * const value, const short mask)
353 {
354 short x;
355 short y;
356
357 y = *value;
358
359 do
360 {
361 x = y;
362 y = _InterlockedCompareExchange16(value, x | mask, x);
363 }
364 while(y != x);
365
366 return y;
367 }
368
369 static __inline__ __attribute__((always_inline)) long _InterlockedOr(volatile long * const value, const long mask)
370 {
371 long x;
372 long y;
373
374 y = *value;
375
376 do
377 {
378 x = y;
379 y = _InterlockedCompareExchange(value, x | mask, x);
380 }
381 while(y != x);
382
383 return y;
384 }
385
386 static __inline__ __attribute__((always_inline)) char _InterlockedXor8(volatile char * const value, const char mask)
387 {
388 char x;
389 char y;
390
391 y = *value;
392
393 do
394 {
395 x = y;
396 y = _InterlockedCompareExchange8(value, x ^ mask, x);
397 }
398 while(y != x);
399
400 return y;
401 }
402
403 static __inline__ __attribute__((always_inline)) short _InterlockedXor16(volatile short * const value, const short mask)
404 {
405 short x;
406 short y;
407
408 y = *value;
409
410 do
411 {
412 x = y;
413 y = _InterlockedCompareExchange16(value, x ^ mask, x);
414 }
415 while(y != x);
416
417 return y;
418 }
419
420 static __inline__ __attribute__((always_inline)) long _InterlockedXor(volatile long * const value, const long mask)
421 {
422 long x;
423 long y;
424
425 y = *value;
426
427 do
428 {
429 x = y;
430 y = _InterlockedCompareExchange(value, x ^ mask, x);
431 }
432 while(y != x);
433
434 return y;
435 }
436
437 #endif
438
439 static __inline__ __attribute__((always_inline)) long _InterlockedAddLargeStatistic(volatile long long * const Addend, const long Value)
440 {
441 __asm__
442 (
443 "lock; add %[Value], %[Lo32];"
444 "jae LABEL%=;"
445 "lock; adc $0, %[Hi32];"
446 "LABEL%=:;" :
447 [Lo32] "=m" (*((volatile long *)(Addend) + 0)), [Hi32] "=m" (*((volatile long *)(Addend) + 1)) :
448 [Value] "ir" (Value)
449 );
450
451 return Value;
452 }
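/*
 Note (added for clarity): the two locked instructions above update the low
 and high dwords separately, so the 64-bit sum is not written in one atomic
 step. The intrinsic is intended for statistic counters whose readers can
 tolerate a momentary carry lag; note also that it returns Value, not the new
 contents of *Addend.
*/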
453
454 static __inline__ __attribute__((always_inline)) long _InterlockedDecrement(volatile long * const lpAddend)
455 {
456 return _InterlockedExchangeAdd(lpAddend, -1) - 1;
457 }
458
459 static __inline__ __attribute__((always_inline)) long _InterlockedIncrement(volatile long * const lpAddend)
460 {
461 return _InterlockedExchangeAdd(lpAddend, 1) + 1;
462 }
463
464 static __inline__ __attribute__((always_inline)) short _InterlockedDecrement16(volatile short * const lpAddend)
465 {
466 return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
467 }
468
469 static __inline__ __attribute__((always_inline)) short _InterlockedIncrement16(volatile short * const lpAddend)
470 {
471 return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
472 }
473
474 static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandreset(volatile long * a, const long b)
475 {
476 unsigned char retval;
477 __asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "=m" (a) : [b] "Ir" (b) : "memory");
478 return retval;
479 }
480
481 #if defined(_M_AMD64)
482 static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandreset64(volatile long long * a, const long long b)
483 {
484 unsigned char retval;
485 __asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "=m" (a) : [b] "Ir" (b) : "memory");
486 return retval;
487 }
488 #endif
489
490 static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandset(volatile long * a, const long b)
491 {
492 unsigned char retval;
493 __asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "=m" (a) : [b] "Ir" (b) : "memory");
494 return retval;
495 }
496
497 #if defined(_M_AMD64)
498 static __inline__ __attribute__((always_inline)) unsigned char _interlockedbittestandset64(volatile long long * a, const long long b)
499 {
500 unsigned char retval;
501 __asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "=m" (a) : [b] "Ir" (b) : "memory");
502 return retval;
503 }
504 #endif
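/*
 Usage sketch (illustrative only): the bit-test-and-set intrinsics return the
 bit's previous value, which makes a one-shot initialization guard trivial:

	static volatile long init_done = 0;       // hypothetical flag word

	if(!_interlockedbittestandset(&init_done, 0))
	{
		// only the first caller gets here
	}
*/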
505
506 /*** String operations ***/
507 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
508 static __inline__ __attribute__((always_inline)) void __stosb(unsigned char * Dest, const unsigned char Data, size_t Count)
509 {
510 __asm__ __volatile__
511 (
512 "rep; stosb" :
513 [Dest] "=D" (Dest), [Count] "=c" (Count) :
514 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
515 );
516 }
517
518 static __inline__ __attribute__((always_inline)) void __stosw(unsigned short * Dest, const unsigned short Data, size_t Count)
519 {
520 __asm__ __volatile__
521 (
522 "rep; stosw" :
523 [Dest] "=D" (Dest), [Count] "=c" (Count) :
524 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
525 );
526 }
527
528 static __inline__ __attribute__((always_inline)) void __stosd(unsigned long * Dest, const unsigned long Data, size_t Count)
529 {
530 __asm__ __volatile__
531 (
532 "rep; stosl" :
533 [Dest] "=D" (Dest), [Count] "=c" (Count) :
534 "[Dest]" (Dest), "a" (Data), "[Count]" (Count)
535 );
536 }
537
538 static __inline__ __attribute__((always_inline)) void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
539 {
540 __asm__ __volatile__
541 (
542 "rep; movsb" :
543 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
544 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
545 );
546 }
547
548 static __inline__ __attribute__((always_inline)) void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
549 {
550 __asm__ __volatile__
551 (
552 "rep; movsw" :
553 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
554 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
555 );
556 }
557
558 static __inline__ __attribute__((always_inline)) void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
559 {
560 __asm__ __volatile__
561 (
562 "rep; movsd" :
563 [Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
564 "[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
565 );
566 }
567
568 #if defined(_M_AMD64)
569 /*** GS segment addressing ***/
570
571 static __inline__ __attribute__((always_inline)) void __writegsbyte(const unsigned long Offset, const unsigned char Data)
572 {
573 __asm__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
574 }
575
576 static __inline__ __attribute__((always_inline)) void __writegsword(const unsigned long Offset, const unsigned short Data)
577 {
578 __asm__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
579 }
580
581 static __inline__ __attribute__((always_inline)) void __writegsdword(const unsigned long Offset, const unsigned long Data)
582 {
583 __asm__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
584 }
585
586 static __inline__ __attribute__((always_inline)) void __writegsqword(const unsigned long Offset, const unsigned __int64 Data)
587 {
588 __asm__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
589 }
590
591 static __inline__ __attribute__((always_inline)) unsigned char __readgsbyte(const unsigned long Offset)
592 {
593 unsigned char value;
594 __asm__("movb %%gs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
595 return value;
596 }
597
598 static __inline__ __attribute__((always_inline)) unsigned short __readgsword(const unsigned long Offset)
599 {
600 unsigned short value;
601 __asm__("movw %%gs:%a[Offset], %w[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
602 return value;
603 }
604
605 static __inline__ __attribute__((always_inline)) unsigned long __readgsdword(const unsigned long Offset)
606 {
607 unsigned long value;
608 __asm__("movl %%gs:%a[Offset], %k[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
609 return value;
610 }
611
612 static __inline__ __attribute__((always_inline)) unsigned __int64 __readgsqword(const unsigned long Offset)
613 {
614 unsigned __int64 value;
615 __asm__("movq %%gs:%a[Offset], %q[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
616 return value;
617 }
618
619 static __inline__ __attribute__((always_inline)) void __incgsbyte(const unsigned long Offset)
620 {
621 __asm__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset));
622 }
623
624 static __inline__ __attribute__((always_inline)) void __incgsword(const unsigned long Offset)
625 {
626 __asm__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset));
627 }
628
629 static __inline__ __attribute__((always_inline)) void __incgsdword(const unsigned long Offset)
630 {
631 __asm__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset));
632 }
633
634 /* NOTE: the bizarre implementation of __addgsxxx mimics the broken Visual C++ behavior */
635 static __inline__ __attribute__((always_inline)) void __addgsbyte(const unsigned long Offset, const unsigned char Data)
636 {
637 if(!__builtin_constant_p(Offset))
638 __asm__("addb %k[Offset], %%gs:%a[Offset]" : : [Offset] "r" (Offset));
639 else
640 __asm__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
641 }
642
643 static __inline__ __attribute__((always_inline)) void __addgsword(const unsigned long Offset, const unsigned short Data)
644 {
645 if(!__builtin_constant_p(Offset))
646 __asm__("addw %k[Offset], %%gs:%a[Offset]" : : [Offset] "r" (Offset));
647 else
648 __asm__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
649 }
650
651 static __inline__ __attribute__((always_inline)) void __addgsdword(const unsigned long Offset, const unsigned int Data)
652 {
653 if(!__builtin_constant_p(Offset))
654 __asm__("addl %k[Offset], %%gs:%a[Offset]" : : [Offset] "r" (Offset));
655 else
656 __asm__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
657 }
658
659 #else
660 /*** FS segment addressing ***/
661 static __inline__ __attribute__((always_inline)) void __writefsbyte(const unsigned long Offset, const unsigned char Data)
662 {
663 __asm__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
664 }
665
666 static __inline__ __attribute__((always_inline)) void __writefsword(const unsigned long Offset, const unsigned short Data)
667 {
668 __asm__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
669 }
670
671 static __inline__ __attribute__((always_inline)) void __writefsdword(const unsigned long Offset, const unsigned long Data)
672 {
673 __asm__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
674 }
675
676 static __inline__ __attribute__((always_inline)) unsigned char __readfsbyte(const unsigned long Offset)
677 {
678 unsigned char value;
679 __asm__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
680 return value;
681 }
682
683 static __inline__ __attribute__((always_inline)) unsigned short __readfsword(const unsigned long Offset)
684 {
685 unsigned short value;
686 __asm__("movw %%fs:%a[Offset], %w[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
687 return value;
688 }
689
690 static __inline__ __attribute__((always_inline)) unsigned long __readfsdword(const unsigned long Offset)
691 {
692 unsigned long value;
693 __asm__("movl %%fs:%a[Offset], %k[value]" : [value] "=q" (value) : [Offset] "irm" (Offset));
694 return value;
695 }
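/*
 Example (illustrative; 0x18 is the Self field of the NT_TIB that fs points
 at on Win32, used here only as a familiar offset):

	unsigned long teb = __readfsdword(0x18);  // linear address of the current TEB
*/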
696
697 static __inline__ __attribute__((always_inline)) void __incfsbyte(const unsigned long Offset)
698 {
699 __asm__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset));
700 }
701
702 static __inline__ __attribute__((always_inline)) void __incfsword(const unsigned long Offset)
703 {
704 __asm__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset));
705 }
706
707 static __inline__ __attribute__((always_inline)) void __incfsdword(const unsigned long Offset)
708 {
709 __asm__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset));
710 }
711
712 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
713 static __inline__ __attribute__((always_inline)) void __addfsbyte(const unsigned long Offset, const unsigned char Data)
714 {
715 if(!__builtin_constant_p(Offset))
716 __asm__("addb %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset));
717 else
718 __asm__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
719 }
720
721 static __inline__ __attribute__((always_inline)) void __addfsword(const unsigned long Offset, const unsigned short Data)
722 {
723 if(!__builtin_constant_p(Offset))
724 __asm__("addw %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset));
725 else
726 __asm__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
727 }
728
729 static __inline__ __attribute__((always_inline)) void __addfsdword(const unsigned long Offset, const unsigned int Data)
730 {
731 if(!__builtin_constant_p(Offset))
732 __asm__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset));
733 else
734 __asm__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data));
735 }
736 #endif
737
738
739 /*** Bit manipulation ***/
740 static __inline__ __attribute__((always_inline)) unsigned char _BitScanForward(unsigned long * const Index, const unsigned long Mask)
741 {
742 __asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
743 return Mask ? 1 : 0;
744 }
745
746 static __inline__ __attribute__((always_inline)) unsigned char _BitScanReverse(unsigned long * const Index, const unsigned long Mask)
747 {
748 __asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
749 return Mask ? 1 : 0;
750 }
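/*
 Usage sketch (illustrative): as with the Visual C++ intrinsics, Index is only
 meaningful when the return value is nonzero, i.e. when Mask has a set bit:

	unsigned long index;
	if(_BitScanForward(&index, 0x00000050))
	{
		// index is now 4, the position of the lowest set bit
	}
*/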
751
752 /* NOTE: again, the bizarre implementation follows Visual C++ */
753 static __inline__ __attribute__((always_inline)) unsigned char _bittest(const long * const a, const long b)
754 {
755 unsigned char retval;
756
757 if(__builtin_constant_p(b))
758 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
759 else
760 __asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
761
762 return retval;
763 }
764
765 static __inline__ __attribute__((always_inline)) unsigned char _bittestandcomplement(long * const a, const long b)
766 {
767 unsigned char retval;
768
769 if(__builtin_constant_p(b))
770 __asm__("btc %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
771 else
772 __asm__("btc %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
773
774 return retval;
775 }
776
777 static __inline__ __attribute__((always_inline)) unsigned char _bittestandreset(long * const a, const long b)
778 {
779 unsigned char retval;
780
781 if(__builtin_constant_p(b))
782 __asm__("btr %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
783 else
784 __asm__("btr %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
785
786 return retval;
787 }
788
789 static __inline__ __attribute__((always_inline)) unsigned char _bittestandset(long * const a, const long b)
790 {
791 unsigned char retval;
792
793 if(__builtin_constant_p(b))
794 __asm__("bts %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
795 else
796 __asm__("bts %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*a), [b] "r" (b));
797
798 return retval;
799 }
800
801 static __inline__ __attribute__((always_inline)) unsigned char _rotl8(const unsigned char value, const unsigned char shift)
802 {
803 unsigned char retval;
804 __asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
805 return retval;
806 }
807
808 static __inline__ __attribute__((always_inline)) unsigned short _rotl16(const unsigned short value, const unsigned char shift)
809 {
810 unsigned short retval;
811 __asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
812 return retval;
813 }
814
815 #ifndef __MSVCRT__
816 static __inline__ __attribute__((always_inline)) unsigned int _rotl(const unsigned int value, const int shift)
817 {
818 unsigned long retval;
819 __asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
820 return retval;
821 }
822
823 static __inline__ __attribute__((always_inline)) unsigned long _rotr(const unsigned int value, const unsigned char shift)
824 {
825 unsigned long retval;
826 __asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
827 return retval;
828 }
829 #endif
830
831 static __inline__ __attribute__((always_inline)) unsigned char _rotr8(const unsigned char value, const unsigned char shift)
832 {
833 unsigned char retval;
834 __asm__("rorb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
835 return retval;
836 }
837
838 static __inline__ __attribute__((always_inline)) unsigned short _rotr16(const unsigned short value, const unsigned char shift)
839 {
840 unsigned short retval;
841 __asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
842 return retval;
843 }
844
845 /*
846 NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
847 constraint (edx:eax) for the Mask argument, because it's the only way GCC
848 can pass 64-bit operands around - passing the two 32 bit parts separately
849 just confuses it. Also we declare Bit as an int and then truncate it to
850 match Visual C++ behavior
851 */
852 static __inline__ __attribute__((always_inline)) unsigned long long __ll_lshift(const unsigned long long Mask, const int Bit)
853 {
854 unsigned long long retval = Mask;
855
856 __asm__
857 (
858 "shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
859 "+A" (retval) :
860 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
861 );
862
863 return retval;
864 }
865
866 static __inline__ __attribute__((always_inline)) long long __ll_rshift(const long long Mask, const int Bit)
867 {
868 unsigned long long retval = Mask;
869
870 __asm__
871 (
872 "shldl %b[Bit], %%eax, %%edx; sarl %b[Bit], %%eax" :
873 "+A" (retval) :
874 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
875 );
876
877 return retval;
878 }
879
880 static __inline__ __attribute__((always_inline)) unsigned long long __ull_rshift(const unsigned long long Mask, int Bit)
881 {
882 unsigned long long retval = Mask;
883
884 __asm__
885 (
886 "shrdl %b[Bit], %%eax, %%edx; shrl %b[Bit], %%eax" :
887 "+A" (retval) :
888 [Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
889 );
890
891 return retval;
892 }
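/*
 Worked example (added for illustration), with Bit = 4:

	__ll_lshift (0x0000000100000000ULL, 4) yields 0x0000001000000000ULL
	__ull_rshift(0x0000000100000000ULL, 4) yields 0x0000000010000000ULL
	__ll_rshift (0xFFFFFFFF00000000LL,  4) yields 0xFFFFFFFFF0000000LL (sign-extended)

 Only the low 5 bits of Bit reach the shift instructions, so counts of 32 or
 more do not behave like a full 64-bit shift.
*/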
893
894
895 /*** 64-bit math ***/
896 static __inline__ __attribute__((always_inline)) long long __emul(const int a, const int b)
897 {
898 long long retval;
899 __asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
900 return retval;
901 }
902
903 static __inline__ __attribute__((always_inline)) unsigned long long __emulu(const unsigned int a, const unsigned int b)
904 {
905 unsigned long long retval;
906 __asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
907 return retval;
908 }
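/*
 Worked example (added for illustration): __emul and __emulu widen to 64 bits
 before multiplying, so the product never truncates:

	__emulu(0xFFFFFFFFu, 2) == 0x1FFFFFFFEULL

 whereas the plain C expression 0xFFFFFFFFu * 2 wraps around to 0xFFFFFFFEu.
*/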
909
910
911 /*** Port I/O ***/
912 static __inline__ __attribute__((always_inline)) unsigned char __inbyte(const unsigned short Port)
913 {
914 unsigned char byte;
915 __asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
916 return byte;
917 }
918
919 static __inline__ __attribute__((always_inline)) unsigned short __inword(const unsigned short Port)
920 {
921 unsigned short word;
922 __asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
923 return word;
924 }
925
926 static __inline__ __attribute__((always_inline)) unsigned long __indword(const unsigned short Port)
927 {
928 unsigned long dword;
929 __asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
930 return dword;
931 }
932
933 static __inline__ __attribute__((always_inline)) void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
934 {
935 __asm__ __volatile__
936 (
937 "rep; insb" :
938 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
939 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
940 "memory"
941 );
942 }
943
944 static __inline__ __attribute__((always_inline)) void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
945 {
946 __asm__ __volatile__
947 (
948 "rep; insw" :
949 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
950 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
951 "memory"
952 );
953 }
954
955 static __inline__ __attribute__((always_inline)) void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
956 {
957 __asm__ __volatile__
958 (
959 "rep; insl" :
960 [Buffer] "=D" (Buffer), [Count] "=c" (Count) :
961 "d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
962 "memory"
963 );
964 }
965
966 static __inline__ __attribute__((always_inline)) void __outbyte(unsigned short const Port, const unsigned char Data)
967 {
968 __asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
969 }
970
971 static __inline__ __attribute__((always_inline)) void __outword(unsigned short const Port, const unsigned short Data)
972 {
973 __asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
974 }
975
976 static __inline__ __attribute__((always_inline)) void __outdword(unsigned short const Port, const unsigned long Data)
977 {
978 __asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
979 }
980
981 static __inline__ __attribute__((always_inline)) void __outbytestring(unsigned short const Port, const unsigned char * const Buffer, const unsigned long Count)
982 {
983 __asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
984 }
985
986 static __inline__ __attribute__((always_inline)) void __outwordstring(unsigned short const Port, const unsigned short * const Buffer, const unsigned long Count)
987 {
988 __asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
989 }
990
991 static __inline__ __attribute__((always_inline)) void __outdwordstring(unsigned short const Port, const unsigned long * const Buffer, const unsigned long Count)
992 {
993 __asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
994 }
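/*
 Usage sketch (illustrative only; 0x64 and 0x60 are the legacy PS/2 controller
 status and data ports, chosen purely as a familiar example):

	if(__inbyte(0x64) & 0x01)                 // output buffer full?
	{
		unsigned char scancode = __inbyte(0x60);
		// ... handle scancode ...
	}
*/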
995
996
997 /*** System information ***/
998 static __inline__ __attribute__((always_inline)) void __cpuid(int CPUInfo[], const int InfoType)
999 {
1000 __asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1001 }
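/*
 Usage sketch (illustrative; assumes memcpy is available): leaf 0 returns the
 highest supported leaf in CPUInfo[0] and the vendor string in EBX, EDX, ECX
 order (CPUInfo[1], CPUInfo[3], CPUInfo[2]):

	int info[4];
	char vendor[13];

	__cpuid(info, 0);
	memcpy(vendor + 0, &info[1], 4);
	memcpy(vendor + 4, &info[3], 4);
	memcpy(vendor + 8, &info[2], 4);
	vendor[12] = '\0';
*/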
1002
1003 static __inline__ __attribute__((always_inline)) unsigned long long __rdtsc(void)
1004 {
1005 unsigned long long retval;
1006 __asm__ __volatile__("rdtsc" : "=A"(retval));
1007 return retval;
1008 }
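/*
 Usage sketch (illustrative only): a rough cycle count; keep in mind the TSC
 is per-processor and rdtsc is not a serializing instruction:

	unsigned long long start = __rdtsc();
	// ... code under test ...
	unsigned long long cycles = __rdtsc() - start;
*/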
1009
1010
1011 /*** Interrupts ***/
1012 static __inline__ __attribute__((always_inline)) void __debugbreak(void)
1013 {
1014 __asm__("int $3");
1015 }
1016
1017 static __inline__ __attribute__((always_inline)) void __int2c(void)
1018 {
1019 __asm__("int $0x2c");
1020 }
1021
1022 static __inline__ __attribute__((always_inline)) void _disable(void)
1023 {
1024 __asm__("cli");
1025 }
1026
1027 static __inline__ __attribute__((always_inline)) void _enable(void)
1028 {
1029 __asm__("sti");
1030 }
1031
1032
1033 /*** Protected memory management ***/
1034 static __inline__ __attribute__((always_inline)) unsigned long __readcr0(void)
1035 {
1036 unsigned long value;
1037 __asm__ __volatile__("mov %%cr0, %[value]" : [value] "=q" (value));
1038 return value;
1039 }
1040
1041 static __inline__ __attribute__((always_inline)) unsigned long __readcr2(void)
1042 {
1043 unsigned long value;
1044 __asm__ __volatile__("mov %%cr2, %[value]" : [value] "=q" (value));
1045 return value;
1046 }
1047
1048 static __inline__ __attribute__((always_inline)) unsigned long __readcr3(void)
1049 {
1050 unsigned long value;
1051 __asm__ __volatile__("mov %%cr3, %[value]" : [value] "=q" (value));
1052 return value;
1053 }
1054
1055 static __inline__ __attribute__((always_inline)) unsigned long __readcr4(void)
1056 {
1057 unsigned long value;
1058 __asm__ __volatile__("mov %%cr4, %[value]" : [value] "=q" (value));
1059 return value;
1060 }
1061
1062 static __inline__ __attribute__((always_inline)) void __writecr0(const unsigned long long Data)
1063 {
1064 __asm__("mov %[Data], %%cr0" : : [Data] "q" ((const unsigned long)(Data & 0xFFFFFFFF)) : "memory");
1065 }
1066
1067 static __inline__ __attribute__((always_inline)) void __writecr3(const unsigned long long Data)
1068 {
1069 __asm__("mov %[Data], %%cr3" : : [Data] "q" ((const unsigned long)(Data & 0xFFFFFFFF)) : "memory");
1070 }
1071
1072 static __inline__ __attribute__((always_inline)) void __writecr4(const unsigned long long Data)
1073 {
1074 __asm__("mov %[Data], %%cr4" : : [Data] "q" ((const unsigned long)(Data & 0xFFFFFFFF)) : "memory");
1075 }
1076
1077 static __inline__ __attribute__((always_inline)) void __invlpg(void * const Address)
1078 {
1079 __asm__("invlpg %[Address]" : : [Address] "m" (*((unsigned char *)(Address))));
1080 }
1081
1082
1083 /*** System operations ***/
1084 static __inline__ __attribute__((always_inline)) unsigned long long __readmsr(const int reg)
1085 {
1086 unsigned long long retval;
1087 __asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1088 return retval;
1089 }
1090
1091 static __inline__ __attribute__((always_inline)) void __writemsr(const unsigned long Register, const unsigned long long Value)
1092 {
1093 __asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1094 }
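/*
 Usage sketch (illustrative only; 0x1B is the architectural IA32_APIC_BASE MSR,
 used here merely as an example; rdmsr/wrmsr fault outside ring 0):

	unsigned long long apic_base = __readmsr(0x1B);
	__writemsr(0x1B, apic_base);              // write the value back unchanged
*/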
1095
1096 static __inline__ __attribute__((always_inline)) unsigned long long __readpmc(const int counter)
1097 {
1098 unsigned long long retval;
1099 __asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
1100 return retval;
1101 }
1102
1103 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
1104 static __inline__ __attribute__((always_inline)) unsigned long __segmentlimit(const unsigned long a)
1105 {
1106 unsigned long retval;
1107 __asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
1108 return retval;
1109 }
1110
1111 static __inline__ __attribute__((always_inline)) void __wbinvd(void)
1112 {
1113 __asm__ __volatile__("wbinvd");
1114 }
1115
1116 #endif /* KJK_INTRIN_X86_H_ */
1117
1118 /* EOF */