* Branch for the 0.3.14 release.
[reactos.git] / ntoskrnl / ke / i386 / cpu.c
1 /*
2 * PROJECT: ReactOS Kernel
3 * LICENSE: GPL - See COPYING in the top level directory
4 * FILE: ntoskrnl/ke/i386/cpu.c
5 * PURPOSE: Routines for CPU-level support
6 * PROGRAMMERS: Alex Ionescu (alex.ionescu@reactos.org)
7 */
8
9 /* INCLUDES *****************************************************************/
10
11 #include <ntoskrnl.h>
12 #define NDEBUG
13 #include <debug.h>
14
/* GLOBALS *******************************************************************/

/* The TSS to use for Double Fault Traps (INT 0x8) */
UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];

/* The TSS to use for NMI Fault Traps (INT 0x2) */
UCHAR KiNMITSS[KTSS_IO_MAPS];

/* CPU Features and Flags */
ULONG KeI386CpuType;
ULONG KeI386CpuStep;
ULONG KiFastSystemCallDisable = 0;      /* Set by registry/loader to force IRET-based system calls */
ULONG KeI386NpxPresent = 0;             /* Nonzero if an x87 FPU was detected */
ULONG KiMXCsrMask = 0;
ULONG MxcsrFeatureMask = 0;
ULONG KeI386XMMIPresent = 0;            /* SSE support */
ULONG KeI386FxsrPresent = 0;            /* FXSAVE/FXRSTOR support */
ULONG KeI386MachineType;
ULONG Ke386Pae = FALSE;
ULONG Ke386NoExecute = FALSE;
ULONG KeLargestCacheLine = 0x40;        /* Largest cache line size seen, in bytes */
ULONG KeDcacheFlushCount = 0;
ULONG KeIcacheFlushCount = 0;
ULONG KiDmaIoCoherency = 0;
ULONG KePrefetchNTAGranularity = 32;    /* L1 line size used for non-temporal prefetch */
BOOLEAN KiI386PentiumLockErrataPresent;
BOOLEAN KiSMTProcessorsPresent;

/* The distance between SYSEXIT and IRETD return modes */
UCHAR KiSystemCallExitAdjust;

/* The offset that was applied -- either 0 or the value above */
UCHAR KiSystemCallExitAdjusted;

/* Whether the adjustment was already done once */
BOOLEAN KiFastCallCopyDoneOnce;

/* Flush data */
volatile LONG KiTbFlushTimeStamp;

/* CPU Signatures (12-byte vendor strings returned by CPUID leaf 0) */
static const CHAR CmpIntelID[]       = "GenuineIntel";
static const CHAR CmpAmdID[]         = "AuthenticAMD";
static const CHAR CmpCyrixID[]       = "CyrixInstead";
static const CHAR CmpTransmetaID[]   = "GenuineTMx86";
static const CHAR CmpCentaurID[]     = "CentaurHauls";
static const CHAR CmpRiseID[]        = "RiseRiseRise";
63 /* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
64
65 VOID
66 NTAPI
67 CPUID(IN ULONG InfoType,
68 OUT PULONG CpuInfoEax,
69 OUT PULONG CpuInfoEbx,
70 OUT PULONG CpuInfoEcx,
71 OUT PULONG CpuInfoEdx)
72 {
73 ULONG CpuInfo[4];
74
75 /* Perform the CPUID Operation */
76 __cpuid((int*)CpuInfo, InfoType);
77
78 /* Return the results */
79 *CpuInfoEax = CpuInfo[0];
80 *CpuInfoEbx = CpuInfo[1];
81 *CpuInfoEcx = CpuInfo[2];
82 *CpuInfoEdx = CpuInfo[3];
83 }
84
/*
 * Writes a 64-bit value to the given model-specific register.
 * Thin wrapper over the __writemsr intrinsic for MSVC compatibility.
 */
VOID
NTAPI
WRMSR(IN ULONG Register,
      IN LONGLONG Value)
{
    /* Write to the MSR */
    __writemsr(Register, Value);
}
93
/*
 * Reads a 64-bit value from the given model-specific register.
 * Thin wrapper over the __readmsr intrinsic for MSVC compatibility.
 */
LONGLONG
FASTCALL
RDMSR(IN ULONG Register)
{
    /* Read from the MSR */
    return __readmsr(Register);
}
101
/* NSC/Cyrix CPU configuration register index */
#define CX86_CCR1 0xc1

/* NSC/Cyrix CPU indexed register access macros */

/* Reads a Cyrix configuration register: write the index to port 0x22,
   then read the value back from port 0x23. */
static __inline
UCHAR
getCx86(UCHAR reg)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
}
113
/* Writes a Cyrix configuration register: write the index to port 0x22,
   then the new value to port 0x23. */
static __inline
void
setCx86(UCHAR reg, UCHAR data)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
}
121
122
123 /* FUNCTIONS *****************************************************************/
124
/*
 * Detects CPUID support by toggling the EFLAGS.ID bit and, when CPUID leaf 1
 * is available, records the CPU type and stepping in the current PRCB.
 * EFLAGS is restored on exit.
 */
VOID
NTAPI
INIT_FUNCTION
KiSetProcessorType(VOID)
{
    ULONG EFlags, NewEFlags;
    ULONG Reg, Dummy;
    ULONG Stepping, Type;

    /* Start by assuming no CPUID data */
    KeGetCurrentPrcb()->CpuID = 0;

    /* Save EFlags */
    EFlags = __readeflags();

    /* XOR out the ID bit and update EFlags */
    NewEFlags = EFlags ^ EFLAGS_ID;
    __writeeflags(NewEFlags);

    /* Get them back and see if they were modified */
    NewEFlags = __readeflags();
    if (NewEFlags != EFlags)
    {
        /* The modification worked, so CPUID exists. Set the ID Bit again. */
        EFlags |= EFLAGS_ID;
        __writeeflags(EFlags);

        /* Perform CPUID 0 to see if CPUID 1 is supported */
        CPUID(0, &Reg, &Dummy, &Dummy, &Dummy);
        if (Reg > 0)
        {
            /* Do CPUID 1 now */
            CPUID(1, &Reg, &Dummy, &Dummy, &Dummy);

            /*
             * Get the Stepping and Type. The stepping contains both the
             * Model and the Step, while the Type contains the returned Type.
             * We ignore the family.
             *
             * For the stepping, we convert this: zzzzzzxy into this: x0y
             */
            Stepping = Reg & 0xF0;
            Stepping <<= 4;
            Stepping += (Reg & 0xFF);
            Stepping &= 0xF0F;
            Type = Reg & 0xF00;
            Type >>= 8;

            /* Save them in the PRCB */
            KeGetCurrentPrcb()->CpuID = TRUE;
            KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
            KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
        }
        else
        {
            DPRINT1("CPUID Support lacking\n");
        }
    }
    else
    {
        DPRINT1("CPUID Support lacking\n");
    }

    /* Restore EFLAGS */
    __writeeflags(EFlags);
}
191
/*
 * Reads the 12-character vendor string from CPUID leaf 0, stores it in the
 * PRCB, and maps it to one of the CPU_* vendor constants. Returns 0 when
 * CPUID is not available.
 */
ULONG
NTAPI
INIT_FUNCTION
KiGetCpuVendor(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    ULONG Vendor[5];
    ULONG Temp;

    /* Assume no Vendor ID and fail if no CPUID Support. */
    Prcb->VendorString[0] = 0;
    if (!Prcb->CpuID) return 0;

    /* Get the Vendor ID and null-terminate it */
    CPUID(0, &Vendor[0], &Vendor[1], &Vendor[2], &Vendor[3]);
    Vendor[4] = 0;

    /* Re-arrange vendor string: CPUID returns it in EBX, EDX, ECX order,
       so swap the ECX and EDX dwords to get the readable EBX-EDX-ECX text */
    Temp = Vendor[2];
    Vendor[2] = Vendor[3];
    Vendor[3] = Temp;

    /* Copy it to the PRCB and null-terminate it again */
    RtlCopyMemory(Prcb->VendorString,
                  &Vendor[1],
                  sizeof(Prcb->VendorString) - sizeof(CHAR));
    Prcb->VendorString[sizeof(Prcb->VendorString) - sizeof(CHAR)] = ANSI_NULL;

    /* Now check the CPU Type */
    if (!strcmp(Prcb->VendorString, CmpIntelID))
    {
        return CPU_INTEL;
    }
    else if (!strcmp(Prcb->VendorString, CmpAmdID))
    {
        return CPU_AMD;
    }
    else if (!strcmp(Prcb->VendorString, CmpCyrixID))
    {
        DPRINT1("Cyrix CPU support not fully tested!\n");
        return CPU_CYRIX;
    }
    else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
    {
        DPRINT1("Transmeta CPU support not fully tested!\n");
        return CPU_TRANSMETA;
    }
    else if (!strcmp(Prcb->VendorString, CmpCentaurID))
    {
        DPRINT1("Centaur CPU support not fully tested!\n");
        return CPU_CENTAUR;
    }
    else if (!strcmp(Prcb->VendorString, CmpRiseID))
    {
        DPRINT1("Rise CPU support not fully tested!\n");
        return CPU_RISE;
    }

    /* Unknown CPU */
    DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
    return CPU_UNKNOWN;
}
254
/*
 * Builds the KF_* feature bit mask for the boot CPU: queries CPUID leaf 1,
 * applies vendor-specific quirks and errata workarounds, converts the CPUID
 * feature flags (EDX) into KF_* bits, and probes the extended CPUID leaves
 * (0x80000000+). Also records the initial APIC ID and the number of logical
 * processors in the PRCB.
 */
ULONG
NTAPI
INIT_FUNCTION
KiGetFeatureBits(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    ULONG Vendor;
    ULONG FeatureBits = KF_WORKING_PTE;
    ULONG Reg[4], Dummy;
    UCHAR Ccr1;
    BOOLEAN ExtendedCPUID = TRUE;
    ULONG CpuFeatures = 0;

    /* Get the Vendor ID */
    Vendor = KiGetCpuVendor();

    /* Make sure we got a valid vendor ID at least. */
    if (!Vendor) return FeatureBits;

    /* Get the CPUID Info. Features are in Reg[3] (EDX); Reg[1] is EBX. */
    CPUID(1, &Reg[0], &Reg[1], &Dummy, &Reg[3]);

    /* Set the initial APIC ID (bits 31:24 of EBX) */
    Prcb->InitialApicId = (UCHAR)(Reg[1] >> 24);

    switch (Vendor)
    {
        /* Intel CPUs */
        case CPU_INTEL:

            /* Check if it's a P6 */
            if (Prcb->CpuType == 6)
            {
                /* Perform the special sequence to get the MicroCode Signature */
                WRMSR(0x8B, 0);
                CPUID(1, &Dummy, &Dummy, &Dummy, &Dummy);
                Prcb->UpdateSignature.QuadPart = RDMSR(0x8B);
            }
            else if (Prcb->CpuType == 5)
            {
                /* On P5, enable workaround for the LOCK errata. */
                KiI386PentiumLockErrataPresent = TRUE;
            }

            /* Check for broken P6 with bad SMP PTE implementation
               (signature matched against family/model/stepping in EAX) */
            if (((Reg[0] & 0x0FF0) == 0x0610 && (Reg[0] & 0x000F) <= 0x9) ||
                ((Reg[0] & 0x0FF0) == 0x0630 && (Reg[0] & 0x000F) <= 0x4))
            {
                /* Remove support for correct PTE support. */
                FeatureBits &= ~KF_WORKING_PTE;
            }

            /* Check if the CPU is too old to support SYSENTER */
            if ((Prcb->CpuType < 6) ||
                ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
            {
                /* Disable it (clear the SEP bit in the feature flags) */
                Reg[3] &= ~0x800;
            }

            break;

        /* AMD CPUs */
        case CPU_AMD:

            /* Check if this is a K5 or K6. (family 5) */
            if ((Reg[0] & 0x0F00) == 0x0500)
            {
                /* Get the Model Number */
                switch (Reg[0] & 0x00F0)
                {
                    /* Model 1: K5 - 5k86 (initial models) */
                    case 0x0010:

                        /* Check if this is Step 0 or 1. They don't support PGE */
                        if ((Reg[0] & 0x000F) > 0x03) break;

                        /* fallthrough: early steppings share Model 0's PGE limitation */

                    /* Model 0: K5 - SSA5 */
                    case 0x0000:

                        /* Model 0 doesn't support PGE at all. */
                        Reg[3] &= ~0x2000;
                        break;

                    /* Model 8: K6-2 */
                    case 0x0080:

                        /* K6-2, Step 8 and over have support for MTRR. */
                        if ((Reg[0] & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
                        break;

                    /* Model 9: K6-III
                       Model D: K6-2+, K6-III+ */
                    case 0x0090:
                    case 0x00D0:

                        FeatureBits |= KF_AMDK6MTRR;
                        break;
                }
            }
            else if((Reg[0] & 0x0F00) < 0x0500)
            {
                /* Families below 5 don't support PGE, PSE or CMOV at all */
                Reg[3] &= ~(0x08 | 0x2000 | 0x8000);

                /* They also don't support advanced CPUID functions. */
                ExtendedCPUID = FALSE;
            }

            break;

        /* Cyrix CPUs */
        case CPU_CYRIX:

            /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
            if (Prcb->CpuType == 6 &&
                Prcb->CpuStep <= 1)
            {
                /* Get CCR1 value */
                Ccr1 = getCx86(CX86_CCR1);

                /* Enable the NO_LOCK bit */
                Ccr1 |= 0x10;

                /* Set the new CCR1 value */
                setCx86(CX86_CCR1, Ccr1);
            }

            break;

        /* Transmeta CPUs */
        case CPU_TRANSMETA:

            /* Enable CMPXCHG8B if the family (>= 5), model and stepping (>= 4.2) support it */
            if ((Reg[0] & 0x0FFF) >= 0x0542)
            {
                WRMSR(0x80860004, RDMSR(0x80860004) | 0x0100);
                FeatureBits |= KF_CMPXCHG8B;
            }

            break;

        /* Centaur, IDT, Rise and VIA CPUs */
        case CPU_CENTAUR:
        case CPU_RISE:

            /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
               However, this feature exists and operates properly without any additional steps. */
            FeatureBits |= KF_CMPXCHG8B;

            break;
    }

    /* Set the current features */
    CpuFeatures = Reg[3];

    /* Convert all CPUID Feature bits (CPUID.1:EDX) into our KF_* format */
    if (CpuFeatures & 0x00000002) FeatureBits |= KF_V86_VIS | KF_CR4;
    if (CpuFeatures & 0x00000008) FeatureBits |= KF_LARGE_PAGE | KF_CR4;
    if (CpuFeatures & 0x00000010) FeatureBits |= KF_RDTSC;
    if (CpuFeatures & 0x00000100) FeatureBits |= KF_CMPXCHG8B;
    if (CpuFeatures & 0x00000800) FeatureBits |= KF_FAST_SYSCALL;
    if (CpuFeatures & 0x00001000) FeatureBits |= KF_MTRR;
    if (CpuFeatures & 0x00002000) FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
    if (CpuFeatures & 0x00008000) FeatureBits |= KF_CMOV;
    if (CpuFeatures & 0x00010000) FeatureBits |= KF_PAT;
    if (CpuFeatures & 0x00200000) FeatureBits |= KF_DTS;
    if (CpuFeatures & 0x00800000) FeatureBits |= KF_MMX;
    if (CpuFeatures & 0x01000000) FeatureBits |= KF_FXSR;
    if (CpuFeatures & 0x02000000) FeatureBits |= KF_XMMI;
    if (CpuFeatures & 0x04000000) FeatureBits |= KF_XMMI64;

    /* Check if the CPU has hyper-threading */
    if (CpuFeatures & 0x10000000)
    {
        /* Set the number of logical CPUs (bits 23:16 of EBX) */
        Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(Reg[1] >> 16);
        if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
        {
            /* We're on dual-core */
            KiSMTProcessorsPresent = TRUE;
        }
    }
    else
    {
        /* We only have a single CPU */
        Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
    }

    /* Check if CPUID 0x80000000 is supported */
    if (ExtendedCPUID)
    {
        /* Do the call */
        CPUID(0x80000000, &Reg[0], &Dummy, &Dummy, &Dummy);
        if ((Reg[0] & 0xffffff00) == 0x80000000)
        {
            /* Check if CPUID 0x80000001 is supported */
            if (Reg[0] >= 0x80000001)
            {
                /* Check which extended features are available. */
                CPUID(0x80000001, &Dummy, &Dummy, &Dummy, &Reg[3]);

                /* Check if NX-bit is supported */
                if (Reg[3] & 0x00100000) FeatureBits |= KF_NX_BIT;

                /* Now handle each features for each CPU Vendor */
                switch (Vendor)
                {
                    case CPU_AMD:
                    case CPU_CENTAUR:
                        if (Reg[3] & 0x80000000) FeatureBits |= KF_3DNOW;
                        break;
                }
            }
        }
    }

#define print_supported(kf_value) ((FeatureBits & kf_value) ? #kf_value : "")
    DPRINT1("Supported CPU features : %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
            print_supported(KF_V86_VIS),
            print_supported(KF_RDTSC),
            print_supported(KF_CR4),
            print_supported(KF_CMOV),
            print_supported(KF_GLOBAL_PAGE),
            print_supported(KF_LARGE_PAGE),
            print_supported(KF_MTRR),
            print_supported(KF_CMPXCHG8B),
            print_supported(KF_MMX),
            print_supported(KF_WORKING_PTE),
            print_supported(KF_PAT),
            print_supported(KF_FXSR),
            print_supported(KF_FAST_SYSCALL),
            print_supported(KF_XMMI),
            print_supported(KF_3DNOW),
            print_supported(KF_AMDK6MTRR),
            print_supported(KF_XMMI64),
            print_supported(KF_DTS),
            print_supported(KF_NX_BIT),
            print_supported(KF_NX_DISABLED),
            print_supported(KF_NX_ENABLED));
#undef print_supported

    /* Return the Feature Bits */
    return FeatureBits;
}
500
/*
 * Determines L2 cache size, associativity and line size for the boot CPU
 * and stores them in the PCR. Intel CPUs are decoded via the CPUID leaf 2
 * cache descriptor bytes; AMD CPUs via extended leaves 0x80000005/6.
 * Also updates KePrefetchNTAGranularity with the detected L1 line size.
 */
VOID
NTAPI
INIT_FUNCTION
KiGetCacheInformation(VOID)
{
    PKIPCR Pcr = (PKIPCR)KeGetPcr();
    ULONG Vendor;
    ULONG Data[4], Dummy;
    ULONG CacheRequests = 0, i;
    ULONG CurrentRegister;
    UCHAR RegisterByte, Associativity = 0;
    ULONG Size, CacheLine = 64, CurrentSize = 0;
    BOOLEAN FirstPass = TRUE;

    /* Set default L2 size */
    Pcr->SecondLevelCacheSize = 0;

    /* Get the Vendor ID and make sure we support CPUID */
    Vendor = KiGetCpuVendor();
    if (!Vendor) return;

    /* Check the Vendor ID */
    switch (Vendor)
    {
        /* Handle Intel case */
        case CPU_INTEL:

            /* Check if we support CPUID 2 */
            CPUID(0, &Data[0], &Dummy, &Dummy, &Dummy);
            if (Data[0] >= 2)
            {
                /* We need to loop for the number of times CPUID will tell us to */
                do
                {
                    /* Do the CPUID call */
                    CPUID(2, &Data[0], &Data[1], &Data[2], &Data[3]);

                    /* Check if it was the first call */
                    if (FirstPass)
                    {
                        /*
                         * The number of times to loop is the first byte. Read
                         * it and then destroy it so we don't get confused.
                         */
                        CacheRequests = Data[0] & 0xFF;
                        Data[0] &= 0xFFFFFF00;

                        /* Don't go over this again */
                        FirstPass = FALSE;
                    }

                    /* Loop all 4 registers */
                    for (i = 0; i < 4; i++)
                    {
                        /* Get the current register */
                        CurrentRegister = Data[i];

                        /*
                         * If the upper bit is set, then this register should
                         * be skipped.
                         */
                        if (CurrentRegister & 0x80000000) continue;

                        /* Keep looping for every byte inside this register */
                        while (CurrentRegister)
                        {
                            /* Read a byte, skip a byte. */
                            RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
                            CurrentRegister >>= 8;
                            if (!RegisterByte) continue;

                            /*
                             * Valid values are from 0x40 (0 bytes) to 0x49
                             * (32MB), or from 0x80 to 0x89 (same size but
                             * 8-way associative).
                             */
                            if (((RegisterByte > 0x40) && (RegisterByte <= 0x47)) ||
                                ((RegisterByte > 0x78) && (RegisterByte <= 0x7C)) ||
                                ((RegisterByte > 0x80) && (RegisterByte <= 0x85)))
                            {
                                /* Compute associativity */
                                Associativity = 4;
                                if (RegisterByte >= 0x79) Associativity = 8;

                                /* Mask out only the first nibble */
                                RegisterByte &= 0x07;

                                /* Check if this cache is bigger than the last */
                                Size = 0x10000 << RegisterByte;
                                if ((Size / Associativity) > CurrentSize)
                                {
                                    /* Set the L2 Cache Size and Associativity */
                                    CurrentSize = Size / Associativity;
                                    Pcr->SecondLevelCacheSize = Size;
                                    Pcr->SecondLevelCacheAssociativity = Associativity;
                                }
                            }
                            else if ((RegisterByte > 0x21) && (RegisterByte <= 0x29))
                            {
                                /* These descriptors imply a 128-byte line;
                                   raise the minimum cache line size */
                                if (CacheLine < 128) CacheLine = 128;

                                /* Hard-code size/associativity */
                                Associativity = 8;
                                switch (RegisterByte)
                                {
                                    case 0x22:
                                        Size = 512 * 1024;
                                        Associativity = 4;
                                        break;

                                    case 0x23:
                                        Size = 1024 * 1024;
                                        break;

                                    case 0x25:
                                        Size = 2048 * 1024;
                                        break;

                                    case 0x29:
                                        Size = 4096 * 1024;
                                        break;

                                    default:
                                        Size = 0;
                                        break;
                                }

                                /* Check if this cache is bigger than the last */
                                if ((Size / Associativity) > CurrentSize)
                                {
                                    /* Set the L2 Cache Size and Associativity */
                                    CurrentSize = Size / Associativity;
                                    Pcr->SecondLevelCacheSize = Size;
                                    Pcr->SecondLevelCacheAssociativity = Associativity;
                                }
                            }
                            else if (((RegisterByte > 0x65) && (RegisterByte < 0x69)) ||
                                      (RegisterByte == 0x2C) || (RegisterByte == 0xF0))
                            {
                                /* Indicates L1 cache line of 64 bytes */
                                KePrefetchNTAGranularity = 64;
                            }
                            else if (RegisterByte == 0xF1)
                            {
                                /* Indicates L1 cache line of 128 bytes */
                                KePrefetchNTAGranularity = 128;
                            }
                            else if (((RegisterByte >= 0x4A) && (RegisterByte <= 0x4C)) ||
                                      (RegisterByte == 0x78) ||
                                      (RegisterByte == 0x7D) ||
                                      (RegisterByte == 0x7F) ||
                                      (RegisterByte == 0x86) ||
                                      (RegisterByte == 0x87))
                            {
                                /* Set minimum cache line size */
                                if (CacheLine < 64) CacheLine = 64;

                                /* Hard-code size/associativity */
                                switch (RegisterByte)
                                {
                                    case 0x4A:
                                        Size = 4 * 1024 * 1024;
                                        Associativity = 8;
                                        break;

                                    case 0x4B:
                                        Size = 6 * 1024 * 1024;
                                        Associativity = 12;
                                        break;

                                    case 0x4C:
                                        Size = 8 * 1024 * 1024;
                                        Associativity = 16;
                                        break;

                                    case 0x78:
                                        Size = 1 * 1024 * 1024;
                                        Associativity = 4;
                                        break;

                                    case 0x7D:
                                        Size = 2 * 1024 * 1024;
                                        Associativity = 8;
                                        break;

                                    case 0x7F:
                                        Size = 512 * 1024;
                                        Associativity = 2;
                                        break;

                                    case 0x86:
                                        Size = 512 * 1024;
                                        Associativity = 4;
                                        break;

                                    case 0x87:
                                        Size = 1 * 1024 * 1024;
                                        Associativity = 8;
                                        break;

                                    default:
                                        Size = 0;
                                        break;
                                }

                                /* Check if this cache is bigger than the last */
                                if ((Size / Associativity) > CurrentSize)
                                {
                                    /* Set the L2 Cache Size and Associativity */
                                    CurrentSize = Size / Associativity;
                                    Pcr->SecondLevelCacheSize = Size;
                                    Pcr->SecondLevelCacheAssociativity = Associativity;
                                }
                            }
                        }
                    }
                } while (--CacheRequests);
            }
            break;

        case CPU_AMD:

            /* Check if we support CPUID 0x80000005
               NOTE(review): the comparison below is against 0x80000006, not
               0x80000005 — confirm against upstream whether this is intended */
            CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
            if (Data[0] >= 0x80000006)
            {
                /* Get L1 size first (ECX bits 7:0 = L1 data cache line size) */
                CPUID(0x80000005, &Data[0], &Data[1], &Data[2], &Data[3]);
                KePrefetchNTAGranularity = Data[2] & 0xFF;

                /* Check if we support CPUID 0x80000006 */
                CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
                if (Data[0] >= 0x80000006)
                {
                    /* Get 2nd level cache and tlb size */
                    CPUID(0x80000006, &Data[0], &Data[1], &Data[2], &Data[3]);

                    /* Cache line size (ECX bits 7:0) */
                    CacheLine = Data[2] & 0xFF;

                    /* Hardcode associativity from the L2 associativity field
                       (ECX bits 15:12) */
                    RegisterByte = (Data[2] >> 12) & 0xFF;
                    switch (RegisterByte)
                    {
                        case 2:
                            Associativity = 2;
                            break;

                        case 4:
                            Associativity = 4;
                            break;

                        case 6:
                            Associativity = 8;
                            break;

                        case 8:
                        case 15:
                            Associativity = 16;
                            break;

                        default:
                            Associativity = 1;
                            break;
                    }

                    /* Compute size (ECX bits 31:16 = L2 size in KB) */
                    Size = (Data[2] >> 16) << 10;

                    /* Hack for Model 6, Stepping 300 */
                    if ((KeGetCurrentPrcb()->CpuType == 6) &&
                        (KeGetCurrentPrcb()->CpuStep == 0x300))
                    {
                        /* Stick 64K in there */
                        Size = 64 * 1024;
                    }

                    /* Set the L2 Cache Size and associativity */
                    Pcr->SecondLevelCacheSize = Size;
                    Pcr->SecondLevelCacheAssociativity = Associativity;
                }
            }
            break;

        case CPU_CYRIX:
        case CPU_TRANSMETA:
        case CPU_CENTAUR:
        case CPU_RISE:

            /* FIXME */
            break;
    }

    /* Set the cache line */
    if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
    DPRINT1("Prefetch Cache: %d bytes\tL2 Cache: %d bytes\tL2 Cache Line: %d bytes\tL2 Cache Associativity: %d\n",
            KePrefetchNTAGranularity,
            Pcr->SecondLevelCacheSize,
            KeLargestCacheLine,
            Pcr->SecondLevelCacheAssociativity);
}
803
804 VOID
805 NTAPI
806 INIT_FUNCTION
807 KiSetCR0Bits(VOID)
808 {
809 ULONG Cr0;
810
811 /* Save current CR0 */
812 Cr0 = __readcr0();
813
814 /* If this is a 486, enable Write-Protection */
815 if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;
816
817 /* Set new Cr0 */
818 __writecr0(Cr0);
819 }
820
/*
 * Initializes the I/O permission map and interrupt direction maps of a TSS.
 * If a GDT entry is supplied, its limit is set to cover the TSS structure.
 * The I/O map is filled with 0xFF (all ports denied); the direction maps
 * are cleared except for a few DPMI-related entries.
 */
VOID
NTAPI
INIT_FUNCTION
KiInitializeTSS2(IN PKTSS Tss,
                 IN PKGDTENTRY TssEntry OPTIONAL)
{
    PUCHAR p;

    /* Make sure the GDT Entry is valid */
    if (TssEntry)
    {
        /* Set the Limit */
        TssEntry->LimitLow = sizeof(KTSS) - 1;
        TssEntry->HighWord.Bits.LimitHi = 0;
    }

    /* Now clear the I/O Map (all bits set = all ports trapped) */
    ASSERT(IOPM_COUNT == 1);
    RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);

    /* Initialize Interrupt Direction Maps */
    p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
    RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);

    /* Add DPMI support for interrupts */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;

    /* Initialize the default Interrupt Direction Map */
    p = Tss->IntDirectionMap;
    RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);

    /* Add DPMI support (same entries as above) */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;
}
859
/*
 * Sets the basic fields of a TSS: no accessible I/O map, task-switch traps
 * disabled, no LDT, and the ring-0 stack segment selector.
 */
VOID
NTAPI
KiInitializeTSS(IN PKTSS Tss)
{
    /* Set an invalid map base (no I/O bitmap accessible) */
    Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);

    /* Disable traps during Task Switches */
    Tss->Flags = 0;

    /* Set LDT and Ring 0 SS */
    Tss->LDT = 0;
    Tss->Ss0 = KGDT_R0_DATA;
}
874
/*
 * Sets up the boot TSS and loads the task register, then installs two
 * task gates: one in IDT entry 8 (Double Fault) backed by KiDoubleFaultTSS,
 * and one in IDT entry 2 (NMI) backed by KiNMITSS. Each fault TSS gets its
 * own GDT descriptor and is pre-loaded with the register state needed to
 * run its trap handler on the dedicated double-fault stack.
 */
VOID
FASTCALL
INIT_FUNCTION
Ki386InitializeTss(IN PKTSS Tss,
                   IN PKIDTENTRY Idt,
                   IN PKGDTENTRY Gdt)
{
    PKGDTENTRY TssEntry, TaskGateEntry;

    /* Initialize the boot TSS. */
    TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    KiInitializeTSS2(Tss, TssEntry);
    KiInitializeTSS(Tss);

    /* Load the task register */
    Ke386SetTr(KGDT_TSS);

    /* Setup the Task Gate for Double Fault Traps (IDT entry 8) */
    TaskGateEntry = (PKGDTENTRY)&Idt[8];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;

    /* Initialize the TSS used for handling double faults. */
    Tss = (PKTSS)KiDoubleFaultTSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap08);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* Setup the Double Trap TSS entry in the GDT */
    TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;

    /* Now setup the NMI Task Gate (IDT entry 2) */
    TaskGateEntry = (PKGDTENTRY)&Idt[2];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;

    /* Initialize the actual TSS (NMI shares the double-fault stack) */
    Tss = (PKTSS)KiNMITSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap02);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* And its associated TSS Entry */
    TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;
}
955
956 VOID
957 NTAPI
958 KeFlushCurrentTb(VOID)
959 {
960 /* Flush the TLB by resetting CR3 */
961 __writecr3(__readcr3());
962 }
963
/*
 * Restores the processor's special registers from a saved KPROCESSOR_STATE:
 * control registers, debug registers, GDT/IDT/LDT and the task register.
 * Counterpart of KiSaveProcessorControlState.
 */
VOID
NTAPI
KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
{
    PKGDTENTRY TssEntry;

    //
    // Restore the CR registers (CR4 only if the CPU has one)
    //
    __writecr0(ProcessorState->SpecialRegisters.Cr0);
    Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
    __writecr3(ProcessorState->SpecialRegisters.Cr3);
    if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);

    //
    // Restore the DR registers
    //
    __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
    __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
    __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
    __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
    __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
    __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);

    //
    // Restore GDT and IDT
    //
    Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);

    //
    // Clear the busy flag in the TSS descriptor so LTR doesn't fault
    // if we reload the same selector
    //
    TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
                            ProcessorState->SpecialRegisters.Tr);
    TssEntry->HighWord.Bytes.Flags1 &= ~0x2;

    //
    // Restore TSS and LDT
    //
    Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
    Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
}
1007
/*
 * Saves the processor's special registers (control, debug, descriptor
 * tables, TR/LDTR) into the given KPROCESSOR_STATE. DR7 is cleared after
 * saving so stale breakpoints cannot fire while the state is held.
 */
VOID
NTAPI
KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
{
    /* Save the CR registers (CR4 only exists on KF_CR4 CPUs) */
    ProcessorState->SpecialRegisters.Cr0 = __readcr0();
    ProcessorState->SpecialRegisters.Cr2 = __readcr2();
    ProcessorState->SpecialRegisters.Cr3 = __readcr3();
    ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
                                           __readcr4() : 0;

    /* Save the DR registers, then disable all breakpoints */
    ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
    ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
    ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
    ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
    ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
    ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
    __writedr(7, 0);

    /* Save GDT, IDT, LDT and TSS */
    Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
    ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
    ProcessorState->SpecialRegisters.Ldtr = Ke386GetLocalDescriptorTable();
}
1034
/*
 * Records the machine type reported by the boot loader (low byte of the
 * i386 loader-block field) into KeI386MachineType.
 */
VOID
NTAPI
INIT_FUNCTION
KiInitializeMachineType(VOID)
{
    /* Set the Machine Type we got from NTLDR */
    KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
}
1043
/*
 * IPI broadcast callback: programs the SYSENTER MSRs on the current CPU.
 * 0x174 = IA32_SYSENTER_CS, 0x175 = IA32_SYSENTER_ESP,
 * 0x176 = IA32_SYSENTER_EIP. Context is unused (IPI callback signature).
 */
ULONG_PTR
NTAPI
INIT_FUNCTION
KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
{
    /* Set the kernel CS and the kernel entry stack (the DPC stack) */
    WRMSR(0x174, KGDT_R0_CODE);
    WRMSR(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);

    /* Set SYSENTER_EIP to the fast system call entry point */
    WRMSR(0x176, (ULONG_PTR)KiFastCallEntry);
    return 0;
}
1057
/*
 * Chooses the system-call exit path: SYSEXIT when the CPU supports SYSENTER
 * and it has not been administratively disabled, otherwise IRET. When
 * enabling, broadcasts an IPI so every processor programs its SYSENTER MSRs.
 */
VOID
NTAPI
INIT_FUNCTION
KiRestoreFastSyscallReturnState(VOID)
{
    /* Check if the CPU Supports fast system call */
    if (KeFeatureBits & KF_FAST_SYSCALL)
    {
        /* Check if it has been disabled (registry/loader option) */
        if (KiFastSystemCallDisable)
        {
            /* Disable fast system call */
            KeFeatureBits &= ~KF_FAST_SYSCALL;
            KiFastCallExitHandler = KiSystemCallTrapReturn;
            DPRINT1("Support for SYSENTER disabled.\n");
        }
        else
        {
            /* Do an IPI to enable it on all processors */
            KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);

            /* It's enabled, so use the proper exit stub */
            KiFastCallExitHandler = KiSystemCallSysExitReturn;
            DPRINT("Support for SYSENTER detected.\n");
        }
    }
    else
    {
        /* Use the IRET handler */
        KiFastCallExitHandler = KiSystemCallTrapReturn;
        DPRINT1("No support for SYSENTER detected.\n");
    }
}
1091
/*
 * IPI callback: sets CR4.DE (Debugging Extensions) on the current CPU.
 * Context is unused (IPI callback signature).
 */
ULONG_PTR
NTAPI
INIT_FUNCTION
Ki386EnableDE(IN ULONG_PTR Context)
{
    /* Enable DE */
    __writecr4(__readcr4() | CR4_DE);
    return 0;
}
1101
/*
 * IPI callback: sets CR4.OSFXSR to enable FXSAVE/FXRSTOR and SSE on the
 * current CPU. Context is unused (IPI callback signature).
 */
ULONG_PTR
NTAPI
INIT_FUNCTION
Ki386EnableFxsr(IN ULONG_PTR Context)
{
    /* Enable FXSR */
    __writecr4(__readcr4() | CR4_FXSR);
    return 0;
}
1111
/*
 * IPI callback: installs an interrupt gate for vector 0x13 (#XF, SIMD
 * floating-point exception) pointing at KiTrap13, then sets CR4.OSXMMEXCPT
 * to enable unmasked SSE exceptions. Context is unused.
 */
ULONG_PTR
NTAPI
INIT_FUNCTION
Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
{
    PKIDTENTRY IdtEntry;

    /* Get the IDT Entry for Interrupt 0x13 */
    IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];

    /* Set it up as a ring-0 interrupt gate to KiTrap13 */
    IdtEntry->Selector = KGDT_R0_CODE;
    IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
    IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
    ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;

    /* Enable XMMI exceptions */
    __writecr4(__readcr4() | CR4_XMMEXCPT);
    return 0;
}
1134
/*
 * Workaround for the Pentium F00F/LOCK errata: relocates the IDT so that
 * its first 7 entries sit at the end of a page that is then marked
 * read-only, forcing a page fault (which the kernel can handle) instead of
 * a CPU hang on the offending locked access.
 */
VOID
NTAPI
INIT_FUNCTION
KiI386PentiumLockErrataFixup(VOID)
{
    KDESCRIPTOR IdtDescriptor;
    PKIDTENTRY NewIdt, NewIdt2;

    /* Allocate memory for a new IDT
       NOTE(review): the allocation result is not checked; a NULL return
       here would fault below — presumably acceptable this early in boot,
       but worth confirming */
    NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);

    /* Put everything after the first 7 entries on a new page */
    NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));

    /* Disable interrupts while swapping the IDT */
    _disable();

    /* Get the current IDT and copy it */
    __sidt(&IdtDescriptor.Limit);
    RtlCopyMemory(NewIdt2,
                  (PVOID)IdtDescriptor.Base,
                  IdtDescriptor.Limit + 1);
    IdtDescriptor.Base = (ULONG)NewIdt2;

    /* Set the new IDT */
    __lidt(&IdtDescriptor.Limit);
    ((PKIPCR)KeGetPcr())->IDT = NewIdt2;

    /* Restore interrupts */
    _enable();

    /* Set the first 7 entries as read-only to produce a fault */
    MmSetPageProtect(NULL, NewIdt, PAGE_READONLY);
}
1169
/*
 * Writes back and invalidates all processor caches via WBINVD.
 * Returns FALSE on pre-P6 CPUs where this is not supported here.
 */
BOOLEAN
NTAPI
KeInvalidateAllCaches(VOID)
{
    /* Only supported on Pentium Pro and higher */
    if (KeI386CpuType < 6) return FALSE;

    /* Invalidate all caches */
    __wbinvd();
    return TRUE;
}
1181
/*
 * Zeroes a range of memory. Plain RtlZeroMemory is used; no SSE/XMMI
 * fast path is taken in this routine.
 */
VOID
FASTCALL
KeZeroPages(IN PVOID Address,
            IN ULONG Size)
{
    /* Not using XMMI in this routine */
    RtlZeroMemory(Address, Size);
}
1190
/*
 * Captures the current processor state into the PRCB: the full register
 * context (including debug registers) from the trap frame, plus the
 * special/control registers. ExceptionFrame is unused on x86.
 */
VOID
NTAPI
KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
                     IN PKEXCEPTION_FRAME ExceptionFrame)
{
    PKPRCB Prcb = KeGetCurrentPrcb();

    //
    // Save full context
    //
    Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
                                                     CONTEXT_DEBUG_REGISTERS;
    KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);

    //
    // Save control registers
    //
    KiSaveProcessorControlState(&Prcb->ProcessorState);
}
1210
/*
 * KiIsNpxPresent - Detect whether an x87 FPU (NPX) is physically present.
 *
 * Writes a 0xFFFF marker, executes FNINIT/FNSTSW, and checks whether the
 * low byte of the status word was cleared. With no FPU attached the store
 * never happens, so the marker's low byte stays non-zero.
 *
 * Returns TRUE if an FPU is present, FALSE otherwise. In both cases CR0
 * is left with EM|TS set — presumably so the first FPU use traps and the
 * NPX state can be set up lazily; TODO confirm against the NPX trap path.
 */
BOOLEAN
NTAPI
INIT_FUNCTION
KiIsNpxPresent(VOID)
{
    ULONG Cr0;
    USHORT Magic;

    /* Set magic marker that a real FPU will overwrite */
    Magic = 0xFFFF;

    /* Read CR0 and mask out FPU flags so the FPU instructions below
       execute directly instead of trapping */
    Cr0 = __readcr0() & ~(CR0_MP | CR0_TS | CR0_EM | CR0_ET);

    /* Initialize the FPU and store its status word over the marker */
#ifdef _MSC_VER
    __asm fninit;
    __asm fnstsw Magic;
#else
    asm volatile ("fninit;" "fnstsw %0" : "+m"(Magic));
#endif

    /* Magic's low byte should now be cleared by a working FPU */
    if (Magic & 0xFF)
    {
        /* You don't have an FPU -- enable emulation for now */
        __writecr0(Cr0 | CR0_EM | CR0_TS);
        return FALSE;
    }

    /* You have an FPU; record its presence via the extension-type bit */
    Cr0 |= CR0_ET;

    /* Enable native FPU error reporting (INT 16) on 486 and higher */
    if (KeGetCurrentPrcb()->CpuType >= 3) Cr0 |= CR0_NE;

    /* Write CR0 back, still with EM|TS set (see header note) */
    __writecr0(Cr0 | CR0_EM | CR0_TS);
    return TRUE;
}
1251
/*
 * KiIsNpxErrataPresent - Test for the classic Pentium FDIV errata.
 *
 * Computes Value1 - (Value1 / Value2) * Value2 with 4195835.0 / 3145727.0,
 * the well-known FDIV-bug operand pair. On a correct FPU the round trip
 * yields exactly 0; a flawed divider produces a non-zero residue.
 *
 * Returns TRUE if the errata is present, FALSE otherwise.
 * Runs with interrupts disabled and restores CR0 before returning.
 */
BOOLEAN
NTAPI
INIT_FUNCTION
KiIsNpxErrataPresent(VOID)
{
    static double Value1 = 4195835.0, Value2 = 3145727.0;
    INT ErrataPresent;
    ULONG Cr0;

    /* Disable interrupts so nothing disturbs the FPU state mid-test */
    _disable();

    /* Read CR0 and remove FPU trap flags so the x87 ops run natively */
    Cr0 = __readcr0();
    __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));

    /* Initialize FPU state */
    Ke386FnInit();

    /* Divide and multiply back; subtract from the original and convert
       to an integer — zero means the division was exact */
#ifdef __GNUC__
    __asm__ __volatile__
    (
        "fldl %1\n\t"
        "fdivl %2\n\t"
        "fmull %2\n\t"
        "fldl %1\n\t"
        "fsubp\n\t"
        "fistpl %0\n\t"
        : "=m" (ErrataPresent)
        : "m" (Value1),
          "m" (Value2)
    );
#else
    __asm
    {
        fld Value1
        fdiv Value2
        fmul Value2
        fld Value1
        fsubp st(1), st(0)
        fistp ErrataPresent
    };
#endif

    /* Restore the original CR0 (trap flags included) */
    __writecr0(Cr0);

    /* Enable interrupts */
    _enable();

    /* Non-zero residue means the errata is present */
    return ErrataPresent != 0;
}
1306
1307 VOID
1308 NTAPI
1309 KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
1310 {
1311 ULONG EFlags, Cr0;
1312 PKTHREAD Thread, NpxThread;
1313 PFX_SAVE_AREA FxSaveArea;
1314
1315 /* Save volatiles and disable interrupts */
1316 EFlags = __readeflags();
1317 _disable();
1318
1319 /* Save the PCR and get the current thread */
1320 Thread = KeGetCurrentThread();
1321
1322 /* Check if we're already loaded */
1323 if (Thread->NpxState != NPX_STATE_LOADED)
1324 {
1325 /* If there's nothing to load, quit */
1326 if (!SaveArea) return;
1327
1328 /* Need FXSR support for this */
1329 ASSERT(KeI386FxsrPresent == TRUE);
1330
1331 /* Check for sane CR0 */
1332 Cr0 = __readcr0();
1333 if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1334 {
1335 /* Mask out FPU flags */
1336 __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1337 }
1338
1339 /* Get the NPX thread and check its FPU state */
1340 NpxThread = KeGetCurrentPrcb()->NpxThread;
1341 if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
1342 {
1343 /* Get the FX frame and store the state there */
1344 FxSaveArea = KiGetThreadNpxArea(NpxThread);
1345 Ke386FxSave(FxSaveArea);
1346
1347 /* NPX thread has lost its state */
1348 NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1349 }
1350
1351 /* Now load NPX state from the NPX area */
1352 FxSaveArea = KiGetThreadNpxArea(Thread);
1353 Ke386FxStore(FxSaveArea);
1354 }
1355 else
1356 {
1357 /* Check for sane CR0 */
1358 Cr0 = __readcr0();
1359 if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1360 {
1361 /* Mask out FPU flags */
1362 __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1363 }
1364
1365 /* Get FX frame */
1366 FxSaveArea = KiGetThreadNpxArea(Thread);
1367 Thread->NpxState = NPX_STATE_NOT_LOADED;
1368
1369 /* Save state if supported by CPU */
1370 if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
1371 }
1372
1373 /* Now save the FN state wherever it was requested */
1374 if (SaveArea) Ke386FnSave(SaveArea);
1375
1376 /* Clear NPX thread */
1377 KeGetCurrentPrcb()->NpxThread = NULL;
1378
1379 /* Add the CR0 from the NPX frame */
1380 Cr0 |= NPX_STATE_NOT_LOADED;
1381 Cr0 |= FxSaveArea->Cr0NpxState;
1382 __writecr0(Cr0);
1383
1384 /* Restore interrupt state */
1385 __writeeflags(EFlags);
1386 }
1387
1388 /* PUBLIC FUNCTIONS **********************************************************/
1389
1390 /*
1391 * @implemented
1392 */
1393 VOID
1394 NTAPI
1395 KiCoprocessorError(VOID)
1396 {
1397 PFX_SAVE_AREA NpxArea;
1398
1399 /* Get the FPU area */
1400 NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());
1401
1402 /* Set CR0_TS */
1403 NpxArea->Cr0NpxState = CR0_TS;
1404 __writecr0(__readcr0() | CR0_TS);
1405 }
1406
1407 /*
1408 * @implemented
1409 */
1410 NTSTATUS
1411 NTAPI
1412 KeSaveFloatingPointState(OUT PKFLOATING_SAVE Save)
1413 {
1414 PFNSAVE_FORMAT FpState;
1415 ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1416 DPRINT1("%s is not really implemented\n", __FUNCTION__);
1417
1418 /* check if we are doing software emulation */
1419 if (!KeI386NpxPresent) return STATUS_ILLEGAL_FLOAT_CONTEXT;
1420
1421 FpState = ExAllocatePool(NonPagedPool, sizeof (FNSAVE_FORMAT));
1422 if (!FpState) return STATUS_INSUFFICIENT_RESOURCES;
1423
1424 *((PVOID *) Save) = FpState;
1425 #ifdef __GNUC__
1426 asm volatile("fnsave %0\n\t" : "=m" (*FpState));
1427 #else
1428 __asm
1429 {
1430 fnsave [FpState]
1431 };
1432 #endif
1433
1434 KeGetCurrentThread()->Header.NpxIrql = KeGetCurrentIrql();
1435 return STATUS_SUCCESS;
1436 }
1437
1438 /*
1439 * @implemented
1440 */
1441 NTSTATUS
1442 NTAPI
1443 KeRestoreFloatingPointState(IN PKFLOATING_SAVE Save)
1444 {
1445 PFNSAVE_FORMAT FpState = *((PVOID *) Save);
1446 ASSERT(KeGetCurrentThread()->Header.NpxIrql == KeGetCurrentIrql());
1447 DPRINT1("%s is not really implemented\n", __FUNCTION__);
1448
1449 #ifdef __GNUC__
1450 asm volatile("fnclex\n\t");
1451 asm volatile("frstor %0\n\t" : "=m" (*FpState));
1452 #else
1453 __asm
1454 {
1455 fnclex
1456 frstor [FpState]
1457 };
1458 #endif
1459
1460 ExFreePool(FpState);
1461 return STATUS_SUCCESS;
1462 }
1463
1464 /*
1465 * @implemented
1466 */
1467 ULONG
1468 NTAPI
1469 KeGetRecommendedSharedDataAlignment(VOID)
1470 {
1471 /* Return the global variable */
1472 return KeLargestCacheLine;
1473 }
1474
/*
 * KiFlushTargetEntireTb - IPI worker run on each targeted processor on
 * behalf of KeFlushEntireTb; flushes this CPU's entire TB.
 */
VOID
NTAPI
KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
                      IN PVOID Ignored1,
                      IN PVOID Ignored2,
                      IN PVOID Ignored3)
{
    /* Signal this packet as done */
    /* NOTE(review): the packet is acknowledged BEFORE the flush below,
       presumably because the flush only affects this CPU and the sender
       need not wait for it -- confirm the intended ordering contract */
    KiIpiSignalPacketDone(PacketContext);

    /* Flush the TB for the Current CPU */
    KeFlushCurrentTb();
}
1488
/*
 * @implemented
 *
 * Flushes the entire translation buffer (TLB) on the current processor
 * and, in SMP builds, sends an IPI so every other active processor does
 * the same. Raises to SYNCH level for the duration and bumps the global
 * flush timestamp before returning.
 *
 * NOTE(review): both parameters (Invalid, AllProcessors) are ignored by
 * this implementation -- every call flushes everything everywhere;
 * confirm whether callers rely on the narrower semantics.
 */
VOID
NTAPI
KeFlushEntireTb(IN BOOLEAN Invalid,
                IN BOOLEAN AllProcessors)
{
    KIRQL OldIrql;
#ifdef CONFIG_SMP
    KAFFINITY TargetAffinity;
    PKPRCB Prcb = KeGetCurrentPrcb();
#endif

    /* Raise the IRQL for the TB Flush */
    OldIrql = KeRaiseIrqlToSynchLevel();

#ifdef CONFIG_SMP
    /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */

    /* Get the current processor affinity, and exclude ourselves */
    TargetAffinity = KeActiveProcessors;
    TargetAffinity &= ~Prcb->SetMember;

    /* Make sure this is MP */
    if (TargetAffinity)
    {
        /* Send an IPI TB flush to the other processors */
        KiIpiSendPacket(TargetAffinity,
                        KiFlushTargetEntireTb,
                        NULL,
                        0,
                        NULL);
    }
#endif

    /* Flush the TB for the Current CPU, and update the flush stamp */
    KeFlushCurrentTb();

#ifdef CONFIG_SMP
    /* If this is MP, wait for the other processors to finish */
    if (TargetAffinity)
    {
        /* Sanity check: we must still be on the processor we started on */
        ASSERT(Prcb == (volatile PKPRCB)KeGetCurrentPrcb());

        /* FIXME: TODO -- the wait-for-completion path is unimplemented */
        ASSERTMSG("Not yet implemented\n", FALSE);
    }
#endif

    /* Update the flush stamp and return to original IRQL */
    InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
    KeLowerIrql(OldIrql);
}
1544
1545 /*
1546 * @implemented
1547 */
1548 VOID
1549 NTAPI
1550 KeSetDmaIoCoherency(IN ULONG Coherency)
1551 {
1552 /* Save the coherency globally */
1553 KiDmaIoCoherency = Coherency;
1554 }
1555
1556 /*
1557 * @implemented
1558 */
1559 KAFFINITY
1560 NTAPI
1561 KeQueryActiveProcessors(VOID)
1562 {
1563 PAGED_CODE();
1564
1565 /* Simply return the number of active processors */
1566 return KeActiveProcessors;
1567 }
1568
1569 /*
1570 * @implemented
1571 */
1572 VOID
1573 __cdecl
1574 KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
1575 {
1576 /* Capture the context */
1577 RtlCaptureContext(&State->ContextFrame);
1578
1579 /* Capture the control state */
1580 KiSaveProcessorControlState(State);
1581 }