ULONG KeProcessorLevel;
ULONG KeProcessorRevision;
ULONG KeFeatureBits;
-ULONG KiFastSystemCallDisable = 1;
+ULONG KiFastSystemCallDisable;
ULONG KeI386NpxPresent = 0;
ULONG KiMXCsrMask = 0;
ULONG MxcsrFeatureMask = 0;
ULONG KeDcacheFlushCount = 0;
ULONG KeIcacheFlushCount = 0;
ULONG KiDmaIoCoherency = 0;
+ULONG KePrefetchNTAGranularity = 32;
CHAR KeNumberProcessors;
KAFFINITY KeActiveProcessors = 1;
BOOLEAN KiI386PentiumLockErrataPresent;
BOOLEAN KiSMTProcessorsPresent;
+/* The distance between SYSEXIT and IRETD return modes */
+UCHAR KiSystemCallExitAdjust;
+
+/* The offset that was applied -- either 0 or the value above */
+UCHAR KiSystemCallExitAdjusted;
+
+/* Whether the adjustment was already done once */
+BOOLEAN KiFastCallCopyDoneOnce;
+
/* Flush data */
volatile LONG KiTbFlushTimeStamp;
ULONG CacheRequests = 0, i;
ULONG CurrentRegister;
UCHAR RegisterByte;
+ ULONG Size, Associativity = 0, CacheLine = 64, CurrentSize = 0;
BOOLEAN FirstPass = TRUE;
/* Set default L2 size */
* (32MB), or from 0x80 to 0x89 (same size but
* 8-way associative.
*/
- if (((RegisterByte > 0x40) &&
- (RegisterByte <= 0x49)) ||
- ((RegisterByte > 0x80) &&
- (RegisterByte <= 0x89)))
+ if (((RegisterByte > 0x40) && (RegisterByte <= 0x47)) ||
+ ((RegisterByte > 0x78) && (RegisterByte <= 0x7C)) ||
+ ((RegisterByte > 0x80) && (RegisterByte <= 0x85)))
{
+ /* Compute associativity */
+ Associativity = 4;
+ if (RegisterByte >= 0x79) Associativity = 8;
+
/* Mask out only the first nibble */
- RegisterByte &= 0x0F;
-
- /* Set the L2 Cache Size */
- Pcr->SecondLevelCacheSize = 0x10000 <<
- RegisterByte;
+ RegisterByte &= 0x07;
+
+ /* Check if this cache is bigger than the last */
+ Size = 0x10000 << RegisterByte;
+ if ((Size / Associativity) > CurrentSize)
+ {
+ /* Set the L2 Cache Size and Associativity */
+ CurrentSize = Size / Associativity;
+ Pcr->SecondLevelCacheSize = Size;
+ Pcr->SecondLevelCacheAssociativity = Associativity;
+ }
+ }
+ else if ((RegisterByte > 0x21) && (RegisterByte <= 0x29))
+ {
+ /* Set minimum cache line size */
+ if (CacheLine < 128) CacheLine = 128;
+
+ /* Hard-code size/associativity */
+ Associativity = 8;
+ switch (RegisterByte)
+ {
+ case 0x22:
+ Size = 512 * 1024;
+ Associativity = 4;
+ break;
+
+ case 0x23:
+ Size = 1024 * 1024;
+ break;
+
+ case 0x25:
+ Size = 2048 * 1024;
+ break;
+
+ case 0x29:
+ Size = 4096 * 1024;
+ break;
+
+ default:
+ Size = 0;
+ break;
+ }
+
+ /* Check if this cache is bigger than the last */
+ if ((Size / Associativity) > CurrentSize)
+ {
+ /* Set the L2 Cache Size and Associativity */
+ CurrentSize = Size / Associativity;
+ Pcr->SecondLevelCacheSize = Size;
+ Pcr->SecondLevelCacheAssociativity = Associativity;
+ }
+ }
+ else if (((RegisterByte > 0x65) && (RegisterByte < 0x69)) ||
+ (RegisterByte == 0x2C) || (RegisterByte == 0xF0))
+ {
+ /* Indicates L1 cache line of 64 bytes */
+ KePrefetchNTAGranularity = 64;
+ }
+ else if (RegisterByte == 0xF1)
+ {
+ /* Indicates L1 cache line of 128 bytes */
+ KePrefetchNTAGranularity = 128;
+ }
+ else if (((RegisterByte >= 0x4A) && (RegisterByte <= 0x4C)) ||
+ (RegisterByte == 0x78) ||
+ (RegisterByte == 0x7D) ||
+ (RegisterByte == 0x7F) ||
+ (RegisterByte == 0x86) ||
+ (RegisterByte == 0x87))
+ {
+ /* Set minimum cache line size */
+ if (CacheLine < 64) CacheLine = 64;
+
+ /* Hard-code size/associativity */
+ switch (RegisterByte)
+ {
+ case 0x4A:
+ Size = 4 * 1024 * 1024;
+ Associativity = 8;
+ break;
+
+ case 0x4B:
+ Size = 6 * 1024 * 1024;
+ Associativity = 12;
+ break;
+
+ case 0x4C:
+ Size = 8 * 1024 * 1024;
+ Associativity = 16;
+ break;
+
+ case 0x78:
+ Size = 1 * 1024 * 1024;
+ Associativity = 4;
+ break;
+
+ case 0x7D:
+ Size = 2 * 1024 * 1024;
+ Associativity = 8;
+ break;
+
+ case 0x7F:
+ Size = 512 * 1024;
+ Associativity = 2;
+ break;
+
+ case 0x86:
+ Size = 512 * 1024;
+ Associativity = 4;
+ break;
+
+ case 0x87:
+ Size = 1 * 1024 * 1024;
+ Associativity = 8;
+ break;
+
+ default:
+ Size = 0;
+ break;
+ }
+
+ /* Check if this cache is bigger than the last */
+ if ((Size / Associativity) > CurrentSize)
+ {
+ /* Set the L2 Cache Size and Associativity */
+ CurrentSize = Size / Associativity;
+ Pcr->SecondLevelCacheSize = Size;
+ Pcr->SecondLevelCacheAssociativity = Associativity;
+ }
}
}
}
case CPU_AMD:
- /* Check if we support CPUID 0x80000006 */
- CPUID(0x80000000, &Data[0], &Dummy, &Dummy, &Dummy);
- if (Data[0] >= 6)
+ /* Check if we support CPUID 0x80000005 */
+ CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
+ if (Data[0] >= 0x80000006)
{
- /* Get 2nd level cache and tlb size */
- CPUID(0x80000006, &Dummy, &Dummy, &Data[2], &Dummy);
+ /* Get L1 size first */
+ CPUID(0x80000005, &Data[0], &Data[1], &Data[2], &Data[3]);
+ KePrefetchNTAGranularity = Data[2] & 0xFF;
+
+ /* Check if we support CPUID 0x80000006 */
+ CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
+ if (Data[0] >= 0x80000006)
+ {
+ /* Get 2nd level cache and tlb size */
+ CPUID(0x80000006, &Data[0], &Data[1], &Data[2], &Data[3]);
+
+ /* Cache line size */
+ CacheLine = Data[2] & 0xFF;
+
+ /* Hardcode associativity */
+ RegisterByte = Data[2] >> 12;
+ switch (RegisterByte)
+ {
+ case 2:
+ Associativity = 2;
+ break;
+
+ case 4:
+ Associativity = 4;
+ break;
+
+ case 6:
+ Associativity = 8;
+ break;
+
+ case 8:
+ case 15:
+ Associativity = 16;
+ break;
+
+ default:
+ Associativity = 1;
+ break;
+ }
+
+ /* Compute size */
+ Size = (Data[2] >> 16) << 10;
+
+ /* Hack for Model 6, Steping 300 */
+ if ((KeGetCurrentPrcb()->CpuType == 6) &&
+ (KeGetCurrentPrcb()->CpuStep == 0x300))
+ {
+ /* Stick 64K in there */
+ Size = 64 * 1024;
+ }
- /* Set the L2 Cache Size */
- Pcr->SecondLevelCacheSize = (Data[2] & 0xFFFF0000) >> 6;
+ /* Set the L2 Cache Size and associativity */
+ Pcr->SecondLevelCacheSize = Size;
+ Pcr->SecondLevelCacheAssociativity = Associativity;
+ }
}
break;
/* FIXME */
break;
}
+
+ /* Set the cache line */
+ if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
+ DPRINT1("Prefetch Cache: %d bytes\tL2 Cache: %d bytes\tL2 Cache Line: %d bytes\tL2 Cache Associativity: %d\n",
+ KePrefetchNTAGranularity,
+ Pcr->SecondLevelCacheSize,
+ KeLargestCacheLine,
+ Pcr->SecondLevelCacheAssociativity);
}
VOID
Tss = (PKTSS)KiDoubleFaultTSS;
KiInitializeTSS(Tss);
Tss->CR3 = __readcr3();
- Tss->Esp0 = PtrToUlong(KiDoubleFaultStack);
- Tss->Esp = PtrToUlong(KiDoubleFaultStack);
- Tss->Eip = PtrToUlong(KiTrap8);
+ Tss->Esp0 = KiDoubleFaultStack;
+ Tss->Esp = KiDoubleFaultStack;
+ Tss->Eip = PtrToUlong(KiTrap08);
Tss->Cs = KGDT_R0_CODE;
Tss->Fs = KGDT_R0_PCR;
Tss->Ss = Ke386GetSs();
Tss = (PKTSS)KiNMITSS;
KiInitializeTSS(Tss);
Tss->CR3 = __readcr3();
- Tss->Esp0 = PtrToUlong(KiDoubleFaultStack);
- Tss->Esp = PtrToUlong(KiDoubleFaultStack);
- Tss->Eip = PtrToUlong(KiTrap2);
+ Tss->Esp0 = KiDoubleFaultStack;
+ Tss->Esp = KiDoubleFaultStack;
+ Tss->Eip = PtrToUlong(KiTrap02);
Tss->Cs = KGDT_R0_CODE;
Tss->Fs = KGDT_R0_PCR;
Tss->Ss = Ke386GetSs();
NTAPI
KiRestoreFastSyscallReturnState(VOID)
{
- /* FIXME: NT has support for SYSCALL, IA64-SYSENTER, etc. */
-
/* Check if the CPU Supports fast system call */
if (KeFeatureBits & KF_FAST_SYSCALL)
{
- /* Do an IPI to enable it */
- KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
+ /* Check if it has been disabled */
+ if (!KiFastSystemCallDisable)
+ {
+ /* Do an IPI to enable it */
+ KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
+
+ /* It's enabled, so use the proper exit stub */
+ KiFastCallExitHandler = KiSystemCallSysExitReturn;
+ DPRINT1("Support for SYSENTER detected.\n");
+ }
+ else
+ {
+ /* Disable fast system call */
+ KeFeatureBits &= ~KF_FAST_SYSCALL;
+ KiFastCallExitHandler = KiSystemCallTrapReturn;
+ DPRINT1("Support for SYSENTER disabled.\n");
+ }
+ }
+ else
+ {
+ /* Use the IRET handler */
+ KiFastCallExitHandler = KiSystemCallTrapReturn;
+ DPRINT1("No support for SYSENTER detected.\n");
}
}
{
PKIDTENTRY IdtEntry;
- /* Get the IDT Entry for Interrupt 19 */
- IdtEntry = &((PKIPCR)KeGetPcr())->IDT[19];
+ /* Get the IDT Entry for Interrupt 0x13 */
+ IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];
/* Set it up */
IdtEntry->Selector = KGDT_R0_CODE;
- IdtEntry->Offset = ((ULONG_PTR)KiTrap19 & 0xFFFF);
- IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap19 >> 16) & 0xFFFF;
+ IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
+ IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;
KiSaveProcessorControlState(&Prcb->ProcessorState);
}
-/* C TRAP HANDLERS ************************************************************/
+BOOLEAN
+NTAPI
+KiIsNpxPresent(VOID)
+{
+ ULONG Cr0;
+ USHORT Magic;
+
+ /* Set magic */
+ Magic = 0xFFFF;
+
+ /* Read CR0 and mask out FPU flags */
+ Cr0 = __readcr0() & ~(CR0_MP | CR0_TS | CR0_EM | CR0_ET);
+
+ /* Store on FPU stack */
+ asm volatile ("fninit;" "fnstsw %0" : "+m"(Magic));
+
+ /* Magic should now be cleared */
+ if (Magic & 0xFF)
+ {
+ /* You don't have an FPU -- enable emulation for now */
+ __writecr0(Cr0 | CR0_EM | CR0_TS);
+ return FALSE;
+ }
+
+ /* You have an FPU, enable it */
+ Cr0 |= CR0_ET;
+
+ /* Enable INT 16 on 486 and higher */
+ if (KeGetCurrentPrcb()->CpuType >= 3) Cr0 |= CR0_NE;
+
+ /* Set FPU state */
+ __writecr0(Cr0 | CR0_EM | CR0_TS);
+ return TRUE;
+}
BOOLEAN
NTAPI
-KiNmiFault(IN PVOID InterruptStack)
+KiIsNpxErrataPresent(VOID)
{
- PKTSS Tss, NmiTss;
- PKTHREAD Thread;
- PKPROCESS Process;
- PKGDTENTRY TssGdt;
- KTRAP_FRAME TrapFrame;
- KIRQL OldIrql;
+ BOOLEAN ErrataPresent;
+ ULONG Cr0;
+ volatile double Value1, Value2;
- //
- // In some sort of strange recursion case, we might end up here with the IF
- // flag incorrectly on the interrupt frame -- during a normal NMI this would
- // normally already be set.
- //
- // For sanity's sake, make sure interrupts are disabled for sure.
- // NMIs will already be since the CPU does it for us.
- //
+ /* Disable interrupts */
_disable();
-
- //
- // Get the current TSS, thread, and process
- //
- Tss = PCR->TSS;
- Thread = ((PKIPCR)PCR)->PrcbData.CurrentThread;
- Process = Thread->ApcState.Process;
- //
- // Save data usually not in the TSS
- //
- Tss->CR3 = Process->DirectoryTableBase[0];
- Tss->IoMapBase = Process->IopmOffset;
- Tss->LDT = Process->LdtDescriptor.LimitLow ? KGDT_LDT : 0;
+ /* Read CR0 and remove FPU flags */
+ Cr0 = __readcr0();
+ __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
- //
- // Now get the base address of the NMI TSS
- //
- TssGdt = &((PKIPCR)KeGetPcr())->GDT[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
- NmiTss = (PKTSS)(ULONG_PTR)(TssGdt->BaseLow |
- TssGdt->HighWord.Bytes.BaseMid << 16 |
- TssGdt->HighWord.Bytes.BaseHi << 24);
-
- //
- // Switch to it and activate it, masking off the nested flag
- //
- // Note that in reality, we are already on the NMI tss -- we just need to
- // update the PCR to reflect this
- //
- PCR->TSS = NmiTss;
- __writeeflags(__readeflags() &~ EFLAGS_NESTED_TASK);
- TssGdt->HighWord.Bits.Dpl = 0;
- TssGdt->HighWord.Bits.Pres = 1;
- TssGdt->HighWord.Bits.Type = I386_TSS;
+ /* Initialize FPU state */
+ asm volatile ("fninit");
- //
- // Now build the trap frame based on the original TSS
- //
- // The CPU does a hardware "Context switch" / task switch of sorts and so it
- // takes care of saving our context in the normal TSS.
- //
- // We just have to go get the values...
- //
- RtlZeroMemory(&TrapFrame, sizeof(KTRAP_FRAME));
- TrapFrame.HardwareSegSs = Tss->Ss0;
- TrapFrame.HardwareEsp = Tss->Esp0;
- TrapFrame.EFlags = Tss->EFlags;
- TrapFrame.SegCs = Tss->Cs;
- TrapFrame.Eip = Tss->Eip;
- TrapFrame.Ebp = Tss->Ebp;
- TrapFrame.Ebx = Tss->Ebx;
- TrapFrame.Esi = Tss->Esi;
- TrapFrame.Edi = Tss->Edi;
- TrapFrame.SegFs = Tss->Fs;
- TrapFrame.ExceptionList = PCR->Tib.ExceptionList;
- TrapFrame.PreviousPreviousMode = -1;
- TrapFrame.Eax = Tss->Eax;
- TrapFrame.Ecx = Tss->Ecx;
- TrapFrame.Edx = Tss->Edx;
- TrapFrame.SegDs = Tss->Ds;
- TrapFrame.SegEs = Tss->Es;
- TrapFrame.SegGs = Tss->Gs;
- TrapFrame.DbgEip = Tss->Eip;
- TrapFrame.DbgEbp = Tss->Ebp;
+ /* Multiply the magic values and divide, we should get the result back */
+ Value1 = 4195835.0;
+ Value2 = 3145727.0;
+ ErrataPresent = (Value1 * Value2 / 3145727.0) != 4195835.0;
- //
- // Store the trap frame in the KPRCB
- //
- KiSaveProcessorState(&TrapFrame, NULL);
+ /* Restore CR0 */
+ __writecr0(Cr0);
- //
- // Call any registered NMI handlers and see if they handled it or not
- //
- if (!KiHandleNmi())
- {
- //
- // They did not, so call the platform HAL routine to bugcheck the system
- //
- // Make sure the HAL believes it's running at HIGH IRQL... we can't use
- // the normal APIs here as playing with the IRQL could change the system
- // state
- //
- OldIrql = PCR->Irql;
- PCR->Irql = HIGH_LEVEL;
- HalHandleNMI(NULL);
- PCR->Irql = OldIrql;
- }
+ /* Enable interrupts */
+ _enable();
+
+ /* Return if there's an errata */
+ return ErrataPresent;
+}
- //
- // Although the CPU disabled NMIs, we just did a BIOS Call, which could've
- // totally changed things.
- //
- // We have to make sure we're still in our original NMI -- a nested NMI
- // will point back to the NMI TSS, and in that case we're hosed.
- //
- if (PCR->TSS->Backlink != KGDT_NMI_TSS)
+NTAPI
+VOID
+KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
+{
+ ULONG EFlags, Cr0;
+ PKTHREAD Thread, NpxThread;
+ PFX_SAVE_AREA FxSaveArea;
+
+ /* Save volatiles and disable interrupts */
+ EFlags = __readeflags();
+ _disable();
+
+ /* Save the PCR and get the current thread */
+ Thread = KeGetCurrentThread();
+
+ /* Check if we're already loaded */
+ if (Thread->NpxState != NPX_STATE_LOADED)
{
- //
- // Restore original TSS
- //
- PCR->TSS = Tss;
+ /* If there's nothing to load, quit */
+ if (!SaveArea) return;
- //
- // Set it back to busy
- //
- TssGdt->HighWord.Bits.Dpl = 0;
- TssGdt->HighWord.Bits.Pres = 1;
- TssGdt->HighWord.Bits.Type = I386_ACTIVE_TSS;
+ /* Need FXSR support for this */
+ ASSERT(KeI386FxsrPresent == TRUE);
- //
- // Restore nested flag
- //
- __writeeflags(__readeflags() | EFLAGS_NESTED_TASK);
+ /* Check for sane CR0 */
+ Cr0 = __readcr0();
+ if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
+ {
+ /* Mask out FPU flags */
+ __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
+ }
+
+ /* Get the NPX thread and check its FPU state */
+ NpxThread = KeGetCurrentPrcb()->NpxThread;
+ if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
+ {
+ /* Get the FX frame and store the state there */
+ FxSaveArea = KiGetThreadNpxArea(NpxThread);
+ Ke386FxSave(FxSaveArea);
+
+ /* NPX thread has lost its state */
+ NpxThread->NpxState = NPX_STATE_NOT_LOADED;
+ }
- //
- // Handled, return from interrupt
- //
- return TRUE;
+ /* Now load NPX state from the NPX area */
+ FxSaveArea = KiGetThreadNpxArea(Thread);
+ Ke386FxStore(FxSaveArea);
}
+ else
+ {
+ /* Check for sane CR0 */
+ Cr0 = __readcr0();
+ if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
+ {
+ /* Mask out FPU flags */
+ __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
+ }
+
+ /* Get FX frame */
+ FxSaveArea = KiGetThreadNpxArea(Thread);
+ Thread->NpxState = NPX_STATE_NOT_LOADED;
+
+ /* Save state if supported by CPU */
+ if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
+ }
+
+ /* Now save the FN state wherever it was requested */
+ if (SaveArea) Ke386FnSave(SaveArea);
+
+ /* Clear NPX thread */
+ KeGetCurrentPrcb()->NpxThread = NULL;
- //
- // Unhandled: crash the system
- //
- return FALSE;
+ /* Add the CR0 from the NPX frame */
+ Cr0 |= NPX_STATE_NOT_LOADED;
+ Cr0 |= FxSaveArea->Cr0NpxState;
+ __writecr0(Cr0);
+
+ /* Restore interrupt state */
+ __writeeflags(EFlags);
}
/* PUBLIC FUNCTIONS **********************************************************/
+/*
+ * @implemented
+ */
+VOID
+NTAPI
+KiCoprocessorError(VOID)
+{
+ PFX_SAVE_AREA NpxArea;
+
+ /* Get the FPU area */
+ NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());
+
+ /* Set CR0_TS */
+ NpxArea->Cr0NpxState = CR0_TS;
+ __writecr0(__readcr0() | CR0_TS);
+}
+
/*
* @implemented
*/