[NTOSKRNL]
[reactos.git] / reactos / ntoskrnl / ke / i386 / cpu.c
index 833ab52..38539b1 100644 (file)
@@ -40,6 +40,7 @@ ULONG KeLargestCacheLine = 0x40;
 ULONG KeDcacheFlushCount = 0;
 ULONG KeIcacheFlushCount = 0;
 ULONG KiDmaIoCoherency = 0;
+ULONG KePrefetchNTAGranularity = 32;
 CHAR KeNumberProcessors;
 KAFFINITY KeActiveProcessors = 1;
 BOOLEAN KiI386PentiumLockErrataPresent;
@@ -104,10 +105,28 @@ RDMSR(IN ULONG Register)
     return __readmsr(Register);
 }
 
+/* NSC/Cyrix CPU configuration register index */
+#define CX86_CCR1 0xc1
+
+/* NSC/Cyrix CPU indexed register access macros */
+static __inline
+ULONG
+getCx86(UCHAR reg)
+{
+    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
+    return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
+}
+
+#define setCx86(reg, data) do { \
+   WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22,(reg)); \
+   WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23,(data)); \
+} while (0)
+
 /* FUNCTIONS *****************************************************************/
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiSetProcessorType(VOID)
 {
     ULONG EFlags, NewEFlags;
@@ -174,6 +193,7 @@ KiSetProcessorType(VOID)
 
 ULONG
 NTAPI
+INIT_FUNCTION
 KiGetCpuVendor(VOID)
 {
     PKPRCB Prcb = KeGetCurrentPrcb();
@@ -235,12 +255,13 @@ KiGetCpuVendor(VOID)
 
 ULONG
 NTAPI
+INIT_FUNCTION
 KiGetFeatureBits(VOID)
 {
     PKPRCB Prcb = KeGetCurrentPrcb();
     ULONG Vendor;
     ULONG FeatureBits = KF_WORKING_PTE;
-    ULONG Reg[4], Dummy;
+    ULONG Reg[4], Dummy, Ccr1;
     BOOLEAN ExtendedCPUID = TRUE;
     ULONG CpuFeatures = 0;
 
@@ -282,10 +303,17 @@ KiGetFeatureBits(VOID)
                 /* Remove support for correct PTE support. */
                 FeatureBits &= ~KF_WORKING_PTE;
             }
+            
+            /* Virtualbox claims to have no SYSENTER support,
+             * which is false for processors >= Pentium Pro */
+            if(Prcb->CpuType >= 6)
+            {
+                Reg[3] |= 0x800;
+            }
 
-            /* Check if the CPU is too old to support SYSENTER */
-            if ((Prcb->CpuType < 6) ||
-                ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
+            /* Check if the CPU is too old to support SYSENTER,
+             * See Intel CPUID instruction manual for details*/
+            if ((Reg[0] & 0x0FFF3FFF) < 0x00000633)
             {
                 /* Disable it */
                 Reg[3] &= ~0x800;
@@ -351,7 +379,22 @@ KiGetFeatureBits(VOID)
         /* Cyrix CPUs */
         case CPU_CYRIX:
 
-            /* FIXME: CMPXCGH8B */
+            /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
+            if (Prcb->CpuType == 6 &&
+                Prcb->CpuStep <= 1)
+            {
+                /* Get CCR1 value */
+                Ccr1 = getCx86(CX86_CCR1);
+
+                /* Enable the NO_LOCK bit */
+                Ccr1 |= 0x10;
+
+                /* Set the new CCR1 value */
+                setCx86(CX86_CCR1, Ccr1);
+            }
+
+            /* Set the current features */
+            CpuFeatures = Reg[3];
 
             break;
 
@@ -438,6 +481,32 @@ KiGetFeatureBits(VOID)
             }
         }
     }
+    
+    DPRINT1("Supported CPU features : %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
+#define print_supported(kf_value) \
+    FeatureBits & kf_value ? #kf_value : ""
+    print_supported(KF_V86_VIS),
+    print_supported(KF_RDTSC),
+    print_supported(KF_CR4),
+    print_supported(KF_CMOV),
+    print_supported(KF_GLOBAL_PAGE),
+    print_supported(KF_LARGE_PAGE),
+    print_supported(KF_MTRR),
+    print_supported(KF_CMPXCHG8B),
+    print_supported(KF_MMX),
+    print_supported(KF_WORKING_PTE),
+    print_supported(KF_PAT),
+    print_supported(KF_FXSR),
+    print_supported(KF_FAST_SYSCALL),
+    print_supported(KF_XMMI),
+    print_supported(KF_3DNOW),
+    print_supported(KF_AMDK6MTRR),
+    print_supported(KF_XMMI64),
+    print_supported(KF_DTS),
+    print_supported(KF_NX_BIT),
+    print_supported(KF_NX_DISABLED),
+    print_supported(KF_NX_ENABLED));
+#undef print_supported
 
     /* Return the Feature Bits */
     return FeatureBits;
@@ -445,6 +514,7 @@ KiGetFeatureBits(VOID)
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiGetCacheInformation(VOID)
 {
     PKIPCR Pcr = (PKIPCR)KeGetPcr();
@@ -453,6 +523,7 @@ KiGetCacheInformation(VOID)
     ULONG CacheRequests = 0, i;
     ULONG CurrentRegister;
     UCHAR RegisterByte;
+    ULONG Size, Associativity = 0, CacheLine = 64, CurrentSize = 0;
     BOOLEAN FirstPass = TRUE;
 
     /* Set default L2 size */
@@ -517,17 +588,144 @@ KiGetCacheInformation(VOID)
                              * (32MB), or from 0x80 to 0x89 (same size but
                              * 8-way associative.
                              */
-                            if (((RegisterByte > 0x40) &&
-                                 (RegisterByte <= 0x49)) ||
-                                ((RegisterByte > 0x80) &&
-                                (RegisterByte <= 0x89)))
+                            if (((RegisterByte > 0x40) && (RegisterByte <= 0x47)) ||
+                                ((RegisterByte > 0x78) && (RegisterByte <= 0x7C)) ||
+                                ((RegisterByte > 0x80) && (RegisterByte <= 0x85)))
                             {
+                                /* Compute associativity */
+                                Associativity = 4;
+                                if (RegisterByte >= 0x79) Associativity = 8;
+                                
                                 /* Mask out only the first nibble */
-                                RegisterByte &= 0x0F;
-
-                                /* Set the L2 Cache Size */
-                                Pcr->SecondLevelCacheSize = 0x10000 <<
-                                                            RegisterByte;
+                                RegisterByte &= 0x07;
+
+                                /* Check if this cache is bigger than the last */
+                                Size = 0x10000 << RegisterByte;
+                                if ((Size / Associativity) > CurrentSize)
+                                {
+                                    /* Set the L2 Cache Size and Associativity */
+                                    CurrentSize = Size / Associativity;
+                                    Pcr->SecondLevelCacheSize = Size;
+                                    Pcr->SecondLevelCacheAssociativity = Associativity;
+                                }
+                            }
+                            else if ((RegisterByte > 0x21) && (RegisterByte <= 0x29))
+                            {
+                                /* Set minimum cache line size */
+                                if (CacheLine < 128) CacheLine = 128;
+                                
+                                /* Hard-code size/associativity */
+                                Associativity = 8;
+                                switch (RegisterByte)
+                                {
+                                    case 0x22:
+                                        Size = 512 * 1024;
+                                        Associativity = 4;
+                                        break;
+                                        
+                                    case 0x23:
+                                        Size = 1024 * 1024;
+                                        break;
+                                        
+                                    case 0x25:
+                                        Size = 2048 * 1024;
+                                        break;
+                                        
+                                    case 0x29:
+                                        Size = 4096 * 1024;
+                                        break;
+                                    
+                                    default:
+                                        Size = 0;
+                                        break;
+                                }
+                                
+                                /* Check if this cache is bigger than the last */
+                                if ((Size / Associativity) > CurrentSize)
+                                {
+                                    /* Set the L2 Cache Size and Associativity */
+                                    CurrentSize = Size / Associativity;
+                                    Pcr->SecondLevelCacheSize = Size;
+                                    Pcr->SecondLevelCacheAssociativity = Associativity;
+                                }
+                            }
+                            else if (((RegisterByte > 0x65) && (RegisterByte < 0x69)) ||
+                                      (RegisterByte == 0x2C) || (RegisterByte == 0xF0))
+                            {
+                                /* Indicates L1 cache line of 64 bytes */
+                                KePrefetchNTAGranularity = 64;
+                            }
+                            else if (RegisterByte == 0xF1)
+                            {
+                                /* Indicates L1 cache line of 128 bytes */
+                                KePrefetchNTAGranularity = 128;
+                            }
+                            else if (((RegisterByte >= 0x4A) && (RegisterByte <= 0x4C)) ||
+                                      (RegisterByte == 0x78) ||
+                                      (RegisterByte == 0x7D) ||
+                                      (RegisterByte == 0x7F) ||
+                                      (RegisterByte == 0x86) ||
+                                      (RegisterByte == 0x87))
+                            {
+                                /* Set minimum cache line size */
+                                if (CacheLine < 64) CacheLine = 64;
+                                
+                                /* Hard-code size/associativity */
+                                switch (RegisterByte)
+                                {
+                                    case 0x4A:
+                                        Size = 4 * 1024 * 1024;
+                                        Associativity = 8;
+                                        break;
+                                        
+                                    case 0x4B:
+                                        Size = 6 * 1024 * 1024;
+                                        Associativity = 12;
+                                        break;
+                                        
+                                    case 0x4C:
+                                        Size = 8 * 1024 * 1024;
+                                        Associativity = 16;
+                                        break;
+                                        
+                                    case 0x78:
+                                        Size = 1 * 1024 * 1024;
+                                        Associativity = 4;
+                                        break;
+                                        
+                                    case 0x7D:
+                                        Size = 2 * 1024 * 1024;
+                                        Associativity = 8;
+                                        break;
+                                            
+                                    case 0x7F:
+                                        Size = 512 * 1024;
+                                        Associativity = 2;
+                                        break;
+                                                
+                                    case 0x86:
+                                        Size = 512 * 1024;
+                                        Associativity = 4;
+                                        break;
+                                    
+                                    case 0x87:
+                                        Size = 1 * 1024 * 1024;
+                                        Associativity = 8;
+                                        break;
+
+                                    default:
+                                        Size = 0;
+                                        break;
+                                }
+                                
+                                /* Check if this cache is bigger than the last */
+                                if ((Size / Associativity) > CurrentSize)
+                                {
+                                    /* Set the L2 Cache Size and Associativity */
+                                    CurrentSize = Size / Associativity;
+                                    Pcr->SecondLevelCacheSize = Size;
+                                    Pcr->SecondLevelCacheAssociativity = Associativity;
+                                }
                             }
                         }
                     }
@@ -537,15 +735,65 @@ KiGetCacheInformation(VOID)
 
         case CPU_AMD:
 
-            /* Check if we support CPUID 0x80000006 */
-            CPUID(0x80000000, &Data[0], &Dummy, &Dummy, &Dummy);
-            if (Data[0] >= 6)
+            /* Check if we support CPUID 0x80000005 */
+            CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
+            if (Data[0] >= 0x80000006)
             {
-                /* Get 2nd level cache and tlb size */
-                CPUID(0x80000006, &Dummy, &Dummy, &Data[2], &Dummy);
+                /* Get L1 size first */
+                CPUID(0x80000005, &Data[0], &Data[1], &Data[2], &Data[3]);
+                KePrefetchNTAGranularity = Data[2] & 0xFF;
+                
+                /* Check if we support CPUID 0x80000006 */
+                CPUID(0x80000000, &Data[0], &Data[1], &Data[2], &Data[3]);
+                if (Data[0] >= 0x80000006)
+                {   
+                    /* Get 2nd level cache and tlb size */
+                    CPUID(0x80000006, &Data[0], &Data[1], &Data[2], &Data[3]);
+                    
+                    /* Cache line size */
+                    CacheLine = Data[2] & 0xFF;
+                    
+                    /* Hardcode associativity */
+                    RegisterByte = Data[2] >> 12;
+                    switch (RegisterByte)
+                    {
+                        case 2:
+                            Associativity = 2;
+                            break;
+                        
+                        case 4:
+                            Associativity = 4;
+                            break;
+                            
+                        case 6:
+                            Associativity = 8;
+                            break;
+                            
+                        case 8:
+                        case 15:
+                            Associativity = 16;
+                            break;
+                        
+                        default:
+                            Associativity = 1;
+                            break;
+                    }
+                    
+                    /* Compute size */
+                    Size = (Data[2] >> 16) << 10;
+                    
+                    /* Hack for Model 6, Steping 300 */
+                    if ((KeGetCurrentPrcb()->CpuType == 6) &&
+                        (KeGetCurrentPrcb()->CpuStep == 0x300))
+                    {
+                        /* Stick 64K in there */
+                        Size = 64 * 1024;
+                    }
 
-                /* Set the L2 Cache Size */
-                Pcr->SecondLevelCacheSize = (Data[2] & 0xFFFF0000) >> 6;
+                    /* Set the L2 Cache Size and associativity */
+                    Pcr->SecondLevelCacheSize = Size;
+                    Pcr->SecondLevelCacheAssociativity = Associativity;
+                }
             }
             break;
 
@@ -557,10 +805,19 @@ KiGetCacheInformation(VOID)
             /* FIXME */
             break;
     }
+    
+    /* Set the cache line */
+    if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
+    DPRINT1("Prefetch Cache: %d bytes\tL2 Cache: %d bytes\tL2 Cache Line: %d bytes\tL2 Cache Associativity: %d\n",
+            KePrefetchNTAGranularity,
+            Pcr->SecondLevelCacheSize,
+            KeLargestCacheLine,
+            Pcr->SecondLevelCacheAssociativity);
 }
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiSetCR0Bits(VOID)
 {
     ULONG Cr0;
@@ -577,6 +834,7 @@ KiSetCR0Bits(VOID)
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiInitializeTSS2(IN PKTSS Tss,
                  IN PKGDTENTRY TssEntry OPTIONAL)
 {
@@ -630,6 +888,7 @@ KiInitializeTSS(IN PKTSS Tss)
 
 VOID
 FASTCALL
+INIT_FUNCTION
 Ki386InitializeTss(IN PKTSS Tss,
                    IN PKIDTENTRY Idt,
                    IN PKGDTENTRY Gdt)
@@ -789,6 +1048,7 @@ KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiInitializeMachineType(VOID)
 {
     /* Set the Machine Type we got from NTLDR */
@@ -797,6 +1057,7 @@ KiInitializeMachineType(VOID)
 
 ULONG_PTR
 NTAPI
+INIT_FUNCTION
 KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
 {
     /* Set CS and ESP */
@@ -810,60 +1071,7 @@ KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
 
 VOID
 NTAPI
-KiDisableFastSyscallReturn(VOID)
-{
-    /* Was it applied? */
-    if (KiSystemCallExitAdjusted)
-    {        
-        /* Restore the original value */
-        KiSystemCallExitBranch[1] = KiSystemCallExitBranch[1] - KiSystemCallExitAdjusted;
-                
-        /* It's not adjusted anymore */
-        KiSystemCallExitAdjusted = FALSE;
-    }
-}
-
-VOID
-NTAPI
-KiEnableFastSyscallReturn(VOID)
-{
-    /* Check if the patch has already been done */
-    if ((KiSystemCallExitAdjusted == KiSystemCallExitAdjust) &&
-        (KiFastCallCopyDoneOnce))
-    {
-        DPRINT1("SYSEXIT Code Patch was already done!\n");
-        return;
-    }
-    
-    /* Make sure the offset is within the distance of a Jxx SHORT */
-    if ((KiSystemCallExitBranch[1] - KiSystemCallExitAdjust) < 0x80)
-    {
-        /* Remove any existing code patch */
-        DPRINT1("Correct SHORT size found\n");
-        KiDisableFastSyscallReturn();
-        
-        /* We should have a JNZ there */
-        ASSERT(KiSystemCallExitBranch[0] == 0x75);
-
-        /* Do the patch */        
-        DPRINT1("Current jump offset: %lx\n", KiSystemCallExitBranch[1]);
-        KiSystemCallExitAdjusted = KiSystemCallExitAdjust;
-        KiSystemCallExitBranch[1] -= KiSystemCallExitAdjusted;
-        DPRINT1("New jump offset: %lx\n", KiSystemCallExitBranch[1]);
-        
-        /* Remember that we've done it */
-        KiFastCallCopyDoneOnce = TRUE;
-    }
-    else
-    {
-        /* This shouldn't happen unless we've messed the macros up */
-        DPRINT1("Your compiled kernel is broken!\n");
-        DbgBreakPoint();
-    }
-}
-
-VOID
-NTAPI
+INIT_FUNCTION
 KiRestoreFastSyscallReturnState(VOID)
 {
     /* Check if the CPU Supports fast system call */
@@ -872,34 +1080,32 @@ KiRestoreFastSyscallReturnState(VOID)
         /* Check if it has been disabled */
         if (!KiFastSystemCallDisable)
         {
-            /* KiSystemCallExit2 should come BEFORE KiSystemCallExit */
-            DPRINT1("Exit2: %p Exit1: %p\n", KiSystemCallExit2, KiSystemCallExit);
-            ASSERT(KiSystemCallExit2 < KiSystemCallExit);
-            
-            /* It's enabled, so we'll have to do a code patch */
-            KiSystemCallExitAdjust = KiSystemCallExit - KiSystemCallExit2;
-            DPRINT1("SYSENTER Capable Machine. Jump Offset Delta: %lx\n", KiSystemCallExitAdjust);
+            /* Do an IPI to enable it */
+            KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
+
+            /* It's enabled, so use the proper exit stub */
+            KiFastCallExitHandler = KiSystemCallSysExitReturn;
+            DPRINT1("Support for SYSENTER detected.\n");
         }
         else
         {
             /* Disable fast system call */
             KeFeatureBits &= ~KF_FAST_SYSCALL;
+            KiFastCallExitHandler = KiSystemCallTrapReturn;
+            DPRINT1("Support for SYSENTER disabled.\n");
         }
     }
-    
-    /* Now check if all CPUs support fast system call, and the registry allows it */
-    if (KeFeatureBits & KF_FAST_SYSCALL)
+    else
     {
-        /* Do an IPI to enable it */
-        KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
+        /* Use the IRET handler */
+        KiFastCallExitHandler = KiSystemCallTrapReturn;
+        DPRINT1("No support for SYSENTER detected.\n");
     }
-    
-    /* Perform the code patch that is required */
-    KiEnableFastSyscallReturn();
 }
 
 ULONG_PTR
 NTAPI
+INIT_FUNCTION
 Ki386EnableDE(IN ULONG_PTR Context)
 {
     /* Enable DE */
@@ -909,6 +1115,7 @@ Ki386EnableDE(IN ULONG_PTR Context)
 
 ULONG_PTR
 NTAPI
+INIT_FUNCTION
 Ki386EnableFxsr(IN ULONG_PTR Context)
 {
     /* Enable FXSR */
@@ -918,6 +1125,7 @@ Ki386EnableFxsr(IN ULONG_PTR Context)
 
 ULONG_PTR
 NTAPI
+INIT_FUNCTION
 Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
 {
     PKIDTENTRY IdtEntry;
@@ -940,6 +1148,7 @@ Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
 
 VOID
 NTAPI
+INIT_FUNCTION
 KiI386PentiumLockErrataFixup(VOID)
 {
     KDESCRIPTOR IdtDescriptor;
@@ -1031,6 +1240,7 @@ KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
 
 BOOLEAN
 NTAPI
+INIT_FUNCTION
 KiIsNpxPresent(VOID)
 {
     ULONG Cr0;
@@ -1043,7 +1253,12 @@ KiIsNpxPresent(VOID)
     Cr0 = __readcr0() & ~(CR0_MP | CR0_TS | CR0_EM | CR0_ET);
     
     /* Store on FPU stack */
+#ifdef _MSC_VER
+    __asm fninit;
+    __asm fnstsw Magic;
+#else
     asm volatile ("fninit;" "fnstsw %0" : "+m"(Magic));
+#endif
     
     /* Magic should now be cleared */
     if (Magic & 0xFF)
@@ -1066,6 +1281,7 @@ KiIsNpxPresent(VOID)
 
 BOOLEAN
 NTAPI
+INIT_FUNCTION
 KiIsNpxErrataPresent(VOID)
 {
     BOOLEAN ErrataPresent;
@@ -1080,7 +1296,7 @@ KiIsNpxErrataPresent(VOID)
     __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
     
     /* Initialize FPU state */
-    asm volatile ("fninit");
+    Ke386FnInit();
     
     /* Multiply the magic values and divide, we should get the result back */
     Value1 = 4195835.0;
@@ -1097,8 +1313,8 @@ KiIsNpxErrataPresent(VOID)
     return ErrataPresent;
 }
 
-NTAPI
 VOID
+NTAPI
 KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
 {
     ULONG EFlags, Cr0;