- Implement RtlPrefetchMemoryNonTemporal. Patch by Patrick Baggett <baggett.patrick...
[reactos.git] / reactos / ntoskrnl / ke / i386 / kernel.c
index 37852a5..8f40320 100644 (file)
@@ -4,7 +4,7 @@
  * PROJECT:         ReactOS kernel
  * FILE:            ntoskrnl/ke/i386/kernel.c
  * PURPOSE:         Initializes the kernel
- * 
+ *
  * PROGRAMMERS:     David Welch (welch@mcmail.com)
  */
 
 
 ULONG KiPcrInitDone = 0;
 static ULONG PcrsAllocated = 0;
-static ULONG Ke386CpuidFlags2, Ke386CpuidExFlags;
+static ULONG Ke386CpuidFlags2, Ke386CpuidExFlags, Ke386CpuidExMisc;
 ULONG Ke386CacheAlignment;
 CHAR Ke386CpuidModel[49] = {0,};
 ULONG Ke386L1CacheSize;
+ULONG Ke386CacheGranularity = 0x40;      /* FIXME: Default to 64 bytes for RtlPrefetchMemoryNonTemporal(), need real size */
 BOOLEAN Ke386NoExecute = FALSE;
 BOOLEAN Ke386Pae = FALSE;
 BOOLEAN Ke386GlobalPagesEnabled = FALSE;
 ULONG KiFastSystemCallDisable = 1;
+extern PVOID Ki386InitialStackArray[MAXIMUM_PROCESSORS];
+extern ULONG IdleProcessorMask;
+
+static VOID INIT_FUNCTION Ki386GetCpuId(VOID);
+
+#if defined (ALLOC_PRAGMA)
+#pragma alloc_text(INIT, Ki386GetCpuId)
+#pragma alloc_text(INIT, KeCreateApplicationProcessorIdleThread)
+#pragma alloc_text(INIT, KePrepareForApplicationProcessorInit)
+#pragma alloc_text(INIT, KeInit1)
+#pragma alloc_text(INIT, KeInit2)
+#pragma alloc_text(INIT, Ki386SetProcessorFeatures)
+#endif
 
 /* FUNCTIONS *****************************************************************/
 
-VOID INIT_FUNCTION STATIC
+static VOID INIT_FUNCTION
 Ki386GetCpuId(VOID)
 {
    ULONG OrigFlags, Flags, FinalFlags;
    ULONG MaxCpuidLevel;
-   ULONG Dummy, Eax, Ebx, Ecx, Edx;
-   PKPCR Pcr = KeGetCurrentKPCR();
+   ULONG Dummy, Eax, Ecx, Edx;
+   PKIPCR Pcr = (PKIPCR)KeGetCurrentKPCR();
 
    Ke386CpuidFlags2 =  Ke386CpuidExFlags = 0;
    Ke386CacheAlignment = 32;
@@ -45,6 +59,10 @@ Ki386GetCpuId(VOID)
    Flags = OrigFlags ^ X86_EFLAGS_ID;
    Ke386RestoreFlags(Flags);
    Ke386SaveFlags(FinalFlags);
+
+   Pcr->PrcbData.LogicalProcessorsPerPhysicalProcessor = 1;
+   Pcr->PrcbData.InitialApicId = 0xff;
+
    if ((OrigFlags & X86_EFLAGS_ID) == (FinalFlags & X86_EFLAGS_ID))
    {
       /* No cpuid supported. */
@@ -57,16 +75,30 @@ Ki386GetCpuId(VOID)
    /* Get the vendor name and the maximum cpuid level supported. */
    Ki386Cpuid(0, &MaxCpuidLevel, (PULONG)&Pcr->PrcbData.VendorString[0], (PULONG)&Pcr->PrcbData.VendorString[8], (PULONG)&Pcr->PrcbData.VendorString[4]);
    if (MaxCpuidLevel > 0)
-   { 
+   {
       /* Get the feature flags. */
-      Ki386Cpuid(1, &Eax, &Ebx, &Ke386CpuidFlags2, &Pcr->PrcbData.FeatureBits);
+      Ki386Cpuid(1, &Eax, &Ke386CpuidExMisc, &Ke386CpuidFlags2, &Pcr->PrcbData.FeatureBits);
+
+      DPRINT ("Model:  %x\n", (Eax & 0xf00) == 0xf00 ? ((Eax >> 4) & 0xf) | ((Eax >> 12) & 0xf0) : (Eax >> 4) & 0xf);
+      DPRINT ("Family: %x\n", (Eax & 0xf00) == 0xf00 ? ((Eax >> 8) & 0xf) + ((Eax >> 20) & 0xff) : (Eax >> 8) & 0xf);
+
       /* Get the cache alignment, if it is available */
       if (Pcr->PrcbData.FeatureBits & (1<<19))
       {
-         Ke386CacheAlignment = ((Ebx >> 8) & 0xff) * 8;
+         Ke386CacheAlignment = ((Ke386CpuidExMisc >> 8) & 0xff) * 8;
       }
       Pcr->PrcbData.CpuType = (Eax >> 8) & 0xf;
       Pcr->PrcbData.CpuStep = (Eax & 0xf) | ((Eax << 4) & 0xf00);
+
+      Pcr->PrcbData.InitialApicId = (Ke386CpuidExMisc >> 24) & 0xff;
+
+      /* detect Hyper-Threading on Pentium 4 CPUs or later */
+      if ((Pcr->PrcbData.CpuType == 0xf || (Eax & 0x0f00000)) &&
+          !strncmp(Pcr->PrcbData.VendorString, "GenuineIntel", 12) &&
+          Pcr->PrcbData.FeatureBits & X86_FEATURE_HT)
+      {
+        Pcr->PrcbData.LogicalProcessorsPerPhysicalProcessor = (Ke386CpuidExMisc >> 16) & 0xff;
+      }
    }
    else
    {
@@ -109,16 +141,55 @@ Ki386GetCpuId(VOID)
    }
 }
 
-VOID INIT_FUNCTION
+VOID
+KeApplicationProcessorInitDispatcher(VOID)
+{
+   KIRQL oldIrql;
+   oldIrql = KeAcquireDispatcherDatabaseLock();
+   IdleProcessorMask |= (1 << KeGetCurrentProcessorNumber());
+   KeReleaseDispatcherDatabaseLock(oldIrql);
+}
+
+VOID
+INIT_FUNCTION
+KeCreateApplicationProcessorIdleThread(ULONG Id)
+{
+  PETHREAD IdleThread;
+  PKPRCB Prcb = ((PKPCR)((ULONG_PTR)KPCR_BASE + Id * PAGE_SIZE))->Prcb;
+
+  PsInitializeIdleOrFirstThread(PsIdleProcess,
+                    &IdleThread,
+                    NULL,
+                    KernelMode,
+             FALSE);
+  IdleThread->Tcb.State = Running;
+  IdleThread->Tcb.FreezeCount = 0;
+  IdleThread->Tcb.Affinity = 1 << Id;
+  IdleThread->Tcb.UserAffinity = 1 << Id;
+  IdleThread->Tcb.Priority = LOW_PRIORITY;
+  IdleThread->Tcb.BasePriority = LOW_PRIORITY;
+  Prcb->IdleThread = &IdleThread->Tcb;
+  Prcb->CurrentThread = &IdleThread->Tcb;
+
+  Ki386InitialStackArray[Id] = (PVOID)IdleThread->Tcb.StackLimit;
+
+  DPRINT("IdleThread for Processor %d has PID %d\n",
+          Id, IdleThread->Cid.UniqueThread);
+}
+
+VOID
+INIT_FUNCTION
+NTAPI
 KePrepareForApplicationProcessorInit(ULONG Id)
 {
-  DPRINT("KePrepareForApplicationProcessorInit(Id %d)\n", Id);
   PFN_TYPE PrcPfn;
-  PKPCR Pcr;
-  PKPCR BootPcr;
+  PKIPCR Pcr;
+  PKIPCR BootPcr;
+
+  DPRINT("KePrepareForApplicationProcessorInit(Id %d)\n", Id);
 
-  BootPcr = (PKPCR)KPCR_BASE;
-  Pcr = (PKPCR)((ULONG_PTR)KPCR_BASE + Id * PAGE_SIZE);
+  BootPcr = (PKIPCR)KPCR_BASE;
+  Pcr = (PKIPCR)((ULONG_PTR)KPCR_BASE + Id * PAGE_SIZE);
 
   MmRequestPageMemoryConsumer(MC_NPPOOL, TRUE, &PrcPfn);
   MmCreateVirtualMappingForKernel((PVOID)Pcr,
@@ -129,25 +200,31 @@ KePrepareForApplicationProcessorInit(ULONG Id)
    * Create a PCR for this processor
    */
   memset(Pcr, 0, PAGE_SIZE);
-  Pcr->ProcessorNumber = Id;
-  Pcr->Tib.Self = &Pcr->Tib;
-  Pcr->Self = Pcr;
+  Pcr->Number = Id;
+  Pcr->SetMember = 1 << Id;
+  Pcr->NtTib.Self = &Pcr->NtTib;
+  Pcr->Self = (PKPCR)Pcr;
+  Pcr->Prcb = &Pcr->PrcbData;
   Pcr->Irql = SYNCH_LEVEL;
 
+  Pcr->PrcbData.SetMember = 1 << Id;
   Pcr->PrcbData.MHz = BootPcr->PrcbData.MHz;
-  Pcr->StallScaleFactor = BootPcr->StallScaleFactor; 
+  Pcr->StallScaleFactor = BootPcr->StallScaleFactor;
 
   /* Mark the end of the exception handler list */
-  Pcr->Tib.ExceptionList = (PVOID)-1;
+  Pcr->NtTib.ExceptionList = (PVOID)-1;
 
   KiGdtPrepareForApplicationProcessorInit(Id);
+
+  KeActiveProcessors |= 1 << Id;
 }
 
 VOID
+NTAPI
 KeApplicationProcessorInit(VOID)
 {
   ULONG Offset;
-  PKPCR Pcr;
+  PKIPCR Pcr;
 
   DPRINT("KeApplicationProcessorInit()\n");
 
@@ -156,15 +233,15 @@ KeApplicationProcessorInit(VOID)
      /* Enable global pages */
      Ke386SetCr4(Ke386GetCr4() | X86_CR4_PGE);
   }
-  
+
 
   Offset = InterlockedIncrementUL(&PcrsAllocated) - 1;
-  Pcr = (PKPCR)((ULONG_PTR)KPCR_BASE + Offset * PAGE_SIZE);
+  Pcr = (PKIPCR)((ULONG_PTR)KPCR_BASE + Offset * PAGE_SIZE);
 
   /*
    * Initialize the GDT
    */
-  KiInitializeGdt(Pcr);
+  KiInitializeGdt((PKPCR)Pcr);
 
   /* Get processor information. */
   Ki386GetCpuId();
@@ -172,14 +249,14 @@ KeApplicationProcessorInit(VOID)
   /* Check FPU/MMX/SSE support. */
   KiCheckFPU();
 
-  KeInitDpc(Pcr);
+  KeInitDpc(Pcr->Prcb);
 
   if (Pcr->PrcbData.FeatureBits & X86_FEATURE_SYSCALL)
   {
      extern void KiFastCallEntry(void);
 
      /* CS Selector of the target segment. */
-     Ke386Wrmsr(0x174, KERNEL_CS, 0);
+     Ke386Wrmsr(0x174, KGDT_R0_CODE, 0);
      /* Target ESP. */
      Ke386Wrmsr(0x175, 0, 0);
      /* Target EIP. */
@@ -205,10 +282,12 @@ KeApplicationProcessorInit(VOID)
   Ke386EnableInterrupts();
 }
 
-VOID INIT_FUNCTION
+VOID
+INIT_FUNCTION
+NTAPI
 KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
 {
-   PKPCR KPCR;
+   PKIPCR KPCR;
    BOOLEAN Pae = FALSE;
    BOOLEAN NoExecute = FALSE;
    PCHAR p1, p2;
@@ -218,18 +297,22 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
    /*
     * Initialize the initial PCR region. We can't allocate a page
     * with MmAllocPage() here because MmInit1() has not yet been
-    * called, so we use a predefined page in low memory 
+    * called, so we use a predefined page in low memory
     */
 
-   KPCR = (PKPCR)KPCR_BASE;
+   KPCR = (PKIPCR)KPCR_BASE;
    memset(KPCR, 0, PAGE_SIZE);
-   KPCR->Self = KPCR;
+   KPCR->Self = (PKPCR)KPCR;
+   KPCR->Prcb = &KPCR->PrcbData;
    KPCR->Irql = SYNCH_LEVEL;
-   KPCR->Tib.Self  = &KPCR->Tib;
+   KPCR->NtTib.Self = &KPCR->NtTib;
    KPCR->GDT = KiBootGdt;
    KPCR->IDT = (PUSHORT)KiIdt;
    KPCR->TSS = &KiBootTss;
-   KPCR->ProcessorNumber = 0;
+   KPCR->Number = 0;
+   KPCR->SetMember = 1 << 0;
+   KeActiveProcessors = 1 << 0;
+   KPCR->PrcbData.SetMember = 1 << 0;
    KiPcrInitDone = 1;
    PcrsAllocated++;
 
@@ -244,13 +327,16 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
    KiCheckFPU();
 
    /* Mark the end of the exception handler list */
-   KPCR->Tib.ExceptionList = (PVOID)-1;
+   KPCR->NtTib.ExceptionList = (PVOID)-1;
 
-   KeInitDpc(KPCR);
+   KeInitDpc(KPCR->Prcb);
 
    KeInitExceptions ();
    KeInitInterrupts ();
 
+   KeActiveProcessors |= 1 << 0;
+
+
    if (KPCR->PrcbData.FeatureBits & X86_FEATURE_PGE)
    {
       ULONG Flags;
@@ -286,7 +372,7 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
       p1 = p2;
    }
 #if 0
-   /* 
+   /*
     * FIXME:
     *   Make the detection of the noexecute feature more portable.
     */
@@ -305,15 +391,15 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
         Ke386NoExecute = TRUE;
          Ke386RestoreFlags(Flags);
       }
-   }  
+   }
    else
    {
       NoExecute=FALSE;
    }
 #endif
 
-   Ke386Pae = Ke386GetCr4() & X86_CR4_PAE ? TRUE : FALSE; 
-#if 0      
+   Ke386Pae = Ke386GetCr4() & X86_CR4_PAE ? TRUE : FALSE;
+#if 0
    /* Enable PAE mode */
    if ((Pae && (KPCR->PrcbData.FeatureBits & X86_FEATURE_PAE)) || NoExecute)
    {
@@ -326,7 +412,7 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
       extern void KiFastCallEntry(void);
 
       /* CS Selector of the target segment. */
-      Ke386Wrmsr(0x174, KERNEL_CS, 0);
+      Ke386Wrmsr(0x174, KGDT_R0_CODE, 0);
       /* Target ESP. */
       Ke386Wrmsr(0x175, 0, 0);
       /* Target EIP. */
@@ -334,12 +420,14 @@ KeInit1(PCHAR CommandLine, PULONG LastKernelAddress)
    }
 }
 
-VOID INIT_FUNCTION
+VOID
+INIT_FUNCTION
+NTAPI
 KeInit2(VOID)
 {
-   PKPCR Pcr = KeGetCurrentKPCR();
+   PKIPCR Pcr = (PKIPCR)KeGetCurrentKPCR();
 
-   KeInitializeBugCheck();
+   KiInitializeBugCheck();
    KeInitializeDispatcher();
    KiInitializeSystemClock();
 
@@ -389,6 +477,7 @@ KeInit2(VOID)
    DPRINT("Ke386CacheAlignment: %d\n", Ke386CacheAlignment);
    if (Ke386L1CacheSize)
    {
+
       DPRINT("Ke386L1CacheSize: %dkB\n", Ke386L1CacheSize);
    }
    if (Pcr->L2CacheSize)
@@ -400,16 +489,17 @@ KeInit2(VOID)
 VOID INIT_FUNCTION
 Ki386SetProcessorFeatures(VOID)
 {
-   PKPCR Pcr = KeGetCurrentKPCR();
+   PKIPCR Pcr = (PKIPCR)KeGetCurrentKPCR();
    OBJECT_ATTRIBUTES ObjectAttributes;
-   UNICODE_STRING KeyName;
-   UNICODE_STRING ValueName;
+   UNICODE_STRING KeyName =
+   RTL_CONSTANT_STRING(L"\\Registry\\Machine\\System\\CurrentControlSet\\Control\\Session Manager\\Kernel");
+   UNICODE_STRING ValueName = RTL_CONSTANT_STRING(L"FastSystemCallDisable");
    HANDLE KeyHandle;
    ULONG ResultLength;
    KEY_VALUE_PARTIAL_INFORMATION ValueData;
    NTSTATUS Status;
    ULONG FastSystemCallDisable = 0;
-
+   
    SharedUserData->ProcessorFeatures[PF_FLOATING_POINT_PRECISION_ERRATA] = FALSE;
    SharedUserData->ProcessorFeatures[PF_FLOATING_POINT_EMULATED] = FALSE;
    SharedUserData->ProcessorFeatures[PF_COMPARE_EXCHANGE_DOUBLE] =
@@ -418,7 +508,7 @@ Ki386SetProcessorFeatures(VOID)
       (Pcr->PrcbData.FeatureBits & X86_FEATURE_MMX);
    SharedUserData->ProcessorFeatures[PF_PPC_MOVEMEM_64BIT_OK] = FALSE;
    SharedUserData->ProcessorFeatures[PF_ALPHA_BYTE_INSTRUCTIONS] = FALSE;
-   SharedUserData->ProcessorFeatures[PF_XMMI_INSTRUCTIONS_AVAILABLE] = 
+   SharedUserData->ProcessorFeatures[PF_XMMI_INSTRUCTIONS_AVAILABLE] =
       (Pcr->PrcbData.FeatureBits & X86_FEATURE_SSE);
    SharedUserData->ProcessorFeatures[PF_3DNOW_INSTRUCTIONS_AVAILABLE] =
       (Ke386CpuidExFlags & X86_EXT_FEATURE_3DNOW);
@@ -428,25 +518,28 @@ Ki386SetProcessorFeatures(VOID)
    SharedUserData->ProcessorFeatures[PF_XMMI64_INSTRUCTIONS_AVAILABLE] =
       (Pcr->PrcbData.FeatureBits & X86_FEATURE_SSE2);
 
+   /* Does the CPU Support 'prefetchnta' (SSE)  */
+   if(Pcr->PrcbData.FeatureBits & X86_FEATURE_SSE)
+   {
+       /* Replace the ret by a nop */
+       *(PCHAR)RtlPrefetchMemoryNonTemporal = 0x90;
+   }
+
    /* Does the CPU Support Fast System Call? */
    if (Pcr->PrcbData.FeatureBits & X86_FEATURE_SYSCALL) {
-   
+
         /* FIXME: Check for Family == 6, Model < 3 and Stepping < 3 and disable */
-       
+
         /* Make sure it's not disabled in registry */
-        RtlRosInitUnicodeStringFromLiteral(&KeyName, 
-                                           L"\\Registry\\Machine\\System\\CurrentControlSet\\Control\\Session Manager\\Kernel");
-        RtlRosInitUnicodeStringFromLiteral(&ValueName, 
-                                           L"FastSystemCallDisable");
         InitializeObjectAttributes(&ObjectAttributes,
                                    &KeyName,
                                    OBJ_CASE_INSENSITIVE,
                                    NULL,
                                    NULL);
         Status = NtOpenKey(&KeyHandle, KEY_ALL_ACCESS, &ObjectAttributes);
-        
+
         if (NT_SUCCESS(Status)) {
-        
+
             /* Read the Value then Close the Key */
             Status = NtQueryValueKey(KeyHandle,
                                      &ValueName,
@@ -455,37 +548,29 @@ Ki386SetProcessorFeatures(VOID)
                                      sizeof(ValueData),
                                      &ResultLength);
             RtlMoveMemory(&FastSystemCallDisable, ValueData.Data, sizeof(ULONG));
-            
+
             NtClose(KeyHandle);
         }
-        
+
     } else {
-    
+
         /* Disable SYSENTER/SYSEXIT, because the CPU doesn't support it */
         FastSystemCallDisable = 1;
-        
+
     }
-    
+
     if (FastSystemCallDisable) {
-        
-        /* Use INT2E */   
-        SharedUserData->SystemCall[0] = 0x8D;
-        SharedUserData->SystemCall[1] = 0x54;
-        SharedUserData->SystemCall[2] = 0x24;
-        SharedUserData->SystemCall[3] = 0x08;
-        SharedUserData->SystemCall[4] = 0xCD;
-        SharedUserData->SystemCall[5] = 0x2E;
-        SharedUserData->SystemCall[6] = 0xC3;
-                         
+        /* Use INT2E */
+        const unsigned char Entry[7] = {0x8D, 0x54, 0x24, 0x08,     /* lea    0x8(%esp),%edx    */
+                                        0xCD, 0x2E,                 /* int    0x2e              */
+                                        0xC3};                      /* ret                      */
+        memcpy(&SharedUserData->SystemCall, Entry, sizeof(Entry));
     } else {
-    
         /* Use SYSENTER */
-        SharedUserData->SystemCall[0] = 0x8B;
-        SharedUserData->SystemCall[1] = 0xD4;
-        SharedUserData->SystemCall[2] = 0x0F;
-        SharedUserData->SystemCall[3] = 0x34;
-        SharedUserData->SystemCall[4] = 0xC3;    
-
+        const unsigned char Entry[5] = {0x8B, 0xD4,                 /* movl    %esp,%edx        */ 
+                                        0x0F, 0x34,                 /* sysenter                 */
+                                        0xC3};                      /* ret                      */    
+        memcpy(&SharedUserData->SystemCall, Entry, sizeof(Entry));
         /* Enable SYSENTER/SYSEXIT */
         KiFastSystemCallDisable = 0;
     }