3 * COPYRIGHT: See COPYING in the top level directory
4 * PROJECT: ReactOS kernel
5 * FILE: ntoskrnl/ke/i386/fpu.c
6 * PURPOSE: Handles the FPU
8 * PROGRAMMERS: David Welch (welch@mcmail.com)
11 /* INCLUDES *****************************************************************/
16 #include <internal/debug.h>
18 /* DEFINES *******************************************************************/
20 /* x87 Status Word exception flags */
21 #define X87_SW_IE (1<<0) /* Invalid Operation */
22 #define X87_SW_DE (1<<1) /* Denormalized Operand */
23 #define X87_SW_ZE (1<<2) /* Zero Divide */
24 #define X87_SW_OE (1<<3) /* Overflow */
25 #define X87_SW_UE (1<<4) /* Underflow */
26 #define X87_SW_PE (1<<5) /* Precision */
27 #define X87_SW_SE (1<<6) /* Stack Fault */
29 #define X87_SW_ES (1<<7) /* Error Summary */
31 /* MXCSR exception flags */
32 #define MXCSR_IE (1<<0) /* Invalid Operation */
33 #define MXCSR_DE (1<<1) /* Denormalized Operand */
34 #define MXCSR_ZE (1<<2) /* Zero Divide */
35 #define MXCSR_OE (1<<3) /* Overflow */
36 #define MXCSR_UE (1<<4) /* Underflow */
37 #define MXCSR_PE (1<<5) /* Precision */
38 #define MXCSR_DAZ (1<<6) /* Denormals Are Zeros (P4 only) */
40 /* GLOBALS *******************************************************************/
/*
 * File-level FPU state flags.
 *
 * HardwareMathSupport - cleared and later set to 1 by the FPU detection code
 *                       in this file; KeSaveFloatingPointState refuses to run
 *                       while it is 0.
 * MxcsrFeatureMask    - mask applied to every MXCSR value this file stores;
 *                       taken from the MXCsrMask field of an FXSAVE image,
 *                       with 0x0000ffbf substituted when that field reads 0.
 * XmmSupport          - NOTE(review): no use is visible in this listing.
 * FxsrSupport         - not static because another module reads it (see the
 *                       trailing comment on its definition).
 */
42 ULONG HardwareMathSupport
= 0;
43 static ULONG MxcsrFeatureMask
= 0, XmmSupport
= 0;
44 ULONG FxsrSupport
= 0; /* used by Ki386ContextSwitch for SMP */
46 /* FUNCTIONS *****************************************************************/
/*
 * KiTagWordFnsaveToFxsave
 *
 * Compresses a tag word that uses two bits per register into a form that
 * uses one bit per register: a pair of 11 (empty) becomes 0, every other
 * pair becomes 1. The result ends up in the low eight bits of tmp.
 *
 * TagWord - two-bit-per-register tag word to convert.
 *
 * NOTE(review): the return type, the braces, the declaration of tmp and the
 * return statement are not visible in this listing.
 */
49 KiTagWordFnsaveToFxsave(USHORT TagWord
)
54 * Converts the tag-word. 11 (Empty) is converted into 0, everything else into 1
56 tmp
= ~TagWord
; /* Empty is now 00, any 2 bits containing 1 mean valid */
/* OR the high bit of each pair into its low bit and keep only the low bits */
57 tmp
= (tmp
| (tmp
>> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
/* Squeeze the surviving bits together: into pairs, nibbles, then one byte */
58 tmp
= (tmp
| (tmp
>> 1)) & 0x3333; /* 00VV00VV00VV00VV */
59 tmp
= (tmp
| (tmp
>> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
60 tmp
= (tmp
| (tmp
>> 4)) & 0x00ff; /* 00000000VVVVVVVV */
/*
 * KiTagWordFxsaveToFnsave
 *
 * Rebuilds a two-bit-per-register tag word from an FXSAVE image. For each of
 * the eight registers, bit i of FxSave->TagWord says whether the register is
 * valid; a valid register is classified by inspecting its exponent and
 * significand words, and the resulting Tag is placed at bit position i * 2.
 *
 * FxSave - FXSAVE image to read (TagWord and RegisterArea are used).
 *
 * NOTE(review): the case labels of the switch, the remaining Tag assignments
 * and the return statement are not visible in this listing.
 */
67 KiTagWordFxsaveToFnsave(PFXSAVE_FORMAT FxSave
)
/* Overlay for one register slot: four significand words, then the exponent */
72 struct FPREG
{ USHORT Significand
[4]; USHORT Exponent
; } *FpReg
;
74 for (i
= 0; i
< 8; i
++)
76 if (FxSave
->TagWord
& (1 << i
)) /* valid */
/* Register slots are 16 bytes apart in the FXSAVE register area */
78 FpReg
= (struct FPREG
*)(FxSave
->RegisterArea
+ (i
* 16));
/* Classify on the exponent field with the top (sign) bit masked off */
79 switch (FpReg
->Exponent
& 0x00007fff)
82 if (FpReg
->Significand
[0] == 0 && FpReg
->Significand
[1] == 0 &&
83 FpReg
->Significand
[2] == 0 && FpReg
->Significand
[3] == 0)
86 Tag
= 2; /* Special */
90 Tag
= 2; /* Special */
/* Tests the most significant bit of the significand */
94 if (FpReg
->Significand
[3] & 0x00008000)
97 Tag
= 2; /* Special */
/* Each register owns two adjacent bits of the resulting tag word */
105 TagWord
|= Tag
<< (i
* 2);
/*
 * KiFnsaveToFxsaveFormat
 *
 * Fills an FXSAVE image from an FNSAVE image.
 *
 * FxSave - destination FXSAVE image.
 * FnSave - source FNSAVE image (not modified by the visible code).
 */
113 KiFnsaveToFxsaveFormat(PFXSAVE_FORMAT FxSave
, CONST PFNSAVE_FORMAT FnSave
)
/* Control, status and tag words are narrowed to 16 bits */
117 FxSave
->ControlWord
= (USHORT
)FnSave
->ControlWord
;
118 FxSave
->StatusWord
= (USHORT
)FnSave
->StatusWord
;
119 FxSave
->TagWord
= KiTagWordFnsaveToFxsave((USHORT
)FnSave
->TagWord
);
/* The opcode is taken from the upper half of the FNSAVE error selector field */
120 FxSave
->ErrorOpcode
= (USHORT
)(FnSave
->ErrorSelector
>> 16);
121 FxSave
->ErrorOffset
= FnSave
->ErrorOffset
;
122 FxSave
->ErrorSelector
= FnSave
->ErrorSelector
& 0x0000ffff;
123 FxSave
->DataOffset
= FnSave
->DataOffset
;
124 FxSave
->DataSelector
= FnSave
->DataSelector
& 0x0000ffff;
/* The source has no MXCSR: store 0x1f80 limited to the supported bits */
126 FxSave
->MXCsr
= 0x00001f80 & MxcsrFeatureMask
;
129 FxSave
->MXCsrMask
= MxcsrFeatureMask
;
/* One memset spans Reserved3 and Reserved4.
   NOTE(review): this assumes Reserved4 immediately follows Reserved3 -
   confirm against the FXSAVE_FORMAT declaration. */
130 memset(FxSave
->Reserved3
, 0, sizeof(FxSave
->Reserved3
) +
131 sizeof(FxSave
->Reserved4
)); /* Don't zero Align16Byte because Context->ExtendedRegisters
132 is only 512 bytes, not 520 */
/* Registers are 10 bytes each in the source and sit in 16-byte slots in the
   destination; the 6 trailing bytes of every slot are zeroed */
133 for (i
= 0; i
< 8; i
++)
135 memcpy(FxSave
->RegisterArea
+ (i
* 16), FnSave
->RegisterArea
+ (i
* 10), 10);
136 memset(FxSave
->RegisterArea
+ (i
* 16) + 10, 0, 6);
/*
 * KiFxsaveToFnsaveFormat
 *
 * Fills an FNSAVE image from an FXSAVE image.
 *
 * FnSave - destination FNSAVE image.
 * FxSave - source FXSAVE image (also passed to KiTagWordFxsaveToFnsave).
 */
141 KiFxsaveToFnsaveFormat(PFNSAVE_FORMAT FnSave
, CONST PFXSAVE_FORMAT FxSave
)
/* The upper 16 bits of the control, status and tag words are forced to ones */
145 FnSave
->ControlWord
= 0xffff0000 | FxSave
->ControlWord
;
146 FnSave
->StatusWord
= 0xffff0000 | FxSave
->StatusWord
;
147 FnSave
->TagWord
= 0xffff0000 | KiTagWordFxsaveToFnsave(FxSave
);
148 FnSave
->ErrorOffset
= FxSave
->ErrorOffset
;
/* Low half: selector. Upper half: the opcode from the FXSAVE image */
149 FnSave
->ErrorSelector
= FxSave
->ErrorSelector
& 0x0000ffff;
150 FnSave
->ErrorSelector
|= FxSave
->ErrorOpcode
<< 16;
151 FnSave
->DataOffset
= FxSave
->DataOffset
;
152 FnSave
->DataSelector
= FxSave
->DataSelector
| 0xffff0000;
/* Copy the first 10 bytes of every 16-byte source slot into packed 10-byte
   destination registers */
153 for (i
= 0; i
< 8; i
++)
155 memcpy(FnSave
->RegisterArea
+ (i
* 10), FxSave
->RegisterArea
+ (i
* 16), 10);
/*
 * KiFloatingSaveAreaToFxSaveArea
 *
 * Loads an FX_SAVE_AREA from a FLOATING_SAVE_AREA. Two ways of filling the
 * union are shown: converting into U.FxArea with KiFnsaveToFxsaveFormat, or
 * copying the bytes straight into U.FnArea.
 *
 * FxSaveArea       - destination save area.
 * FloatingSaveArea - source area; it is reinterpreted as an FNSAVE image for
 *                    the conversion call.
 *
 * NOTE(review): the condition that chooses between the two paths is not
 * visible in this listing (presumably FXSR availability - confirm).
 */
161 KiFloatingSaveAreaToFxSaveArea(PFX_SAVE_AREA FxSaveArea
, CONST FLOATING_SAVE_AREA
*FloatingSaveArea
)
165 KiFnsaveToFxsaveFormat(&FxSaveArea
->U
.FxArea
, (PFNSAVE_FORMAT
)FloatingSaveArea
);
169 memcpy(&FxSaveArea
->U
.FnArea
, FloatingSaveArea
, sizeof(FxSaveArea
->U
.FnArea
));
/* Bookkeeping fields outside the union */
171 FxSaveArea
->NpxSavedCpu
= 0;
172 FxSaveArea
->Cr0NpxState
= FloatingSaveArea
->Cr0NpxState
;
/*
 * KiFxSaveAreaToFloatingSaveArea
 *
 * Fills a FLOATING_SAVE_AREA from an FX_SAVE_AREA. Two ways are shown:
 * converting U.FxArea with KiFxsaveToFnsaveFormat, or copying U.FnArea
 * byte for byte.
 *
 * FloatingSaveArea - destination area; it is reinterpreted as an FNSAVE image
 *                    for the conversion call.
 * FxSaveArea       - source save area.
 *
 * NOTE(review): the condition that chooses between the two paths is not
 * visible in this listing (presumably FXSR availability - confirm).
 */
177 KiFxSaveAreaToFloatingSaveArea(FLOATING_SAVE_AREA
*FloatingSaveArea
, CONST PFX_SAVE_AREA FxSaveArea
)
181 KiFxsaveToFnsaveFormat((PFNSAVE_FORMAT
)FloatingSaveArea
, &FxSaveArea
->U
.FxArea
);
185 memcpy(FloatingSaveArea
, &FxSaveArea
->U
.FnArea
, sizeof(FxSaveArea
->U
.FnArea
));
187 FloatingSaveArea
->Cr0NpxState
= FxSaveArea
->Cr0NpxState
;
/*
 * KiContextToFxSaveArea
 *
 * Applies the floating point parts of a CONTEXT to an FX_SAVE_AREA.
 *
 * FxSaveArea - save area to update.
 * Context    - context whose ContextFlags select what is applied.
 *
 * Returns FpuContextChanged, which starts as FALSE and is set to TRUE on the
 * paths below that modify FxSaveArea.
 *
 * NOTE(review): brace lines and any further conditions between the visible
 * statements are missing from this listing, so the exact nesting of the
 * statements should be confirmed against the full source.
 */
192 KiContextToFxSaveArea(PFX_SAVE_AREA FxSaveArea
, PCONTEXT Context
)
194 BOOL FpuContextChanged
= FALSE
;
196 /* First of all convert the FLOATING_SAVE_AREA into the FX_SAVE_AREA */
197 if ((Context
->ContextFlags
& CONTEXT_FLOATING_POINT
) == CONTEXT_FLOATING_POINT
)
199 KiFloatingSaveAreaToFxSaveArea(FxSaveArea
, &Context
->FloatSave
);
200 FpuContextChanged
= TRUE
;
203 /* Now merge the FX_SAVE_AREA from the context with the destination area */
204 if ((Context
->ContextFlags
& CONTEXT_EXTENDED_REGISTERS
) == CONTEXT_EXTENDED_REGISTERS
)
/* Context->ExtendedRegisters is read as an FXSAVE image */
208 PFXSAVE_FORMAT src
= (PFXSAVE_FORMAT
)Context
->ExtendedRegisters
;
209 PFXSAVE_FORMAT dst
= &FxSaveArea
->U
.FxArea
;
/* Only MXCSR bits present in MxcsrFeatureMask are accepted from the context */
210 dst
->MXCsr
= src
->MXCsr
& MxcsrFeatureMask
;
/* One copy spans Reserved3 and Reserved4.
   NOTE(review): assumes the two fields are adjacent - confirm. */
211 memcpy(dst
->Reserved3
, src
->Reserved3
,
212 sizeof(src
->Reserved3
) + sizeof(src
->Reserved4
));
/* The x87 fields come from ExtendedRegisters only when FloatSave was not
   already applied above */
214 if ((Context
->ContextFlags
& CONTEXT_FLOATING_POINT
) != CONTEXT_FLOATING_POINT
)
216 dst
->ControlWord
= src
->ControlWord
;
217 dst
->StatusWord
= src
->StatusWord
;
218 dst
->TagWord
= src
->TagWord
;
219 dst
->ErrorOpcode
= src
->ErrorOpcode
;
220 dst
->ErrorOffset
= src
->ErrorOffset
;
221 dst
->ErrorSelector
= src
->ErrorSelector
;
222 dst
->DataOffset
= src
->DataOffset
;
223 dst
->DataSelector
= src
->DataSelector
;
224 memcpy(dst
->RegisterArea
, src
->RegisterArea
, sizeof(src
->RegisterArea
));
226 FxSaveArea
->NpxSavedCpu
= 0;
227 FxSaveArea
->Cr0NpxState
= 0;
229 FpuContextChanged
= TRUE
;
233 return FpuContextChanged
;
240 unsigned short int status
;
243 PKPRCB Prcb
= KeGetCurrentPrcb();
245 Ke386SaveFlags(Flags
);
246 Ke386DisableInterrupts();
248 HardwareMathSupport
= 0;
253 cr0
|= X86_CR0_NE
| X86_CR0_MP
;
254 cr0
&= ~(X86_CR0_EM
| X86_CR0_TS
);
257 #if defined(__GNUC__)
258 asm volatile("fninit\n\t");
259 asm volatile("fstsw %0\n\t" : "=a" (status
));
260 #elif defined(_MSC_VER)
267 #error Unknown compiler for inline assembler
272 /* Set the EM flag in CR0 so any FPU instructions cause a trap. */
273 Ke386SetCr0(Ke386GetCr0() | X86_CR0_EM
);
274 Ke386RestoreFlags(Flags
);
278 /* fsetpm for i287, ignored by i387 */
279 #if defined(__GNUC__)
280 asm volatile(".byte 0xDB, 0xE4\n\t");
281 #elif defined(_MSC_VER)
282 __asm _emit
0xDB __asm _emit
0xe4
284 #error Unknown compiler for inline assembler
287 HardwareMathSupport
= 1;
289 /* check for and enable MMX/SSE support if possible */
290 if ((Prcb
->FeatureBits
& X86_FEATURE_FXSR
) != 0)
292 BYTE DummyArea
[sizeof(FX_SAVE_AREA
) + 15];
293 PFX_SAVE_AREA FxSaveArea
;
298 /* we need a 16 byte aligned FX_SAVE_AREA */
299 FxSaveArea
= (PFX_SAVE_AREA
)(((ULONG_PTR
)DummyArea
+ 0xf) & (~0x0f));
301 Ke386SetCr4(Ke386GetCr4() | X86_CR4_OSFXSR
);
302 memset(&FxSaveArea
->U
.FxArea
, 0, sizeof(FxSaveArea
->U
.FxArea
));
303 asm volatile("fxsave %0" : : "m"(FxSaveArea
->U
.FxArea
));
304 MxcsrFeatureMask
= FxSaveArea
->U
.FxArea
.MXCsrMask
;
305 if (MxcsrFeatureMask
== 0)
307 MxcsrFeatureMask
= 0x0000ffbf;
310 /* FIXME: Check for SSE3 in Ke386CpuidFlags2! */
311 if (Prcb
->FeatureBits
& (X86_FEATURE_SSE
| X86_FEATURE_SSE2
))
313 Ke386SetCr4(Ke386GetCr4() | X86_CR4_OSXMMEXCPT
);
319 Ke386SetCr0(Ke386GetCr0() | X86_CR0_TS
);
320 Ke386RestoreFlags(Flags
);
/*
 * KiGetFpuState
 *
 * Locates the FPU save area of a thread and, if the thread's FPU state is
 * marked dirty, writes the current hardware state into it first. The work is
 * done at DISPATCH_LEVEL and the previous IRQL is restored afterwards.
 *
 * Thread - thread whose save area is wanted.
 *
 * FxSaveArea stays NULL unless NPX_STATE_VALID is set in Thread->NpxState.
 *
 * NOTE(review): the return statement and the condition choosing between
 * fxsave and fnsave are not visible in this listing.
 */
325 KiGetFpuState(PKTHREAD Thread
)
327 PFX_SAVE_AREA FxSaveArea
= NULL
;
331 KeRaiseIrql(DISPATCH_LEVEL
, &OldIrql
);
332 if (Thread
->NpxState
& NPX_STATE_VALID
)
/* The save area sits directly below the thread's InitialStack address */
334 FxSaveArea
= (PFX_SAVE_AREA
)((ULONG_PTR
)Thread
->InitialStack
- sizeof (FX_SAVE_AREA
));
335 if (Thread
->NpxState
& NPX_STATE_DIRTY
)
/* A dirty state is expected to belong to the thread that owns the FPU on
   this processor */
337 ASSERT(KeGetCurrentPrcb()->NpxThread
== Thread
);
340 asm volatile("clts");
342 asm volatile("fxsave %0" : : "m"(FxSaveArea
->U
.FxArea
));
345 asm volatile("fnsave %0" : : "m"(FxSaveArea
->U
.FnArea
));
346 /* FPU state has to be reloaded because fnsave changes it. */
348 KeGetCurrentPrcb()->NpxThread
= NULL
;
/* Assignment, not OR: this drops NPX_STATE_DIRTY and leaves only VALID */
351 Thread
->NpxState
= NPX_STATE_VALID
;
354 KeLowerIrql(OldIrql
);
/*
 * KiHandleFpuFault
 *
 * Handles FPU related processor exceptions.
 *
 * Tf          - trap frame of the faulting context (EFlags, SegCs and Eip
 *               are read).
 * ExceptionNr - 7 (device not present) performs the lazy FPU state switch;
 *               16 (math fault) and 19 (XMM fault) are turned into an
 *               exception record and dispatched.
 *
 * The visible return statements yield STATUS_SUCCESS on the handled paths;
 * a trailing return of STATUS_UNSUCCESSFUL closes the function.
 *
 * NOTE(review): braces, several declarations and the preprocessor
 * conditionals (an "#endif !CONFIG_SMP" remains) are missing from this
 * listing, so the conditions that select fxsave/fnsave and fxrstor/frstor
 * are not visible.
 */
361 KiHandleFpuFault(PKTRAP_FRAME Tf
, ULONG ExceptionNr
)
363 if (ExceptionNr
== 7) /* device not present */
365 BOOL FpuInitialized
= FALSE
;
366 unsigned int cr0
= Ke386GetCr0();
367 PKTHREAD CurrentThread
;
368 PFX_SAVE_AREA FxSaveArea
;
/* Expected on entry: TS set, not in virtual-8086 mode, EM clear */
375 ASSERT((cr0
& X86_CR0_TS
) == X86_CR0_TS
);
376 ASSERT((Tf
->EFlags
& X86_EFLAGS_VM
) == 0);
377 ASSERT((cr0
& X86_CR0_EM
) == 0);
379 /* disable scheduler, clear TS in cr0 */
380 ASSERT_IRQL(DISPATCH_LEVEL
);
381 KeRaiseIrql(DISPATCH_LEVEL
, &oldIrql
);
382 asm volatile("clts");
384 CurrentThread
= KeGetCurrentThread();
386 NpxThread
= KeGetCurrentPrcb()->NpxThread
;
389 ASSERT(CurrentThread
!= NULL
);
390 DPRINT("Device not present exception happened! (Cr0 = 0x%x, NpxState = 0x%x)\n", cr0
, CurrentThread
->NpxState
);
393 /* check if the current thread already owns the FPU */
394 if (NpxThread
!= CurrentThread
) /* FIXME: maybe this could be an assertion */
396 /* save the FPU state into the owner's save area */
397 if (NpxThread
!= NULL
)
399 KeGetCurrentPrcb()->NpxThread
= NULL
;
/* The owner's save area sits directly below its InitialStack address */
400 FxSaveArea
= (PFX_SAVE_AREA
)((ULONG_PTR
)NpxThread
->InitialStack
- sizeof (FX_SAVE_AREA
));
401 /* the fnsave might raise a delayed #MF exception */
404 asm volatile("fxsave %0" : : "m"(FxSaveArea
->U
.FxArea
));
408 asm volatile("fnsave %0" : : "m"(FxSaveArea
->U
.FnArea
));
/* Remembered so the fninit further down is skipped after an fnsave */
409 FpuInitialized
= TRUE
;
411 NpxThread
->NpxState
= NPX_STATE_VALID
;
413 #endif /* !CONFIG_SMP */
415 /* restore the state of the current thread */
416 ASSERT((CurrentThread
->NpxState
& NPX_STATE_DIRTY
) == 0);
417 FxSaveArea
= (PFX_SAVE_AREA
)((ULONG_PTR
)CurrentThread
->InitialStack
- sizeof (FX_SAVE_AREA
));
418 if (CurrentThread
->NpxState
& NPX_STATE_VALID
)
/* Strip MXCSR bits outside MxcsrFeatureMask before the image is loaded */
422 FxSaveArea
->U
.FxArea
.MXCsr
&= MxcsrFeatureMask
;
423 asm volatile("fxrstor %0" : : "m"(FxSaveArea
->U
.FxArea
));
427 asm volatile("frstor %0" : : "m"(FxSaveArea
->U
.FnArea
));
430 else /* NpxState & NPX_STATE_INVALID */
432 DPRINT("Setting up clean FPU state\n");
/* Build a zeroed FXSAVE image with control word 0x037f and the masked
   0x1f80 MXCSR value, then load it */
435 memset(&FxSaveArea
->U
.FxArea
, 0, sizeof(FxSaveArea
->U
.FxArea
));
436 FxSaveArea
->U
.FxArea
.ControlWord
= 0x037f;
439 FxSaveArea
->U
.FxArea
.MXCsr
= 0x00001f80 & MxcsrFeatureMask
;
441 asm volatile("fxrstor %0" : : "m"(FxSaveArea
->U
.FxArea
));
443 else if (!FpuInitialized
)
445 asm volatile("fninit");
/* The current thread now owns the FPU on this processor */
448 KeGetCurrentPrcb()->NpxThread
= CurrentThread
;
453 CurrentThread
->NpxState
|= NPX_STATE_DIRTY
;
454 KeLowerIrql(oldIrql
);
455 DPRINT("Device not present exception handled!\n");
457 return STATUS_SUCCESS
;
459 else /* ExceptionNr == 16 || ExceptionNr == 19 */
462 KPROCESSOR_MODE PreviousMode
;
463 PKTHREAD CurrentThread
, NpxThread
;
/* Seven ULONGs of stack storage, accessed through an FNSAVE_FORMAT pointer,
   receive the environment written by fnstenv */
465 ULONG FpuEnvBuffer
[7];
466 PFNSAVE_FORMAT FpuEnv
= (PFNSAVE_FORMAT
)FpuEnvBuffer
;
468 ASSERT(ExceptionNr
== 16 || ExceptionNr
== 19); /* math fault or XMM fault*/
470 KeRaiseIrql(DISPATCH_LEVEL
, &OldIrql
);
472 NpxThread
= KeGetCurrentPrcb()->NpxThread
;
473 CurrentThread
= KeGetCurrentThread();
/* With no FPU owner on this processor the fault is dropped */
474 if (NpxThread
== NULL
)
476 KeLowerIrql(OldIrql
);
477 DPRINT("Math/Xmm fault ignored! (NpxThread == NULL)\n");
478 return STATUS_SUCCESS
;
480 if (ExceptionNr
== 16)
/* NOTE(review): fnstenv writes to its operand, yet the operand is listed as
   an input ("m") rather than an output - confirm the compiler cannot cache
   *FpuEnv across these statements. */
482 asm volatile("fnstenv %0" : : "m"(*FpuEnv
));
483 asm volatile("fldenv %0" : : "m"(*FpuEnv
)); /* Stupid x87... */
484 FpuEnv
->StatusWord
&= 0xffff;
486 KeLowerIrql(OldIrql
);
/* Previous mode is derived from the code segment selector in the trap frame */
488 PreviousMode
= ((Tf
->SegCs
& 0xffff) == (KGDT_R3_CODE
| RPL_MASK
)) ? (UserMode
) : (KernelMode
);
489 DPRINT("Math/Xmm fault happened! (PreviousMode = %s)\n",
490 (PreviousMode
!= KernelMode
) ? ("UserMode") : ("KernelMode"));
492 ASSERT(NpxThread
== CurrentThread
); /* FIXME: Is not always true I think */
494 /* Get FPU/XMM state */
/* NOTE(review): KeLowerIrql(OldIrql) already appears a few statements above
   with no raise in between in the visible lines - confirm this second call
   is intended. */
495 KeLowerIrql(OldIrql
);
497 /* Determine exception code */
498 if (ExceptionNr
== 16)
/* NOTE(review): FpuStatusWord is not declared in the visible lines; this
   only compiles if DPRINT discards its arguments. */
500 DPRINT("FpuStatusWord = 0x%04x\n", FpuStatusWord
);
/* The first matching status flag, tested in this fixed order, picks the code */
502 if (FpuEnv
->StatusWord
& X87_SW_IE
)
503 Er
.ExceptionCode
= STATUS_FLOAT_INVALID_OPERATION
;
504 else if (FpuEnv
->StatusWord
& X87_SW_DE
)
505 Er
.ExceptionCode
= STATUS_FLOAT_DENORMAL_OPERAND
;
506 else if (FpuEnv
->StatusWord
& X87_SW_ZE
)
507 Er
.ExceptionCode
= STATUS_FLOAT_DIVIDE_BY_ZERO
;
508 else if (FpuEnv
->StatusWord
& X87_SW_OE
)
509 Er
.ExceptionCode
= STATUS_FLOAT_OVERFLOW
;
510 else if (FpuEnv
->StatusWord
& X87_SW_UE
)
511 Er
.ExceptionCode
= STATUS_FLOAT_UNDERFLOW
;
512 else if (FpuEnv
->StatusWord
& X87_SW_PE
)
513 Er
.ExceptionCode
= STATUS_FLOAT_INEXACT_RESULT
;
514 else if (FpuEnv
->StatusWord
& X87_SW_SE
)
515 Er
.ExceptionCode
= STATUS_FLOAT_STACK_CHECK
;
517 ASSERT(0); /* not reached */
/* For a math fault the address comes from the saved FPU environment */
518 Er
.ExceptionAddress
= (PVOID
)FpuEnv
->ErrorOffset
;
520 else /* ExceptionNr == 19 */
522 Er
.ExceptionCode
= STATUS_FLOAT_MULTIPLE_TRAPS
;
523 Er
.ExceptionAddress
= (PVOID
)Tf
->Eip
;
526 Er
.ExceptionFlags
= 0;
527 Er
.ExceptionRecord
= NULL
;
528 Er
.NumberParameters
= 0;
530 /* Dispatch exception */
531 DPRINT("Dispatching exception (ExceptionCode = 0x%08x)\n", Er
.ExceptionCode
);
532 KiDispatchException(&Er
, NULL
, Tf
, PreviousMode
, TRUE
);
534 DPRINT("Math-fault handled!\n");
535 return STATUS_SUCCESS
;
538 return STATUS_UNSUCCESSFUL
;
542 /* This is a rather naive implementation of Ke(Save/Restore)FloatingPointState
543 which will not work for WDM drivers. Please feel free to improve */
/*
 * KeSaveFloatingPointState
 *
 * Saves the current x87 state with fnsave into a buffer allocated from
 * non-paged pool and stores the buffer pointer in the caller's
 * KFLOATING_SAVE.
 *
 * Save - receives the pointer to the allocated FNSAVE_FORMAT buffer (written
 *        through a PVOID* cast of Save).
 *
 * Returns STATUS_ILLEGAL_FLOAT_CONTEXT when HardwareMathSupport is 0 and
 * STATUS_SUCCESS after the state has been saved. STATUS_INSUFFICIENT_RESOURCES
 * is also returned; NOTE(review): its condition is not visible in this
 * listing (presumably a failed allocation - confirm).
 */
546 KeSaveFloatingPointState(OUT PKFLOATING_SAVE Save
)
548 PFNSAVE_FORMAT FpState
;
550 ASSERT_IRQL(DISPATCH_LEVEL
);
552 /* check if we are doing software emulation */
553 if (!HardwareMathSupport
)
555 return STATUS_ILLEGAL_FLOAT_CONTEXT
;
558 FpState
= ExAllocatePool(NonPagedPool
, sizeof (FNSAVE_FORMAT
));
561 return STATUS_INSUFFICIENT_RESOURCES
;
/* The caller's KFLOATING_SAVE is used as storage for the buffer pointer */
563 *((PVOID
*) Save
) = FpState
;
565 #if defined(__GNUC__)
566 asm volatile("fnsave %0\n\t" : "=m" (*FpState
));
567 #elif defined(_MSC_VER)
568 __asm mov eax
, FpState
;
571 #error Unknown compiler for inline assembler
/* Record the IRQL of the save; KeRestoreFloatingPointState compares it
   against the IRQL at restore time */
574 KeGetCurrentThread()->DispatcherHeader
.NpxIrql
= KeGetCurrentIrql();
576 return STATUS_SUCCESS
;
/*
 * KeRestoreFloatingPointState
 *
 * Reloads the x87 state from the buffer whose pointer is stored in the
 * caller's KFLOATING_SAVE.
 *
 * Save - holds the buffer pointer, read through a PVOID* cast of Save.
 *
 * Bugchecks with UNDEFINED_BUG_CODE when the current IRQL differs from the
 * value recorded in the current thread's DispatcherHeader.NpxIrql.
 * Returns STATUS_SUCCESS.
 *
 * NOTE(review): no release of the FpState buffer is visible in this listing;
 * confirm it is freed on a line not shown here.
 */
581 KeRestoreFloatingPointState(IN PKFLOATING_SAVE Save
)
583 PFNSAVE_FORMAT FpState
= *((PVOID
*) Save
);
585 if (KeGetCurrentThread()->DispatcherHeader
.NpxIrql
!= KeGetCurrentIrql())
587 KEBUGCHECK(UNDEFINED_BUG_CODE
);
590 #if defined(__GNUC__)
/* Pending x87 exception flags are cleared before the saved image is loaded */
591 asm volatile("fnclex\n\t");
/* NOTE(review): frstor only reads its operand, but the operand is declared
   as an output ("=m"); an input constraint looks like what is meant - confirm. */
592 asm volatile("frstor %0\n\t" : "=m" (*FpState
));
593 #elif defined(_MSC_VER)
594 __asm mov eax
, FpState
;
597 #error Unknown compiler for inline assembler
602 return STATUS_SUCCESS
;