3 * COPYRIGHT: See COPYING in the top level directory
4 * PROJECT: ReactOS kernel
5 * FILE: ntoskrnl/ke/i386/fpu.c
6 * PURPOSE: Handles the FPU
8 * PROGRAMMERS: David Welch (welch@mcmail.com)
11 /* INCLUDES *****************************************************************/
16 #include <internal/debug.h>
18 /* DEFINES *******************************************************************/
20 /* x87 Status Word exception flags */
/* These bits are tested, in the order IE, DE, ZE, OE, UE, PE, SE, against the
   low 16 bits of FloatSave.StatusWord when a math fault (exception 16) is
   translated into a STATUS_FLOAT_* code. */
21 #define X87_SW_IE (1<<0) /* Invalid Operation */
22 #define X87_SW_DE (1<<1) /* Denormalized Operand */
23 #define X87_SW_ZE (1<<2) /* Zero Divide */
24 #define X87_SW_OE (1<<3) /* Overflow */
25 #define X87_SW_UE (1<<4) /* Underflow */
26 #define X87_SW_PE (1<<5) /* Precision */
27 #define X87_SW_SE (1<<6) /* Stack Fault */
29 #define X87_SW_ES (1<<7) /* Error Summary */
31 /* MXCSR exception flags */
/* NOTE(review): none of the MXCSR_* names is referenced in the portion of the
   file reviewed here; the XMM fault path reports a fixed status code. */
32 #define MXCSR_IE (1<<0) /* Invalid Operation */
33 #define MXCSR_DE (1<<1) /* Denormalized Operand */
34 #define MXCSR_ZE (1<<2) /* Zero Divide */
35 #define MXCSR_OE (1<<3) /* Overflow */
36 #define MXCSR_UE (1<<4) /* Underflow */
37 #define MXCSR_PE (1<<5) /* Precision */
38 #define MXCSR_DAZ (1<<6) /* Denormals Are Zeros (P4 only) */
40 /* GLOBALS *******************************************************************/
/* Non-zero once the FPU initialisation code has accepted the hardware FPU; it
   is reset to 0 at the start of that code and set to 1 further down.
   KeSaveFloatingPointState refuses to run while it is 0. */
42 ULONG HardwareMathSupport
= 0;
/* MxcsrFeatureMask: mask applied to every MXCSR value this file stores. It is
   read from the MXCsrMask field of an fxsave image during initialisation and
   falls back to 0x0000ffbf when that field is 0.
   XmmSupport: NOTE(review): no writer is visible in the reviewed part of the file. */
43 static ULONG MxcsrFeatureMask
= 0, XmmSupport
= 0;
/* NOTE(review): no writer of FxsrSupport is visible in the reviewed part of the file. */
44 ULONG FxsrSupport
= 0; /* used by Ki386ContextSwitch for SMP */
46 /* FUNCTIONS *****************************************************************/
/*
 * KiTagWordFnsaveToFxsave
 *
 * Compacts a tag word that uses two bits per register (16 bits total) into
 * one that uses a single bit per register, held in the low 8 bits of tmp.
 * A two-bit field of 11 becomes 0; any other value becomes 1.
 *
 * TagWord - two-bits-per-register tag word.
 *
 * NOTE(review): the declaration of tmp and the return statement are not
 * visible in this excerpt; presumably tmp is returned - confirm.
 */
49 KiTagWordFnsaveToFxsave(USHORT TagWord
)
54 * Converts the tag-word. 11 (Empty) is converted into 0, everything else into 1
/* Invert first so that "empty" (11) turns into 00 and the folds below only
   have to OR neighbouring bits together. */
56 tmp
= ~TagWord
; /* Empty is now 00, any 2 bits containing 1 mean valid */
/* Each step ORs a field with its shifted neighbour and masks away the gaps,
   halving the spacing between the per-register bits. */
57 tmp
= (tmp
| (tmp
>> 1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
58 tmp
= (tmp
| (tmp
>> 1)) & 0x3333; /* 00VV00VV00VV00VV */
59 tmp
= (tmp
| (tmp
>> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
60 tmp
= (tmp
| (tmp
>> 4)) & 0x00ff; /* 00000000VVVVVVVV */
/*
 * KiTagWordFxsaveToFnsave
 *
 * Rebuilds a two-bits-per-register tag word from an FXSAVE image. For each of
 * the 8 registers whose bit is set in FxSave->TagWord, the register contents
 * (16-byte slot in RegisterArea) are inspected to choose the 2-bit tag, which
 * is then OR-ed into TagWord at bit position i * 2.
 *
 * FxSave - FXSAVE image to read the one-bit tags and register contents from.
 *
 * NOTE(review): the case labels of the switch, the tag values other than
 * 2 (Special), the handling of registers whose bit is clear and the return
 * statement are not visible in this excerpt.
 */
66 KiTagWordFxsaveToFnsave(PFXSAVE_FORMAT FxSave
)
/* Local view of one register: four 16-bit significand words followed by the
   16-bit sign/exponent word. */
71 struct FPREG
{ USHORT Significand
[4]; USHORT Exponent
; } *FpReg
;
73 for (i
= 0; i
< 8; i
++)
75 if (FxSave
->TagWord
& (1 << i
)) /* valid */
/* Registers sit in 16-byte slots inside the FXSAVE register area. */
77 FpReg
= (struct FPREG
*)(FxSave
->RegisterArea
+ (i
* 16));
/* Classify on the exponent with the sign bit (bit 15) masked off. */
78 switch (FpReg
->Exponent
& 0x00007fff)
/* All four significand words zero? */
81 if (FpReg
->Significand
[0] == 0 && FpReg
->Significand
[1] == 0 &&
82 FpReg
->Significand
[2] == 0 && FpReg
->Significand
[3] == 0)
88 Tag
= 2; /* Special */
93 Tag
= 2; /* Special */
/* Tests the top bit of the most significant significand word. */
97 if (FpReg
->Significand
[3] & 0x00008000)
103 Tag
= 2; /* Special */
/* Two tag bits per register in the result. */
112 TagWord
|= Tag
<< (i
* 2);
/*
 * KiFnsaveToFxsaveFormat
 *
 * Fills an FXSAVE image from an FNSAVE image.
 *
 * FxSave - destination FXSAVE image.
 * FnSave - source FNSAVE image.
 *
 * The control, status and tag words are narrowed to 16 bits, the error opcode
 * is taken from the upper half of FnSave->ErrorSelector, MXCsr receives
 * 0x00001f80 masked by MxcsrFeatureMask, and the eight registers are copied
 * from packed 10-byte entries into 16-byte slots whose last 6 bytes are
 * zeroed.
 */
119 KiFnsaveToFxsaveFormat(PFXSAVE_FORMAT FxSave
, CONST PFNSAVE_FORMAT FnSave
)
123 FxSave
->ControlWord
= (USHORT
)FnSave
->ControlWord
;
124 FxSave
->StatusWord
= (USHORT
)FnSave
->StatusWord
;
/* Two-bits-per-register tags are compacted to one bit per register. */
125 FxSave
->TagWord
= KiTagWordFnsaveToFxsave((USHORT
)FnSave
->TagWord
);
/* The FNSAVE image keeps the opcode in the high 16 bits of ErrorSelector. */
126 FxSave
->ErrorOpcode
= (USHORT
)(FnSave
->ErrorSelector
>> 16);
127 FxSave
->ErrorOffset
= FnSave
->ErrorOffset
;
128 FxSave
->ErrorSelector
= FnSave
->ErrorSelector
& 0x0000ffff;
129 FxSave
->DataOffset
= FnSave
->DataOffset
;
130 FxSave
->DataSelector
= FnSave
->DataSelector
& 0x0000ffff;
/* The FNSAVE image carries no MXCSR, so a fixed value is used, restricted to
   the bits allowed by MxcsrFeatureMask. */
132 FxSave
->MXCsr
= 0x00001f80 & MxcsrFeatureMask
;
135 FxSave
->MXCsrMask
= MxcsrFeatureMask
;
/* One memset sized for Reserved3 plus Reserved4.
   NOTE(review): this assumes Reserved4 directly follows Reserved3 in
   FXSAVE_FORMAT - confirm against the structure definition. */
136 memset(FxSave
->Reserved3
, 0, sizeof(FxSave
->Reserved3
) +
137 sizeof(FxSave
->Reserved4
)); /* XXX - doesn't zero Align16Byte because
138 Context->ExtendedRegisters is only 512 bytes, not 520 */
139 for (i
= 0; i
< 8; i
++)
/* 10 bytes of register data per entry; the remaining 6 bytes of each 16-byte
   FXSAVE slot are cleared. */
141 memcpy(FxSave
->RegisterArea
+ (i
* 16), FnSave
->RegisterArea
+ (i
* 10), 10);
142 memset(FxSave
->RegisterArea
+ (i
* 16) + 10, 0, 6);
/*
 * KiFxsaveToFnsaveFormat
 *
 * Fills an FNSAVE image from an FXSAVE image; the inverse of
 * KiFnsaveToFxsaveFormat for the x87 part of the state.
 *
 * FnSave - destination FNSAVE image.
 * FxSave - source FXSAVE image.
 *
 * The 16-bit control, status, tag and data-selector values are widened with
 * the upper 16 bits forced to 1, the error opcode is stored in the upper half
 * of ErrorSelector, and the eight registers are copied from 16-byte slots into
 * packed 10-byte entries.
 */
147 KiFxsaveToFnsaveFormat(PFNSAVE_FORMAT FnSave
, CONST PFXSAVE_FORMAT FxSave
)
151 FnSave
->ControlWord
= 0xffff0000 | FxSave
->ControlWord
;
152 FnSave
->StatusWord
= 0xffff0000 | FxSave
->StatusWord
;
/* The two-bit tags are recomputed from the register contents. */
153 FnSave
->TagWord
= 0xffff0000 | KiTagWordFxsaveToFnsave(FxSave
);
154 FnSave
->ErrorOffset
= FxSave
->ErrorOffset
;
/* Low half: selector; high half: opcode. */
155 FnSave
->ErrorSelector
= FxSave
->ErrorSelector
& 0x0000ffff;
156 FnSave
->ErrorSelector
|= FxSave
->ErrorOpcode
<< 16;
157 FnSave
->DataOffset
= FxSave
->DataOffset
;
158 FnSave
->DataSelector
= FxSave
->DataSelector
| 0xffff0000;
159 for (i
= 0; i
< 8; i
++)
/* Only the first 10 bytes of each 16-byte slot hold register data. */
161 memcpy(FnSave
->RegisterArea
+ (i
* 10), FxSave
->RegisterArea
+ (i
* 16), 10);
/*
 * KiFloatingSaveAreaToFxSaveArea
 *
 * Loads a thread FX_SAVE_AREA from a FLOATING_SAVE_AREA.
 *
 * FxSaveArea       - destination save area.
 * FloatingSaveArea - source; reinterpreted as an FNSAVE image.
 *
 * Two ways of filling the area are present: a conversion into U.FxArea and a
 * raw copy into U.FnArea.
 * NOTE(review): the condition choosing between them is not visible in this
 * excerpt; presumably it depends on FXSR support - confirm.
 */
166 KiFloatingSaveAreaToFxSaveArea(PFX_SAVE_AREA FxSaveArea
, CONST FLOATING_SAVE_AREA
*FloatingSaveArea
)
/* Conversion path: translate into the FXSAVE layout. */
170 KiFnsaveToFxsaveFormat(&FxSaveArea
->U
.FxArea
, (PFNSAVE_FORMAT
)FloatingSaveArea
);
/* Copy path: the destination already uses the FNSAVE layout. */
174 memcpy(&FxSaveArea
->U
.FnArea
, FloatingSaveArea
, sizeof(FxSaveArea
->U
.FnArea
));
176 FxSaveArea
->NpxSavedCpu
= 0;
177 FxSaveArea
->Cr0NpxState
= FloatingSaveArea
->Cr0NpxState
;
/*
 * KiContextToFxSaveArea
 *
 * Applies the floating-point parts of a CONTEXT to a thread FX_SAVE_AREA.
 *
 * FxSaveArea - destination save area.
 * Context    - source context; only the parts selected by ContextFlags are
 *              used.
 *
 * Returns TRUE when at least one of the two context parts was applied,
 * FALSE otherwise.
 */
181 KiContextToFxSaveArea(PFX_SAVE_AREA FxSaveArea
, PCONTEXT Context
)
183 BOOL FpuContextChanged
= FALSE
;
185 /* First of all convert the FLOATING_SAVE_AREA into the FX_SAVE_AREA */
186 if ((Context
->ContextFlags
& CONTEXT_FLOATING_POINT
) == CONTEXT_FLOATING_POINT
)
188 KiFloatingSaveAreaToFxSaveArea(FxSaveArea
, &Context
->FloatSave
);
189 FpuContextChanged
= TRUE
;
192 /* Now merge the FX_SAVE_AREA from the context with the destination area */
193 if ((Context
->ContextFlags
& CONTEXT_EXTENDED_REGISTERS
) == CONTEXT_EXTENDED_REGISTERS
)
/* ExtendedRegisters is treated as an FXSAVE image. */
197 PFXSAVE_FORMAT src
= (PFXSAVE_FORMAT
)Context
->ExtendedRegisters
;
198 PFXSAVE_FORMAT dst
= &FxSaveArea
->U
.FxArea
;
/* Caller-supplied MXCSR bits outside MxcsrFeatureMask are dropped. */
199 dst
->MXCsr
= src
->MXCsr
& MxcsrFeatureMask
;
/* One copy sized for Reserved3 plus Reserved4.
   NOTE(review): assumes the two members are adjacent - confirm. */
200 memcpy(dst
->Reserved3
, src
->Reserved3
,
201 sizeof(src
->Reserved3
) + sizeof(src
->Reserved4
));
/* The x87 part is taken from ExtendedRegisters only when FloatSave was not
   already applied above. */
203 if ((Context
->ContextFlags
& CONTEXT_FLOATING_POINT
) != CONTEXT_FLOATING_POINT
)
205 dst
->ControlWord
= src
->ControlWord
;
206 dst
->StatusWord
= src
->StatusWord
;
207 dst
->TagWord
= src
->TagWord
;
208 dst
->ErrorOpcode
= src
->ErrorOpcode
;
209 dst
->ErrorOffset
= src
->ErrorOffset
;
210 dst
->ErrorSelector
= src
->ErrorSelector
;
211 dst
->DataOffset
= src
->DataOffset
;
212 dst
->DataSelector
= src
->DataSelector
;
213 memcpy(dst
->RegisterArea
, src
->RegisterArea
, sizeof(src
->RegisterArea
));
215 FxSaveArea
->NpxSavedCpu
= 0;
216 FxSaveArea
->Cr0NpxState
= 0;
218 FpuContextChanged
= TRUE
;
222 return FpuContextChanged
;
/*
 * Body of the FPU / SSE initialisation routine.
 * NOTE(review): the function header, several local declarations (Flags, cr0)
 * and the test made on `status` are not visible in this excerpt.
 *
 * Visible sequence: save EFLAGS and disable interrupts, clear
 * HardwareMathSupport, adjust CR0, run fninit/fstsw, then either set CR0.EM
 * and restore the flags, or continue: issue fsetpm, set
 * HardwareMathSupport, and - when the CPU reports FXSR - enable CR4.OSFXSR,
 * read the MXCSR mask via fxsave and, with SSE/SSE2, enable
 * CR4.OSXMMEXCPT. The last visible step sets CR0.TS and restores the flags.
 */
228 unsigned short int status
;
231 PKPRCB Prcb
= KeGetCurrentPrcb();
233 Ke386SaveFlags(Flags
);
234 Ke386DisableInterrupts();
236 HardwareMathSupport
= 0;
/* Set NE and MP, clear EM and TS in the local cr0 value. */
241 cr0
|= X86_CR0_NE
| X86_CR0_MP
;
242 cr0
&= ~(X86_CR0_EM
| X86_CR0_TS
);
/* Initialise the FPU and fetch its status word into `status`. */
245 #if defined(__GNUC__)
246 asm volatile("fninit\n\t");
247 asm volatile("fstsw %0\n\t" : "=a" (status
));
248 #elif defined(_MSC_VER)
255 #error Unknown compiler for inline assembler
260 /* Set the EM flag in CR0 so any FPU instructions cause a trap. */
261 Ke386SetCr0(Ke386GetCr0() | X86_CR0_EM
);
262 Ke386RestoreFlags(Flags
);
266 /* fsetpm for i287, ignored by i387 */
267 #if defined(__GNUC__)
268 asm volatile(".byte 0xDB, 0xE4\n\t");
269 #elif defined(_MSC_VER)
270 __asm _emit
0xDB __asm _emit
0xe4
272 #error Unknown compiler for inline assembler
275 HardwareMathSupport
= 1;
277 /* check for and enable MMX/SSE support if possible */
278 if ((Prcb
->FeatureBits
& X86_FEATURE_FXSR
) != 0)
/* 15 spare bytes so that a 16-byte aligned FX_SAVE_AREA always fits. */
280 BYTE DummyArea
[sizeof(FX_SAVE_AREA
) + 15];
281 PFX_SAVE_AREA FxSaveArea
;
286 /* we need a 16 byte aligned FX_SAVE_AREA */
287 FxSaveArea
= (PFX_SAVE_AREA
)DummyArea
;
288 if ((ULONG_PTR
)FxSaveArea
& 0x0f)
/* Round up to the next multiple of 16. */
290 FxSaveArea
= (PFX_SAVE_AREA
)(((ULONG_PTR
)FxSaveArea
+ 0x10) & (~0x0f));
293 Ke386SetCr4(Ke386GetCr4() | X86_CR4_OSFXSR
);
/* Zero the image first so that an MXCsrMask the CPU does not write reads
   back as 0 and triggers the fallback below. */
294 memset(&FxSaveArea
->U
.FxArea
, 0, sizeof(FxSaveArea
->U
.FxArea
));
/* NOTE(review): fxsave writes its operand, yet it is passed as an asm input
   with no memory clobber - confirm the compiler cannot reorder the read of
   MXCsrMask around it. */
295 asm volatile("fxsave %0" : : "m"(FxSaveArea
->U
.FxArea
));
296 MxcsrFeatureMask
= FxSaveArea
->U
.FxArea
.MXCsrMask
;
297 if (MxcsrFeatureMask
== 0)
299 MxcsrFeatureMask
= 0x0000ffbf;
302 /* FIXME: Check for SSE3 in Ke386CpuidFlags2! */
303 if (Prcb
->FeatureBits
& (X86_FEATURE_SSE
| X86_FEATURE_SSE2
))
305 Ke386SetCr4(Ke386GetCr4() | X86_CR4_OSXMMEXCPT
);
/* Setting TS here is what later produces the exception 7 (device not
   present) that KiHandleFpuFault asserts on. */
311 Ke386SetCr0(Ke386GetCr0() | X86_CR0_TS
);
312 Ke386RestoreFlags(Flags
);
315 /* This is a rather naive implementation of Ke(Save/Restore)FloatingPointState
316 which will not work for WDM drivers. Please feel free to improve */
/* Size in bytes of the buffer handed to fsave / frstor.
   NOTE(review): 108 matches the documented size of the 32-bit protected-mode
   FSAVE image - confirm against the CPU manual. */
318 #define FPU_STATE_SIZE 108
/*
 * KeSaveFloatingPointState
 *
 * Saves the current x87 state with fsave into a freshly allocated
 * FPU_STATE_SIZE-byte buffer and stores the buffer pointer in *Save, so
 * that KeRestoreFloatingPointState can reload it later. The current IRQL is
 * recorded in the current thread's NpxIrql.
 *
 * Save - receives the pointer to the state buffer (the structure is used as
 *        plain pointer storage).
 *
 * Returns STATUS_ILLEGAL_FLOAT_CONTEXT when no hardware FPU is in use,
 * STATUS_INSUFFICIENT_RESOURCES on the allocation-failure path and
 * STATUS_SUCCESS otherwise.
 * NOTE(review): the declaration of FpState and the test guarding the
 * STATUS_INSUFFICIENT_RESOURCES return are not visible in this excerpt.
 */
321 KeSaveFloatingPointState(OUT PKFLOATING_SAVE Save
)
325 ASSERT_IRQL(DISPATCH_LEVEL
); /* FIXME: is this removed for non-debug builds? I hope not! */
327 /* check if we are doing software emulation */
328 if (!HardwareMathSupport
)
330 return STATUS_ILLEGAL_FLOAT_CONTEXT
;
/* The buffer must come from non-paged pool: the IRQL assertion above admits
   callers running at DISPATCH_LEVEL, where paged pool may neither be
   allocated nor touched. */
333 FpState
= ExAllocatePool(NonPagedPool
, FPU_STATE_SIZE
);
336 return STATUS_INSUFFICIENT_RESOURCES
;
/* Hand the buffer to the caller through the opaque save structure. */
338 *((PVOID
*) Save
) = FpState
;
340 #if defined(__GNUC__)
341 asm volatile("fsave %0\n\t" : "=m" (*FpState
));
342 #elif defined(_MSC_VER)
343 __asm mov eax
, FpState
;
346 #error Unknown compiler for inline assembler
/* Remembered so the restore routine can verify it runs at the same IRQL. */
349 KeGetCurrentThread()->NpxIrql
= KeGetCurrentIrql();
351 return STATUS_SUCCESS
;
/*
 * KeRestoreFloatingPointState
 *
 * Reloads the x87 state from the buffer whose pointer was stored in *Save by
 * KeSaveFloatingPointState.
 *
 * Save - structure holding the pointer to the saved state.
 *
 * Bug-checks with UNDEFINED_BUG_CODE when the current IRQL differs from the
 * one recorded in the thread's NpxIrql at save time; otherwise returns
 * STATUS_SUCCESS.
 * NOTE(review): no release of the buffer is visible in this excerpt - confirm
 * that it is freed on a line not shown here, otherwise every save/restore
 * pair leaks FPU_STATE_SIZE bytes.
 */
355 KeRestoreFloatingPointState(IN PKFLOATING_SAVE Save
)
357 char *FpState
= *((PVOID
*) Save
);
359 if (KeGetCurrentThread()->NpxIrql
!= KeGetCurrentIrql())
361 KEBUGCHECK(UNDEFINED_BUG_CODE
);
364 #if defined(__GNUC__)
/* frstor only READS the buffer, so it has to be an input operand. Declaring
   it as an output ("=m") told the compiler the buffer is write-only here and,
   without volatile, allowed the whole statement to be discarded. */
365 __asm__ __volatile__("frstor %0\n\t" : : "m" (*FpState
));
366 #elif defined(_MSC_VER)
367 __asm mov eax
, FpState
;
370 #error Unknown compiler for inline assembler
375 return STATUS_SUCCESS
;
/*
 * KiHandleFpuFault
 *
 * Handles the FPU related processor exceptions.
 *
 * Tf          - trap frame of the interrupted code.
 * ExceptionNr - 7 (device not present), 16 (math fault) or 19 (XMM fault).
 *
 * Exception 7: hands the FPU to the current thread. The state of the previous
 * owner (Prcb->NpxThread) is stored in the save area located
 * sizeof(FX_SAVE_AREA) bytes below that thread's InitialStack, the current
 * thread's state is restored (or a clean state is set up), the current thread
 * becomes Prcb->NpxThread and is marked NPX_STATE_DIRTY.
 *
 * Exception 16 / 19: captures the FPU state into a CONTEXT, builds an
 * exception record and passes it to KiDispatchException.
 *
 * Returns STATUS_SUCCESS on the visible handled paths; a final
 * STATUS_UNSUCCESSFUL return exists at the end.
 * NOTE(review): many lines (declarations of oldIrql, NpxThread, Context and
 * Er, the conditions choosing between the fxsave and fnsave variants, braces
 * and preprocessor conditionals) are not visible in this excerpt.
 */
379 KiHandleFpuFault(PKTRAP_FRAME Tf
, ULONG ExceptionNr
)
381 if (ExceptionNr
== 7) /* device not present */
383 BOOL FpuInitialized
= FALSE
;
384 unsigned int cr0
= Ke386GetCr0();
385 PKTHREAD CurrentThread
;
386 PFX_SAVE_AREA FxSaveArea
;
/* Sanity checks: TS must be set, the fault must not come from V86 mode and
   EM must be clear. */
393 ASSERT((cr0
& X86_CR0_TS
) == X86_CR0_TS
);
394 ASSERT((Tf
->Eflags
& X86_EFLAGS_VM
) == 0);
395 ASSERT((cr0
& X86_CR0_EM
) == 0);
397 /* disable scheduler, clear TS in cr0 */
398 ASSERT_IRQL(DISPATCH_LEVEL
);
399 KeRaiseIrql(DISPATCH_LEVEL
, &oldIrql
);
400 asm volatile("clts");
402 CurrentThread
= KeGetCurrentThread();
404 NpxThread
= KeGetCurrentPrcb()->NpxThread
;
407 ASSERT(CurrentThread
!= NULL
);
408 DPRINT("Device not present exception happened! (Cr0 = 0x%x, NpxState = 0x%x)\n", cr0
, CurrentThread
->NpxState
);
411 /* check if the current thread already owns the FPU */
412 if (NpxThread
!= CurrentThread
) /* FIXME: maybe this could be an assertion */
414 /* save the FPU state into the owner's save area */
415 if (NpxThread
!= NULL
)
417 KeGetCurrentPrcb()->NpxThread
= NULL
;
/* The save area lives directly below the owner's initial stack pointer. */
418 FxSaveArea
= (PFX_SAVE_AREA
)((char *)NpxThread
->InitialStack
- sizeof (FX_SAVE_AREA
));
419 /* the fnsave might raise a delayed #MF exception */
/* Two save variants follow (fxsave and fnsave).
   NOTE(review): the condition selecting one of them is not visible here. */
422 asm volatile("fxsave %0" : : "m"(FxSaveArea
->U
.FxArea
));
426 asm volatile("fnsave %0" : : "m"(FxSaveArea
->U
.FnArea
));
/* Remembered so that the explicit finit further down can be skipped. */
427 FpuInitialized
= TRUE
;
429 NpxThread
->NpxState
= NPX_STATE_VALID
;
431 #endif /* !CONFIG_SMP */
433 /* restore the state of the current thread */
434 ASSERT((CurrentThread
->NpxState
& NPX_STATE_DIRTY
) == 0);
435 FxSaveArea
= (PFX_SAVE_AREA
)((char *)CurrentThread
->InitialStack
- sizeof (FX_SAVE_AREA
));
436 if (CurrentThread
->NpxState
& NPX_STATE_VALID
)
/* Strip MXCSR bits outside MxcsrFeatureMask before reloading the image. */
440 FxSaveArea
->U
.FxArea
.MXCsr
&= MxcsrFeatureMask
;
441 asm volatile("fxrstor %0" : : "m"(FxSaveArea
->U
.FxArea
));
445 asm volatile("frstor %0" : : "m"(FxSaveArea
->U
.FnArea
));
448 else /* NpxState & NPX_STATE_INVALID */
450 DPRINT("Setting up clean FPU state\n");
/* Build a clean FXSAVE image: everything zero except the control word
   (0x037f) and the masked MXCSR value, then load it. */
453 memset(&FxSaveArea
->U
.FxArea
, 0, sizeof(FxSaveArea
->U
.FxArea
));
454 FxSaveArea
->U
.FxArea
.ControlWord
= 0x037f;
457 FxSaveArea
->U
.FxArea
.MXCsr
= 0x00001f80 & MxcsrFeatureMask
;
459 asm volatile("fxrstor %0" : : "m"(FxSaveArea
->U
.FxArea
));
461 else if (!FpuInitialized
)
463 asm volatile("finit");
/* The current thread is now the FPU owner on this processor. */
466 KeGetCurrentPrcb()->NpxThread
= CurrentThread
;
471 CurrentThread
->NpxState
|= NPX_STATE_DIRTY
;
472 KeLowerIrql(oldIrql
);
473 DPRINT("Device not present exception handled!\n");
475 return STATUS_SUCCESS
;
477 else /* ExceptionNr == 16 || ExceptionNr == 19 */
/* 16 spare bytes so the CONTEXT can be shifted until ExtendedRegisters is
   16-byte aligned (see below). */
480 UCHAR DummyContext
[sizeof(CONTEXT
) + 16];
482 KPROCESSOR_MODE PreviousMode
;
483 PKTHREAD CurrentThread
, NpxThread
;
486 ASSERT(ExceptionNr
== 16 || ExceptionNr
== 19); /* math fault or XMM fault*/
488 KeRaiseIrql(DISPATCH_LEVEL
, &oldIrql
);
490 NpxThread
= KeGetCurrentPrcb()->NpxThread
;
491 CurrentThread
= KeGetCurrentThread();
/* Without an FPU owner there is no state to report; the fault is dropped. */
492 if (NpxThread
== NULL
)
494 KeLowerIrql(oldIrql
);
495 DPRINT1("!!! Math/Xmm fault ignored! (NpxThread == NULL)\n");
496 return STATUS_SUCCESS
;
/* The previous mode is derived from the code selector in the trap frame. */
499 PreviousMode
= ((Tf
->Cs
& 0xffff) == USER_CS
) ? (UserMode
) : (KernelMode
);
500 DPRINT("Math/Xmm fault happened! (PreviousMode = %s)\n",
501 (PreviousMode
== UserMode
) ? ("UserMode") : ("KernelMode"));
503 ASSERT(NpxThread
== CurrentThread
); /* FIXME: Is not always true I think */
505 /* For fxsave we have to align Context->ExtendedRegisters on 16 bytes */
506 Context
= (PCONTEXT
)DummyContext
;
/* Shifts the CONTEXT forward by 1..16 bytes so that the address of
   ExtendedRegisters becomes a multiple of 16. */
507 Context
= (PCONTEXT
)((ULONG_PTR
)Context
+ 0x10 - ((ULONG_PTR
)Context
->ExtendedRegisters
& 0x0f));
509 /* Get FPU/XMM state */
510 Context
->FloatSave
.Cr0NpxState
= 0;
/* Variant 1: save with fxsave into ExtendedRegisters and derive FloatSave
   from it.
   NOTE(review): the condition selecting between this variant and the fnsave
   variant below is not visible here. */
513 PFXSAVE_FORMAT FxSave
= (PFXSAVE_FORMAT
)Context
->ExtendedRegisters
;
514 FxSave
->MXCsrMask
= MxcsrFeatureMask
;
515 memset(FxSave
->RegisterArea
, 0, sizeof(FxSave
->RegisterArea
) +
516 sizeof(FxSave
->Reserved3
) + sizeof(FxSave
->Reserved4
));
517 asm volatile("fxsave %0" : : "m"(*FxSave
));
518 KeLowerIrql(oldIrql
);
519 KiFxsaveToFnsaveFormat((PFNSAVE_FORMAT
)&Context
->FloatSave
, FxSave
);
/* Variant 2: save with fnsave into FloatSave and derive ExtendedRegisters
   from it. */
523 PFNSAVE_FORMAT FnSave
= (PFNSAVE_FORMAT
)&Context
->FloatSave
;
524 asm volatile("fnsave %0" : : "m"(*FnSave
));
525 KeLowerIrql(oldIrql
);
526 KiFnsaveToFxsaveFormat((PFXSAVE_FORMAT
)Context
->ExtendedRegisters
, FnSave
);
529 /* Fill the rest of the context */
530 Context
->ContextFlags
= CONTEXT_FULL
;
531 KeTrapFrameToContext(Tf
, Context
);
/* The floating-point flags are added after the trap-frame conversion; the
   FPU parts were filled in above. */
532 Context
->ContextFlags
|= CONTEXT_FLOATING_POINT
| CONTEXT_EXTENDED_REGISTERS
;
534 /* Determine exception code */
535 if (ExceptionNr
== 16)
537 USHORT FpuStatusWord
= Context
->FloatSave
.StatusWord
& 0xffff;
538 DPRINT("FpuStatusWord = 0x%04x\n", FpuStatusWord
);
/* The first flag found in this fixed order decides the status code. */
540 if (FpuStatusWord
& X87_SW_IE
)
541 Er
.ExceptionCode
= STATUS_FLOAT_INVALID_OPERATION
;
542 else if (FpuStatusWord
& X87_SW_DE
)
543 Er
.ExceptionCode
= STATUS_FLOAT_DENORMAL_OPERAND
;
544 else if (FpuStatusWord
& X87_SW_ZE
)
545 Er
.ExceptionCode
= STATUS_FLOAT_DIVIDE_BY_ZERO
;
546 else if (FpuStatusWord
& X87_SW_OE
)
547 Er
.ExceptionCode
= STATUS_FLOAT_OVERFLOW
;
548 else if (FpuStatusWord
& X87_SW_UE
)
549 Er
.ExceptionCode
= STATUS_FLOAT_UNDERFLOW
;
550 else if (FpuStatusWord
& X87_SW_PE
)
551 Er
.ExceptionCode
= STATUS_FLOAT_INEXACT_RESULT
;
552 else if (FpuStatusWord
& X87_SW_SE
)
553 Er
.ExceptionCode
= STATUS_FLOAT_STACK_CHECK
;
555 ASSERT(0); /* not reached */
556 /* FIXME: is this the right way to get the correct EIP of the faulting instruction? */
557 Er
.ExceptionAddress
= (PVOID
)Context
->FloatSave
.ErrorOffset
;
559 else /* ExceptionNr == 19 */
561 /* FIXME: When should we use STATUS_FLOAT_MULTIPLE_FAULTS? */
562 Er
.ExceptionCode
= STATUS_FLOAT_MULTIPLE_TRAPS
;
563 Er
.ExceptionAddress
= (PVOID
)Tf
->Eip
;
566 Er
.ExceptionFlags
= 0;
567 Er
.ExceptionRecord
= NULL
;
568 Er
.NumberParameters
= 0;
570 /* Dispatch exception */
571 DPRINT("Dispatching exception (ExceptionCode = 0x%08x)\n", Er
.ExceptionCode
);
572 KiDispatchException(&Er
, Context
, Tf
, PreviousMode
, TRUE
);
574 DPRINT("Math-fault handled!\n");
575 return STATUS_SUCCESS
;
/* NOTE(review): the condition under which this return is reached is not
   visible in this excerpt. */
578 return STATUS_UNSUCCESSFUL
;