lib/sdk/crt/math/i386/pow_asm.s

   1 /* ix87 specific implementation of pow function.
   2    Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005, 2007
   3    Free Software Foundation, Inc.
   4    This file is part of the GNU C Library.
   5    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library; if not, write to the Free
  19    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  20    02111-1307 USA.  */
  21
  22 /* Reactos modifications */
  23 #include <asm.inc>
  24
  25 #define ALIGNARG(log2) log2
  26 #define ASM_TYPE_DIRECTIVE(name,typearg)
  27 #define ASM_SIZE_DIRECTIVE(name)
  28 #define cfi_adjust_cfa_offset(x)
  29
  30 PUBLIC _pow
  31
  32 .data
  33 ASSUME nothing
  34
  35         .align ALIGNARG(4)
  36         ASM_TYPE_DIRECTIVE(infinity,@object)
  37
  38 inf_zero:
  39 infinity:
  40         .byte 0, 0, 0, 0, 0, 0, HEX(f0), HEX(7f)
  41         ASM_SIZE_DIRECTIVE(infinity)
  42         ASM_TYPE_DIRECTIVE(zero,@object)
  43 zero:
  44         .double 0.0
  45         ASM_SIZE_DIRECTIVE(zero)
  46         ASM_TYPE_DIRECTIVE(minf_mzero,@object)
  47
  48 minf_mzero:
  49 minfinity:
  50         .byte 0, 0, 0, 0, 0, 0, HEX(f0), HEX(ff)
  51
  52 mzero:
  53         .byte 0, 0, 0, 0, 0, 0, 0, HEX(80)
  54         ASM_SIZE_DIRECTIVE(minf_mzero)
  55         ASM_TYPE_DIRECTIVE(one,@object)
  56
  57 one:
  58         .double 1.0
  59         ASM_SIZE_DIRECTIVE(one)
  60         ASM_TYPE_DIRECTIVE(limit,@object)
  61
  62 limit:
  63         .double 0.29
  64         ASM_SIZE_DIRECTIVE(limit)
  65         ASM_TYPE_DIRECTIVE(p63,@object)
  66
  67 p63:
  68         .byte 0, 0, 0, 0, 0, 0, HEX(e0), HEX(43)
  69         ASM_SIZE_DIRECTIVE(p63)
  70
  71 #ifdef PIC
  72 #define MO(op) op##@GOTOFF(%ecx)
  73 #define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
  74 #else
  75 #define MO(op) op
  76 #define MOX(op,x,f) op[x*f]
  77 #endif
  78
  79 .code
  80 _pow:
  81         fld qword ptr [esp + 12]        // y
  82         fxam
  83
  84 #ifdef  PIC
  85         LOAD_PIC_REG (cx)
  86 #endif
  87
  88         fnstsw ax
  89         mov dl, ah
  90         and ah, HEX(045)
  91         cmp     ah, HEX(040)    // is y == 0 ?
  92         je      L11
  93
  94         cmp ah, 5       // is y == ±inf ?
  95         je      L12
  96
  97         cmp ah, 1       // is y == NaN ?
  98         je      L30
  99
 100         fld qword ptr [esp + 4] // x : y
 101
 102         sub esp, 8
 103         cfi_adjust_cfa_offset (8)
 104
 105         fxam
 106         fnstsw ax
 107         mov dh, ah
 108         and ah, HEX(45)
 109         cmp ah, HEX(040)
 110         je      L20             // x is ±0
 111
 112         cmp ah, 5
 113         je      L15             // x is ±inf
 114
 115         fxch st(1)                      // y : x
 116
 117         /* fistpll raises invalid exception for |y| >= 1L<<63.  */
 118         fld     st              // y : y : x
 119         fabs                    // |y| : y : x
 120         fcomp qword ptr ds:MO(p63)              // y : x
 121         fnstsw ax
 122         sahf
 123         jnc     L2
 124
 125         /* First see whether `y' is a natural number.  In this case we
 126            can use a more precise algorithm.  */
 127         fld     st              // y : y : x
 128         fistp qword ptr [esp]           // y : x
 129         fild qword ptr [esp]            // int(y) : y : x
 130         fucomp st(1)            // y : x
 131         fnstsw ax
 132         sahf
 133         jne     L2
 134
 135         /* OK, we have an integer value for y.  */
 136         pop eax
 137         cfi_adjust_cfa_offset (-4)
 138         pop     edx
 139         cfi_adjust_cfa_offset (-4)
 140         or edx, 0
 141         fstp st         // x
 142         jns     L4              // y >= 0, jump
 143         fdivr qword ptr MO(one)         // 1/x          (now referred to as x)
 144         neg eax
 145         adc edx, 0
 146         neg edx
 147 L4:     fld qword ptr MO(one)           // 1 : x
 148         fxch st(1)
 149
 150 L6:     shrd eax, edx, 1
 151         jnc     L5
 152         fxch st(1)
 153         fmul st, st(1)          // x : ST*x
 154         fxch st(1)
 155 L5:     fmul st, st     // x*x : ST*x
 156         shr edx, 1
 157         mov ecx, eax
 158         or ecx, edx
 159         jnz     L6
 160         fstp st         // ST*x
 161         ret
 162
 163         /* y is ±NAN */
 164 L30:
 165         fld qword ptr [esp + 4]         // x : y
 166         fld qword ptr MO(one)           // 1.0 : x : y
 167         fucomp st(1)            // x : y
 168         fnstsw ax
 169         sahf
 170         je      L31
 171         fxch st(1)                      // y : x
 172 L31:fstp st(1)
 173         ret
 174
 175         cfi_adjust_cfa_offset (8)
 176         .align ALIGNARG(4)
 177 L2:     /* y is a real number.  */
 178         fxch st(1)                      // x : y
 179         fld qword ptr MO(one)           // 1.0 : x : y
 180         fld qword ptr MO(limit) // 0.29 : 1.0 : x : y
 181         fld     st(2)           // x : 0.29 : 1.0 : x : y
 182         fsub st, st(2)          // x-1 : 0.29 : 1.0 : x : y
 183         fabs                    // |x-1| : 0.29 : 1.0 : x : y
 184         fucompp                 // 1.0 : x : y
 185         fnstsw ax
 186         fxch st(1)                      // x : 1.0 : y
 187         sahf
 188         ja      L7
 189         fsub st, st(1)          // x-1 : 1.0 : y
 190         fyl2xp1                 // log2(x) : y
 191         jmp     L8
 192
 193 L7:     fyl2x                   // log2(x) : y
 194 L8:     fmul st, st(1)          // y*log2(x) : y
 195         fst st(1)               // y*log2(x) : y*log2(x)
 196         frndint                 // int(y*log2(x)) : y*log2(x)
 197         fsub st(1), st  // int(y*log2(x)) : fract(y*log2(x))
 198         fxch                    // fract(y*log2(x)) : int(y*log2(x))
 199         f2xm1                   // 2^fract(y*log2(x))-1 : int(y*log2(x))
 200         fadd qword ptr MO(one)          // 2^fract(y*log2(x)) : int(y*log2(x))
 201         fscale                  // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
 202         add esp, 8
 203         cfi_adjust_cfa_offset (-8)
 204         fstp st(1)              // 2^fract(y*log2(x))*2^int(y*log2(x))
 205         ret
 206
 207
 208         // pow(x,±0) = 1
 209         .align ALIGNARG(4)
 210 L11:fstp st(0)          // pop y
 211         fld qword ptr MO(one)
 212         ret
 213
 214         // y == ±inf
 215         .align ALIGNARG(4)
 216 L12:    fstp st(0)              // pop y
 217         fld qword ptr MO(one)           // 1
 218         fld qword ptr [esp + 4]         // x : 1
 219         fabs                    // abs(x) : 1
 220         fucompp                 // < 1, == 1, or > 1
 221         fnstsw ax
 222         and ah, HEX(45)
 223         cmp ah, HEX(45)
 224         je      L13             // jump if x is NaN
 225
 226         cmp ah, HEX(40)
 227         je      L14             // jump if |x| == 1
 228
 229         shl ah, 1
 230         xor dl, ah
 231         and edx, 2
 232         fld qword ptr MOX(inf_zero, edx, 4)
 233         ret
 234
 235         .align ALIGNARG(4)
 236 L14:fld qword ptr MO(one)
 237         ret
 238
 239         .align ALIGNARG(4)
 240 L13:fld qword ptr [esp + 4]             // load x == NaN
 241         ret
 242
 243         cfi_adjust_cfa_offset (8)
 244         .align ALIGNARG(4)
 245         // x is ±inf
 246 L15:    fstp st(0)              // y
 247         test dh, 2
 248         jz      L16             // jump if x == +inf
 249
 250         // We must find out whether y is an odd integer.
 251         fld     st              // y : y
 252         fistp qword ptr [esp]           // y
 253         fild qword ptr [esp]            // int(y) : y
 254         fucompp                 // <empty>
 255         fnstsw ax
 256         sahf
 257         jne     L17
 258
 259         // OK, the value is an integer, but is the number of bits small
 260         // enough so that all are coming from the mantissa?
 261         pop eax
 262         cfi_adjust_cfa_offset (-4)
 263         pop edx
 264         cfi_adjust_cfa_offset (-4)
 265         and al, 1
 266         jz      L18             // jump if not odd
 267         mov eax, edx
 268         or edx, edx
 269         jns     L155
 270         neg eax
 271 L155:
 272         cmp eax, HEX(000200000)
 273         ja      L18             // does not fit in mantissa bits
 274         // It's an odd integer.
 275         shr edx, 31
 276         fld qword ptr MOX(minf_mzero, edx, 8)
 277         ret
 278
 279         cfi_adjust_cfa_offset (8)
 280         .align ALIGNARG(4)
 281 L16:fcomp qword ptr ds:MO(zero)
 282         add esp, 8
 283         cfi_adjust_cfa_offset (-8)
 284         fnstsw ax
 285         shr eax, 5
 286         and eax, 8
 287         fld qword ptr MOX(inf_zero, eax, 1)
 288         ret
 289
 290         cfi_adjust_cfa_offset (8)
 291         .align ALIGNARG(4)
 292 L17:    shl edx, 30     // sign bit for y in right position
 293         add esp, 8
 294         cfi_adjust_cfa_offset (-8)
 295 L18:    shr edx, 31
 296         fld qword ptr MOX(inf_zero, edx, 8)
 297         ret
 298
 299         cfi_adjust_cfa_offset (8)
 300         .align ALIGNARG(4)
 301         // x is ±0
 302 L20:    fstp st(0)              // y
 303         test dl, 2
 304         jz      L21             // y > 0
 305
 306         // x is ±0 and y is < 0.  We must find out whether y is an odd integer.
 307         test dh, 2
 308         jz      L25
 309
 310         fld st          // y : y
 311         fistp qword ptr [esp]           // y
 312         fild qword ptr [esp]            // int(y) : y
 313         fucompp                 // <empty>
 314         fnstsw ax
 315         sahf
 316         jne     L26
 317
 318         // OK, the value is an integer, but is the number of bits small
 319         // enough so that all are coming from the mantissa?
 320         pop eax
 321         cfi_adjust_cfa_offset (-4)
 322         pop edx
 323         cfi_adjust_cfa_offset (-4)
 324         and al, 1
 325         jz      L27             // jump if not odd
 326         cmp edx, HEX(0ffe00000)
 327         jbe     L27             // does not fit in mantissa bits
 328         // It's an odd integer.
 329         // Raise divide-by-zero exception and get minus infinity value.
 330         fld qword ptr MO(one)
 331         fdiv qword ptr MO(zero)
 332         fchs
 333         ret
 334
 335         cfi_adjust_cfa_offset (8)
 336 L25:    fstp st(0)
 337 L26:    add esp, 8
 338         cfi_adjust_cfa_offset (-8)
 339 L27:    // Raise divide-by-zero exception and get infinity value.
 340         fld qword ptr MO(one)
 341         fdiv qword ptr MO(zero)
 342         ret
 343
 344         cfi_adjust_cfa_offset (8)
 345         .align ALIGNARG(4)
 346         // x is ±0 and y is > 0.  We must find out whether y is an odd integer.
 347 L21:test dh, 2
 348         jz      L22
 349
 350         fld st          // y : y
 351         fistp qword ptr [esp]           // y
 352         fild qword ptr [esp]            // int(y) : y
 353         fucompp                 // <empty>
 354         fnstsw ax
 355         sahf
 356         jne     L23
 357
 358         // OK, the value is an integer, but is the number of bits small
 359         // enough so that all are coming from the mantissa?
 360         pop eax
 361         cfi_adjust_cfa_offset (-4)
 362         pop edx
 363         cfi_adjust_cfa_offset (-4)
 364         and al, 1
 365         jz      L24             // jump if not odd
 366         cmp edx, HEX(0ffe00000)
 367         jae     L24             // does not fit in mantissa bits
 368         // It's an odd integer.
 369         fld qword ptr MO(mzero)
 370         ret
 371
 372         cfi_adjust_cfa_offset (8)
 373 L22:    fstp st(0)
 374 L23:    add esp, 8      // Don't use 2 x pop
 375         cfi_adjust_cfa_offset (-8)
 376 L24:    fld qword ptr MO(zero)
 377         ret
 378
 379 END
 380
 381