Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ 067d01de

History | View | Annotate | Download (198.1 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 158142c2 bellard
Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 fe76d976 pbrook
/* FIXME: Flush-To-Zero only affects results.  Denormal inputs should also
34 fe76d976 pbrook
   be flushed to zero.  */
35 158142c2 bellard
#include "softfloat.h"
36 158142c2 bellard
37 158142c2 bellard
/*----------------------------------------------------------------------------
38 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
39 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
40 158142c2 bellard
| desired.)
41 158142c2 bellard
*----------------------------------------------------------------------------*/
42 158142c2 bellard
#include "softfloat-macros.h"
43 158142c2 bellard
44 158142c2 bellard
/*----------------------------------------------------------------------------
45 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
46 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
47 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
48 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
49 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
50 158142c2 bellard
| specific.
51 158142c2 bellard
*----------------------------------------------------------------------------*/
52 158142c2 bellard
#include "softfloat-specialize.h"
53 158142c2 bellard
54 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' as the current rounding mode in the floating-point status
| reached through the STATUS macro.  The value is stored as given; no
| validation is performed here.
*----------------------------------------------------------------------------*/

void set_float_rounding_mode(int val STATUS_PARAM)
{

    STATUS(float_rounding_mode) = val;

}
58 158142c2 bellard
59 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Overwrites the accumulated exception flags in the floating-point status
| with `val'.  Existing flags are replaced, not OR-ed with `val'.
*----------------------------------------------------------------------------*/

void set_float_exception_flags(int val STATUS_PARAM)
{

    STATUS(float_exception_flags) = val;

}
63 1d6bda35 bellard
64 158142c2 bellard
#ifdef FLOATX80
65 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' as the rounding precision used for extended double-precision
| results in the floating-point status.  The value is stored as given; no
| validation is performed here.
*----------------------------------------------------------------------------*/

void set_floatx80_rounding_precision(int val STATUS_PARAM)
{

    STATUS(floatx80_rounding_precision) = val;

}
69 158142c2 bellard
#endif
70 158142c2 bellard
71 158142c2 bellard
/*----------------------------------------------------------------------------
72 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
73 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
74 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
75 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
76 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
77 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
78 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
79 158142c2 bellard
| positive or negative integer is returned.
80 158142c2 bellard
*----------------------------------------------------------------------------*/
81 158142c2 bellard
82 158142c2 bellard
static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
83 158142c2 bellard
{
84 158142c2 bellard
    int8 roundingMode;
85 158142c2 bellard
    flag roundNearestEven;
86 158142c2 bellard
    int8 roundIncrement, roundBits;
87 158142c2 bellard
    int32 z;
88 158142c2 bellard
89 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
90 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
91 158142c2 bellard
    roundIncrement = 0x40;
92 158142c2 bellard
    if ( ! roundNearestEven ) {
93 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
94 158142c2 bellard
            roundIncrement = 0;
95 158142c2 bellard
        }
96 158142c2 bellard
        else {
97 158142c2 bellard
            roundIncrement = 0x7F;
98 158142c2 bellard
            if ( zSign ) {
99 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
100 158142c2 bellard
            }
101 158142c2 bellard
            else {
102 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
103 158142c2 bellard
            }
104 158142c2 bellard
        }
105 158142c2 bellard
    }
106 158142c2 bellard
    roundBits = absZ & 0x7F;
107 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
108 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
109 158142c2 bellard
    z = absZ;
110 158142c2 bellard
    if ( zSign ) z = - z;
111 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
112 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
113 158142c2 bellard
        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
114 158142c2 bellard
    }
115 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
116 158142c2 bellard
    return z;
117 158142c2 bellard
118 158142c2 bellard
}
119 158142c2 bellard
120 158142c2 bellard
/*----------------------------------------------------------------------------
121 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
122 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
123 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
124 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
125 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
126 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
127 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
128 158142c2 bellard
| exception is raised and the largest positive or negative integer is
129 158142c2 bellard
| returned.
130 158142c2 bellard
*----------------------------------------------------------------------------*/
131 158142c2 bellard
132 158142c2 bellard
static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
133 158142c2 bellard
{
134 158142c2 bellard
    int8 roundingMode;
135 158142c2 bellard
    flag roundNearestEven, increment;
136 158142c2 bellard
    int64 z;
137 158142c2 bellard
138 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
139 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
140 158142c2 bellard
    increment = ( (sbits64) absZ1 < 0 );
141 158142c2 bellard
    if ( ! roundNearestEven ) {
142 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
143 158142c2 bellard
            increment = 0;
144 158142c2 bellard
        }
145 158142c2 bellard
        else {
146 158142c2 bellard
            if ( zSign ) {
147 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
148 158142c2 bellard
            }
149 158142c2 bellard
            else {
150 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
151 158142c2 bellard
            }
152 158142c2 bellard
        }
153 158142c2 bellard
    }
154 158142c2 bellard
    if ( increment ) {
155 158142c2 bellard
        ++absZ0;
156 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
157 158142c2 bellard
        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158 158142c2 bellard
    }
159 158142c2 bellard
    z = absZ0;
160 158142c2 bellard
    if ( zSign ) z = - z;
161 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
162 158142c2 bellard
 overflow:
163 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
164 158142c2 bellard
        return
165 158142c2 bellard
              zSign ? (sbits64) LIT64( 0x8000000000000000 )
166 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
167 158142c2 bellard
    }
168 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
169 158142c2 bellard
    return z;
170 158142c2 bellard
171 158142c2 bellard
}
172 158142c2 bellard
173 158142c2 bellard
/*----------------------------------------------------------------------------
174 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
175 158142c2 bellard
*----------------------------------------------------------------------------*/
176 158142c2 bellard
177 158142c2 bellard
INLINE bits32 extractFloat32Frac( float32 a )
178 158142c2 bellard
{
179 158142c2 bellard
180 f090c9d4 pbrook
    return float32_val(a) & 0x007FFFFF;
181 158142c2 bellard
182 158142c2 bellard
}
183 158142c2 bellard
184 158142c2 bellard
/*----------------------------------------------------------------------------
185 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
186 158142c2 bellard
*----------------------------------------------------------------------------*/
187 158142c2 bellard
188 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
189 158142c2 bellard
{
190 158142c2 bellard
191 f090c9d4 pbrook
    return ( float32_val(a)>>23 ) & 0xFF;
192 158142c2 bellard
193 158142c2 bellard
}
194 158142c2 bellard
195 158142c2 bellard
/*----------------------------------------------------------------------------
196 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
197 158142c2 bellard
*----------------------------------------------------------------------------*/
198 158142c2 bellard
199 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
200 158142c2 bellard
{
201 158142c2 bellard
202 f090c9d4 pbrook
    return float32_val(a)>>31;
203 158142c2 bellard
204 158142c2 bellard
}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
208 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
209 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
210 158142c2 bellard
| `zSigPtr', respectively.
211 158142c2 bellard
*----------------------------------------------------------------------------*/
212 158142c2 bellard
213 158142c2 bellard
static void
214 158142c2 bellard
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
215 158142c2 bellard
{
216 158142c2 bellard
    int8 shiftCount;
217 158142c2 bellard
218 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
219 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
220 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
221 158142c2 bellard
222 158142c2 bellard
}
223 158142c2 bellard
224 158142c2 bellard
/*----------------------------------------------------------------------------
225 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
226 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
227 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
228 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
229 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
230 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
231 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
232 158142c2 bellard
| significand.
233 158142c2 bellard
*----------------------------------------------------------------------------*/
234 158142c2 bellard
235 158142c2 bellard
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
236 158142c2 bellard
{
237 158142c2 bellard
238 f090c9d4 pbrook
    return make_float32(
239 f090c9d4 pbrook
          ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
240 158142c2 bellard
241 158142c2 bellard
}
242 158142c2 bellard
243 158142c2 bellard
/*----------------------------------------------------------------------------
244 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
245 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
246 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
247 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
248 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
249 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
250 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
251 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
252 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
253 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
254 158142c2 bellard
| precision floating-point number.
255 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
256 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
257 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
258 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
259 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
260 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
261 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
262 158142c2 bellard
| Binary Floating-Point Arithmetic.
263 158142c2 bellard
*----------------------------------------------------------------------------*/
264 158142c2 bellard
265 158142c2 bellard
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
266 158142c2 bellard
{
267 158142c2 bellard
    int8 roundingMode;
268 158142c2 bellard
    flag roundNearestEven;
269 158142c2 bellard
    int8 roundIncrement, roundBits;
270 158142c2 bellard
    flag isTiny;
271 158142c2 bellard
272 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
273 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
274 158142c2 bellard
    roundIncrement = 0x40;
275 158142c2 bellard
    if ( ! roundNearestEven ) {
276 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
277 158142c2 bellard
            roundIncrement = 0;
278 158142c2 bellard
        }
279 158142c2 bellard
        else {
280 158142c2 bellard
            roundIncrement = 0x7F;
281 158142c2 bellard
            if ( zSign ) {
282 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
283 158142c2 bellard
            }
284 158142c2 bellard
            else {
285 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
286 158142c2 bellard
            }
287 158142c2 bellard
        }
288 158142c2 bellard
    }
289 158142c2 bellard
    roundBits = zSig & 0x7F;
290 158142c2 bellard
    if ( 0xFD <= (bits16) zExp ) {
291 158142c2 bellard
        if (    ( 0xFD < zExp )
292 158142c2 bellard
             || (    ( zExp == 0xFD )
293 158142c2 bellard
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
294 158142c2 bellard
           ) {
295 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
296 f090c9d4 pbrook
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
297 158142c2 bellard
        }
298 158142c2 bellard
        if ( zExp < 0 ) {
299 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
300 158142c2 bellard
            isTiny =
301 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
302 158142c2 bellard
                || ( zExp < -1 )
303 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
304 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
305 158142c2 bellard
            zExp = 0;
306 158142c2 bellard
            roundBits = zSig & 0x7F;
307 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
308 158142c2 bellard
        }
309 158142c2 bellard
    }
310 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
311 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
312 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
313 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
314 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
315 158142c2 bellard
316 158142c2 bellard
}
317 158142c2 bellard
318 158142c2 bellard
/*----------------------------------------------------------------------------
319 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
320 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
321 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
322 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
323 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
324 158142c2 bellard
| floating-point exponent.
325 158142c2 bellard
*----------------------------------------------------------------------------*/
326 158142c2 bellard
327 158142c2 bellard
static float32
328 158142c2 bellard
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
329 158142c2 bellard
{
330 158142c2 bellard
    int8 shiftCount;
331 158142c2 bellard
332 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
333 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
334 158142c2 bellard
335 158142c2 bellard
}
336 158142c2 bellard
337 158142c2 bellard
/*----------------------------------------------------------------------------
338 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
339 158142c2 bellard
*----------------------------------------------------------------------------*/
340 158142c2 bellard
341 158142c2 bellard
INLINE bits64 extractFloat64Frac( float64 a )
342 158142c2 bellard
{
343 158142c2 bellard
344 f090c9d4 pbrook
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
345 158142c2 bellard
346 158142c2 bellard
}
347 158142c2 bellard
348 158142c2 bellard
/*----------------------------------------------------------------------------
349 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
350 158142c2 bellard
*----------------------------------------------------------------------------*/
351 158142c2 bellard
352 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
353 158142c2 bellard
{
354 158142c2 bellard
355 f090c9d4 pbrook
    return ( float64_val(a)>>52 ) & 0x7FF;
356 158142c2 bellard
357 158142c2 bellard
}
358 158142c2 bellard
359 158142c2 bellard
/*----------------------------------------------------------------------------
360 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
361 158142c2 bellard
*----------------------------------------------------------------------------*/
362 158142c2 bellard
363 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
364 158142c2 bellard
{
365 158142c2 bellard
366 f090c9d4 pbrook
    return float64_val(a)>>63;
367 158142c2 bellard
368 158142c2 bellard
}
369 158142c2 bellard
370 158142c2 bellard
/*----------------------------------------------------------------------------
371 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
372 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
373 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
374 158142c2 bellard
| `zSigPtr', respectively.
375 158142c2 bellard
*----------------------------------------------------------------------------*/
376 158142c2 bellard
377 158142c2 bellard
static void
378 158142c2 bellard
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
379 158142c2 bellard
{
380 158142c2 bellard
    int8 shiftCount;
381 158142c2 bellard
382 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
383 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
384 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
385 158142c2 bellard
386 158142c2 bellard
}
387 158142c2 bellard
388 158142c2 bellard
/*----------------------------------------------------------------------------
389 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
390 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
391 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
392 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
393 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
394 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
395 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
396 158142c2 bellard
| significand.
397 158142c2 bellard
*----------------------------------------------------------------------------*/
398 158142c2 bellard
399 158142c2 bellard
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
400 158142c2 bellard
{
401 158142c2 bellard
402 f090c9d4 pbrook
    return make_float64(
403 f090c9d4 pbrook
        ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
404 158142c2 bellard
405 158142c2 bellard
}
406 158142c2 bellard
407 158142c2 bellard
/*----------------------------------------------------------------------------
408 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
409 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
410 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
411 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
412 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
413 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
414 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
415 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
416 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
417 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
418 158142c2 bellard
| precision floating-point number.
419 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
420 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
421 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
422 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
423 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
424 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
425 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
426 158142c2 bellard
| Binary Floating-Point Arithmetic.
427 158142c2 bellard
*----------------------------------------------------------------------------*/
428 158142c2 bellard
429 158142c2 bellard
static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
430 158142c2 bellard
{
431 158142c2 bellard
    int8 roundingMode;
432 158142c2 bellard
    flag roundNearestEven;
433 158142c2 bellard
    int16 roundIncrement, roundBits;
434 158142c2 bellard
    flag isTiny;
435 158142c2 bellard
436 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
437 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
438 158142c2 bellard
    roundIncrement = 0x200;
439 158142c2 bellard
    if ( ! roundNearestEven ) {
440 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
441 158142c2 bellard
            roundIncrement = 0;
442 158142c2 bellard
        }
443 158142c2 bellard
        else {
444 158142c2 bellard
            roundIncrement = 0x3FF;
445 158142c2 bellard
            if ( zSign ) {
446 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
447 158142c2 bellard
            }
448 158142c2 bellard
            else {
449 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
450 158142c2 bellard
            }
451 158142c2 bellard
        }
452 158142c2 bellard
    }
453 158142c2 bellard
    roundBits = zSig & 0x3FF;
454 158142c2 bellard
    if ( 0x7FD <= (bits16) zExp ) {
455 158142c2 bellard
        if (    ( 0x7FD < zExp )
456 158142c2 bellard
             || (    ( zExp == 0x7FD )
457 158142c2 bellard
                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
458 158142c2 bellard
           ) {
459 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
460 f090c9d4 pbrook
            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
461 158142c2 bellard
        }
462 158142c2 bellard
        if ( zExp < 0 ) {
463 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
464 158142c2 bellard
            isTiny =
465 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
466 158142c2 bellard
                || ( zExp < -1 )
467 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
468 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
469 158142c2 bellard
            zExp = 0;
470 158142c2 bellard
            roundBits = zSig & 0x3FF;
471 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
472 158142c2 bellard
        }
473 158142c2 bellard
    }
474 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
475 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
476 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
477 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
478 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
479 158142c2 bellard
480 158142c2 bellard
}
481 158142c2 bellard
482 158142c2 bellard
/*----------------------------------------------------------------------------
483 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
484 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
485 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
486 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
487 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
488 158142c2 bellard
| floating-point exponent.
489 158142c2 bellard
*----------------------------------------------------------------------------*/
490 158142c2 bellard
491 158142c2 bellard
static float64
492 158142c2 bellard
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
493 158142c2 bellard
{
494 158142c2 bellard
    int8 shiftCount;
495 158142c2 bellard
496 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
497 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
498 158142c2 bellard
499 158142c2 bellard
}
500 158142c2 bellard
501 158142c2 bellard
#ifdef FLOATX80
502 158142c2 bellard
503 158142c2 bellard
/*----------------------------------------------------------------------------
504 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
505 158142c2 bellard
| value `a'.
506 158142c2 bellard
*----------------------------------------------------------------------------*/
507 158142c2 bellard
508 158142c2 bellard
INLINE bits64 extractFloatx80Frac( floatx80 a )
509 158142c2 bellard
{
510 158142c2 bellard
511 158142c2 bellard
    return a.low;
512 158142c2 bellard
513 158142c2 bellard
}
514 158142c2 bellard
515 158142c2 bellard
/*----------------------------------------------------------------------------
516 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
517 158142c2 bellard
| value `a'.
518 158142c2 bellard
*----------------------------------------------------------------------------*/
519 158142c2 bellard
520 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
521 158142c2 bellard
{
522 158142c2 bellard
523 158142c2 bellard
    return a.high & 0x7FFF;
524 158142c2 bellard
525 158142c2 bellard
}
526 158142c2 bellard
527 158142c2 bellard
/*----------------------------------------------------------------------------
528 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
529 158142c2 bellard
| `a'.
530 158142c2 bellard
*----------------------------------------------------------------------------*/
531 158142c2 bellard
532 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
533 158142c2 bellard
{
534 158142c2 bellard
535 158142c2 bellard
    return a.high>>15;
536 158142c2 bellard
537 158142c2 bellard
}
538 158142c2 bellard
539 158142c2 bellard
/*----------------------------------------------------------------------------
540 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
541 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
542 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
543 158142c2 bellard
| `zSigPtr', respectively.
544 158142c2 bellard
*----------------------------------------------------------------------------*/
545 158142c2 bellard
546 158142c2 bellard
static void
547 158142c2 bellard
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
548 158142c2 bellard
{
549 158142c2 bellard
    int8 shiftCount;
550 158142c2 bellard
551 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
552 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
553 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
554 158142c2 bellard
555 158142c2 bellard
}
556 158142c2 bellard
557 158142c2 bellard
/*----------------------------------------------------------------------------
558 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
559 158142c2 bellard
| extended double-precision floating-point value, returning the result.
560 158142c2 bellard
*----------------------------------------------------------------------------*/
561 158142c2 bellard
562 158142c2 bellard
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
563 158142c2 bellard
{
564 158142c2 bellard
    floatx80 z;
565 158142c2 bellard
566 158142c2 bellard
    z.low = zSig;
567 158142c2 bellard
    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
568 158142c2 bellard
    return z;
569 158142c2 bellard
570 158142c2 bellard
}
571 158142c2 bellard
572 158142c2 bellard
/*----------------------------------------------------------------------------
573 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
574 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
575 158142c2 bellard
| and returns the proper extended double-precision floating-point value
576 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
577 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
578 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
579 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
580 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
581 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
582 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
583 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
584 158142c2 bellard
| double-precision floating-point number.
585 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
586 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
587 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
588 158142c2 bellard
| format.
589 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
590 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
591 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
592 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
593 158142c2 bellard
| Floating-Point Arithmetic.
594 158142c2 bellard
*----------------------------------------------------------------------------*/
595 158142c2 bellard
596 158142c2 bellard
static floatx80
597 158142c2 bellard
 roundAndPackFloatx80(
598 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
599 158142c2 bellard
 STATUS_PARAM)
600 158142c2 bellard
{
601 158142c2 bellard
    int8 roundingMode;
602 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
603 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
604 158142c2 bellard
605 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
606 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
607 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
608 158142c2 bellard
    if ( roundingPrecision == 64 ) {
609 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
610 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
611 158142c2 bellard
    }
612 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
613 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
614 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
615 158142c2 bellard
    }
616 158142c2 bellard
    else {
617 158142c2 bellard
        goto precision80;
618 158142c2 bellard
    }
619 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
620 158142c2 bellard
    if ( ! roundNearestEven ) {
621 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
622 158142c2 bellard
            roundIncrement = 0;
623 158142c2 bellard
        }
624 158142c2 bellard
        else {
625 158142c2 bellard
            roundIncrement = roundMask;
626 158142c2 bellard
            if ( zSign ) {
627 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
628 158142c2 bellard
            }
629 158142c2 bellard
            else {
630 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
631 158142c2 bellard
            }
632 158142c2 bellard
        }
633 158142c2 bellard
    }
634 158142c2 bellard
    roundBits = zSig0 & roundMask;
635 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
636 158142c2 bellard
        if (    ( 0x7FFE < zExp )
637 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
638 158142c2 bellard
           ) {
639 158142c2 bellard
            goto overflow;
640 158142c2 bellard
        }
641 158142c2 bellard
        if ( zExp <= 0 ) {
642 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
643 158142c2 bellard
            isTiny =
644 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
645 158142c2 bellard
                || ( zExp < 0 )
646 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
647 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
648 158142c2 bellard
            zExp = 0;
649 158142c2 bellard
            roundBits = zSig0 & roundMask;
650 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
651 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
652 158142c2 bellard
            zSig0 += roundIncrement;
653 158142c2 bellard
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
654 158142c2 bellard
            roundIncrement = roundMask + 1;
655 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
656 158142c2 bellard
                roundMask |= roundIncrement;
657 158142c2 bellard
            }
658 158142c2 bellard
            zSig0 &= ~ roundMask;
659 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
660 158142c2 bellard
        }
661 158142c2 bellard
    }
662 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
663 158142c2 bellard
    zSig0 += roundIncrement;
664 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
665 158142c2 bellard
        ++zExp;
666 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
667 158142c2 bellard
    }
668 158142c2 bellard
    roundIncrement = roundMask + 1;
669 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
670 158142c2 bellard
        roundMask |= roundIncrement;
671 158142c2 bellard
    }
672 158142c2 bellard
    zSig0 &= ~ roundMask;
673 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
674 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
675 158142c2 bellard
 precision80:
676 158142c2 bellard
    increment = ( (sbits64) zSig1 < 0 );
677 158142c2 bellard
    if ( ! roundNearestEven ) {
678 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
679 158142c2 bellard
            increment = 0;
680 158142c2 bellard
        }
681 158142c2 bellard
        else {
682 158142c2 bellard
            if ( zSign ) {
683 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
684 158142c2 bellard
            }
685 158142c2 bellard
            else {
686 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
687 158142c2 bellard
            }
688 158142c2 bellard
        }
689 158142c2 bellard
    }
690 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
691 158142c2 bellard
        if (    ( 0x7FFE < zExp )
692 158142c2 bellard
             || (    ( zExp == 0x7FFE )
693 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
694 158142c2 bellard
                  && increment
695 158142c2 bellard
                )
696 158142c2 bellard
           ) {
697 158142c2 bellard
            roundMask = 0;
698 158142c2 bellard
 overflow:
699 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
700 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
701 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
702 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
703 158142c2 bellard
               ) {
704 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
705 158142c2 bellard
            }
706 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
707 158142c2 bellard
        }
708 158142c2 bellard
        if ( zExp <= 0 ) {
709 158142c2 bellard
            isTiny =
710 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
711 158142c2 bellard
                || ( zExp < 0 )
712 158142c2 bellard
                || ! increment
713 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
714 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
715 158142c2 bellard
            zExp = 0;
716 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
717 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
718 158142c2 bellard
            if ( roundNearestEven ) {
719 158142c2 bellard
                increment = ( (sbits64) zSig1 < 0 );
720 158142c2 bellard
            }
721 158142c2 bellard
            else {
722 158142c2 bellard
                if ( zSign ) {
723 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
724 158142c2 bellard
                }
725 158142c2 bellard
                else {
726 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
727 158142c2 bellard
                }
728 158142c2 bellard
            }
729 158142c2 bellard
            if ( increment ) {
730 158142c2 bellard
                ++zSig0;
731 158142c2 bellard
                zSig0 &=
732 158142c2 bellard
                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
733 158142c2 bellard
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
734 158142c2 bellard
            }
735 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
736 158142c2 bellard
        }
737 158142c2 bellard
    }
738 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
739 158142c2 bellard
    if ( increment ) {
740 158142c2 bellard
        ++zSig0;
741 158142c2 bellard
        if ( zSig0 == 0 ) {
742 158142c2 bellard
            ++zExp;
743 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
744 158142c2 bellard
        }
745 158142c2 bellard
        else {
746 158142c2 bellard
            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
747 158142c2 bellard
        }
748 158142c2 bellard
    }
749 158142c2 bellard
    else {
750 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
751 158142c2 bellard
    }
752 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
753 158142c2 bellard
754 158142c2 bellard
}
755 158142c2 bellard
756 158142c2 bellard
/*----------------------------------------------------------------------------
757 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
758 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
759 158142c2 bellard
| and returns the proper extended double-precision floating-point value
760 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
761 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
762 158142c2 bellard
| normalized.
763 158142c2 bellard
*----------------------------------------------------------------------------*/
764 158142c2 bellard
765 158142c2 bellard
static floatx80
766 158142c2 bellard
 normalizeRoundAndPackFloatx80(
767 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
768 158142c2 bellard
 STATUS_PARAM)
769 158142c2 bellard
{
770 158142c2 bellard
    int8 shiftCount;
771 158142c2 bellard
772 158142c2 bellard
    if ( zSig0 == 0 ) {
773 158142c2 bellard
        zSig0 = zSig1;
774 158142c2 bellard
        zSig1 = 0;
775 158142c2 bellard
        zExp -= 64;
776 158142c2 bellard
    }
777 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
778 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
779 158142c2 bellard
    zExp -= shiftCount;
780 158142c2 bellard
    return
781 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
782 158142c2 bellard
783 158142c2 bellard
}
784 158142c2 bellard
785 158142c2 bellard
#endif
786 158142c2 bellard
787 158142c2 bellard
#ifdef FLOAT128
788 158142c2 bellard
789 158142c2 bellard
/*----------------------------------------------------------------------------
790 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
791 158142c2 bellard
| floating-point value `a'.
792 158142c2 bellard
*----------------------------------------------------------------------------*/
793 158142c2 bellard
794 158142c2 bellard
INLINE bits64 extractFloat128Frac1( float128 a )
795 158142c2 bellard
{
796 158142c2 bellard
797 158142c2 bellard
    return a.low;
798 158142c2 bellard
799 158142c2 bellard
}
800 158142c2 bellard
801 158142c2 bellard
/*----------------------------------------------------------------------------
802 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
803 158142c2 bellard
| floating-point value `a'.
804 158142c2 bellard
*----------------------------------------------------------------------------*/
805 158142c2 bellard
806 158142c2 bellard
INLINE bits64 extractFloat128Frac0( float128 a )
807 158142c2 bellard
{
808 158142c2 bellard
809 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
810 158142c2 bellard
811 158142c2 bellard
}
812 158142c2 bellard
813 158142c2 bellard
/*----------------------------------------------------------------------------
814 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
815 158142c2 bellard
| `a'.
816 158142c2 bellard
*----------------------------------------------------------------------------*/
817 158142c2 bellard
818 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
819 158142c2 bellard
{
820 158142c2 bellard
821 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
822 158142c2 bellard
823 158142c2 bellard
}
824 158142c2 bellard
825 158142c2 bellard
/*----------------------------------------------------------------------------
826 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
827 158142c2 bellard
*----------------------------------------------------------------------------*/
828 158142c2 bellard
829 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
830 158142c2 bellard
{
831 158142c2 bellard
832 158142c2 bellard
    return a.high>>63;
833 158142c2 bellard
834 158142c2 bellard
}
835 158142c2 bellard
836 158142c2 bellard
/*----------------------------------------------------------------------------
837 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
838 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
839 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
840 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
841 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
842 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
843 158142c2 bellard
| location pointed to by `zSig1Ptr'.
844 158142c2 bellard
*----------------------------------------------------------------------------*/
845 158142c2 bellard
846 158142c2 bellard
static void
847 158142c2 bellard
 normalizeFloat128Subnormal(
848 158142c2 bellard
     bits64 aSig0,
849 158142c2 bellard
     bits64 aSig1,
850 158142c2 bellard
     int32 *zExpPtr,
851 158142c2 bellard
     bits64 *zSig0Ptr,
852 158142c2 bellard
     bits64 *zSig1Ptr
853 158142c2 bellard
 )
854 158142c2 bellard
{
855 158142c2 bellard
    int8 shiftCount;
856 158142c2 bellard
857 158142c2 bellard
    if ( aSig0 == 0 ) {
858 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
859 158142c2 bellard
        if ( shiftCount < 0 ) {
860 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
861 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
862 158142c2 bellard
        }
863 158142c2 bellard
        else {
864 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
865 158142c2 bellard
            *zSig1Ptr = 0;
866 158142c2 bellard
        }
867 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
868 158142c2 bellard
    }
869 158142c2 bellard
    else {
870 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
871 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
872 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
873 158142c2 bellard
    }
874 158142c2 bellard
875 158142c2 bellard
}
876 158142c2 bellard
877 158142c2 bellard
/*----------------------------------------------------------------------------
878 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
879 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
880 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
881 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
882 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
883 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
884 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
885 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
886 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
887 158142c2 bellard
| significand.
888 158142c2 bellard
*----------------------------------------------------------------------------*/
889 158142c2 bellard
890 158142c2 bellard
INLINE float128
891 158142c2 bellard
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
892 158142c2 bellard
{
893 158142c2 bellard
    float128 z;
894 158142c2 bellard
895 158142c2 bellard
    z.low = zSig1;
896 158142c2 bellard
    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
897 158142c2 bellard
    return z;
898 158142c2 bellard
899 158142c2 bellard
}
900 158142c2 bellard
901 158142c2 bellard
/*----------------------------------------------------------------------------
902 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
903 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
904 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
905 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
906 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
907 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
908 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
909 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
910 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
911 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
912 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
913 158142c2 bellard
| precision floating-point number.
914 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
915 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
916 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
917 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
918 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
919 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
920 158142c2 bellard
*----------------------------------------------------------------------------*/
921 158142c2 bellard
922 158142c2 bellard
static float128
923 158142c2 bellard
 roundAndPackFloat128(
924 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
925 158142c2 bellard
{
926 158142c2 bellard
    int8 roundingMode;
927 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
928 158142c2 bellard
929 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
930 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
931 158142c2 bellard
    increment = ( (sbits64) zSig2 < 0 );
932 158142c2 bellard
    if ( ! roundNearestEven ) {
933 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
934 158142c2 bellard
            increment = 0;
935 158142c2 bellard
        }
936 158142c2 bellard
        else {
937 158142c2 bellard
            if ( zSign ) {
938 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
939 158142c2 bellard
            }
940 158142c2 bellard
            else {
941 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
942 158142c2 bellard
            }
943 158142c2 bellard
        }
944 158142c2 bellard
    }
945 158142c2 bellard
    if ( 0x7FFD <= (bits32) zExp ) {
946 158142c2 bellard
        if (    ( 0x7FFD < zExp )
947 158142c2 bellard
             || (    ( zExp == 0x7FFD )
948 158142c2 bellard
                  && eq128(
949 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
950 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
951 158142c2 bellard
                         zSig0,
952 158142c2 bellard
                         zSig1
953 158142c2 bellard
                     )
954 158142c2 bellard
                  && increment
955 158142c2 bellard
                )
956 158142c2 bellard
           ) {
957 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
958 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
959 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
960 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
961 158142c2 bellard
               ) {
962 158142c2 bellard
                return
963 158142c2 bellard
                    packFloat128(
964 158142c2 bellard
                        zSign,
965 158142c2 bellard
                        0x7FFE,
966 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
967 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
968 158142c2 bellard
                    );
969 158142c2 bellard
            }
970 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
971 158142c2 bellard
        }
972 158142c2 bellard
        if ( zExp < 0 ) {
973 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
974 158142c2 bellard
            isTiny =
975 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
976 158142c2 bellard
                || ( zExp < -1 )
977 158142c2 bellard
                || ! increment
978 158142c2 bellard
                || lt128(
979 158142c2 bellard
                       zSig0,
980 158142c2 bellard
                       zSig1,
981 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
982 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
983 158142c2 bellard
                   );
984 158142c2 bellard
            shift128ExtraRightJamming(
985 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
986 158142c2 bellard
            zExp = 0;
987 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
988 158142c2 bellard
            if ( roundNearestEven ) {
989 158142c2 bellard
                increment = ( (sbits64) zSig2 < 0 );
990 158142c2 bellard
            }
991 158142c2 bellard
            else {
992 158142c2 bellard
                if ( zSign ) {
993 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
994 158142c2 bellard
                }
995 158142c2 bellard
                else {
996 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
997 158142c2 bellard
                }
998 158142c2 bellard
            }
999 158142c2 bellard
        }
1000 158142c2 bellard
    }
1001 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1002 158142c2 bellard
    if ( increment ) {
1003 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1004 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1005 158142c2 bellard
    }
1006 158142c2 bellard
    else {
1007 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1008 158142c2 bellard
    }
1009 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1010 158142c2 bellard
1011 158142c2 bellard
}
1012 158142c2 bellard
1013 158142c2 bellard
/*----------------------------------------------------------------------------
1014 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1015 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1016 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1017 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1018 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1019 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1020 158142c2 bellard
| point exponent.
1021 158142c2 bellard
*----------------------------------------------------------------------------*/
1022 158142c2 bellard
1023 158142c2 bellard
static float128
1024 158142c2 bellard
 normalizeRoundAndPackFloat128(
1025 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1026 158142c2 bellard
{
1027 158142c2 bellard
    int8 shiftCount;
1028 158142c2 bellard
    bits64 zSig2;
1029 158142c2 bellard
1030 158142c2 bellard
    if ( zSig0 == 0 ) {
1031 158142c2 bellard
        zSig0 = zSig1;
1032 158142c2 bellard
        zSig1 = 0;
1033 158142c2 bellard
        zExp -= 64;
1034 158142c2 bellard
    }
1035 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1036 158142c2 bellard
    if ( 0 <= shiftCount ) {
1037 158142c2 bellard
        zSig2 = 0;
1038 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1039 158142c2 bellard
    }
1040 158142c2 bellard
    else {
1041 158142c2 bellard
        shift128ExtraRightJamming(
1042 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1043 158142c2 bellard
    }
1044 158142c2 bellard
    zExp -= shiftCount;
1045 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1046 158142c2 bellard
1047 158142c2 bellard
}
1048 158142c2 bellard
1049 158142c2 bellard
#endif
1050 158142c2 bellard
1051 158142c2 bellard
/*----------------------------------------------------------------------------
1052 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1053 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1054 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1055 158142c2 bellard
*----------------------------------------------------------------------------*/
1056 158142c2 bellard
1057 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    flag isNegative;
    int32 magnitude;

    if ( a == 0 ) {
        return float32_zero;
    }
    /* The most negative value is packed directly (sign 1, exponent field
       0x9E, empty fraction) so that it is never negated below. */
    if ( a == (sbits32) 0x80000000 ) {
        return packFloat32( 1, 0x9E, 0 );
    }
    isNegative = ( a < 0 );
    magnitude = isNegative ? - a : a;
    return normalizeRoundAndPackFloat32(
        isNegative, 0x9C, magnitude STATUS_VAR );

}
1067 158142c2 bellard
1068 158142c2 bellard
/*----------------------------------------------------------------------------
1069 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1070 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1071 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1072 158142c2 bellard
*----------------------------------------------------------------------------*/
1073 158142c2 bellard
1074 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint32) a : (uint32) a;
    /* Every 32-bit magnitude fits in the result significand, so no rounding
       is needed; the shift moves the leading one bit up to bit 52. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1089 158142c2 bellard
1090 158142c2 bellard
#ifdef FLOATX80
1091 158142c2 bellard
1092 158142c2 bellard
/*----------------------------------------------------------------------------
1093 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1094 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1095 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1096 158142c2 bellard
| Arithmetic.
1097 158142c2 bellard
*----------------------------------------------------------------------------*/
1098 158142c2 bellard
1099 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint32) a : (uint32) a;
    /* The shift moves the leading one bit of the magnitude up to bit 63 of
       the 64-bit significand; the conversion is exact. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1114 158142c2 bellard
1115 158142c2 bellard
#endif
1116 158142c2 bellard
1117 158142c2 bellard
#ifdef FLOAT128
1118 158142c2 bellard
1119 158142c2 bellard
/*----------------------------------------------------------------------------
1120 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1121 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1122 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1123 158142c2 bellard
*----------------------------------------------------------------------------*/
1124 158142c2 bellard
1125 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint32) a : (uint32) a;
    /* The shift moves the leading one bit of the magnitude up to bit 48 of
       the high significand word; the low word stays zero. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1140 158142c2 bellard
1141 158142c2 bellard
#endif
1142 158142c2 bellard
1143 158142c2 bellard
/*----------------------------------------------------------------------------
1144 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1145 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1146 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1147 158142c2 bellard
*----------------------------------------------------------------------------*/
1148 158142c2 bellard
1149 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At most 24 significant bits: the value is packed without
           rounding. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* More than 24 significant bits: align the magnitude for
           roundAndPackFloat32, jamming any bits shifted out on the right
           so that inexactness is not lost. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1174 158142c2 bellard
1175 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* At most 24 significant bits: packed without rounding.  The sign
           argument is 0 because an unsigned input is never negative. */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        /* More than 24 significant bits: align for roundAndPackFloat32,
           jamming any bits shifted out on the right. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1195 75d62a58 j_mayer
1196 158142c2 bellard
/*----------------------------------------------------------------------------
1197 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1198 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1199 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1200 158142c2 bellard
*----------------------------------------------------------------------------*/
1201 158142c2 bellard
1202 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    flag isNegative;
    int64 magnitude;

    if ( a == 0 ) {
        return float64_zero;
    }
    /* The most negative value is packed directly (sign 1, exponent field
       0x43E, empty fraction) so that it is never negated below. */
    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
        return packFloat64( 1, 0x43E, 0 );
    }
    isNegative = ( a < 0 );
    magnitude = isNegative ? - a : a;
    return normalizeRoundAndPackFloat64(
        isNegative, 0x43C, magnitude STATUS_VAR );

}
1214 158142c2 bellard
1215 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int16 zExp = 0x43C;

    if ( a == 0 ) return float64_zero;
    /* The signed conversion never hands normalizeRoundAndPackFloat64 a
       significand with bit 63 set.  NOTE(review): that helper is presumed
       to normalize by shifting left by (leading zeros - 1), which would be
       a negative shift here -- confirm against its definition.  Such inputs
       are therefore moved down one bit first (keeping the lost bit sticky)
       and the exponent is raised by one to compensate. */
    if ( a & LIT64( 0x8000000000000000 ) ) {
        shift64RightJamming( a, 1, &a );
        ++zExp;
    }
    return normalizeRoundAndPackFloat64( 0, zExp, a STATUS_VAR );

}
1221 75d62a58 j_mayer
1222 158142c2 bellard
#ifdef FLOATX80
1223 158142c2 bellard
1224 158142c2 bellard
/*----------------------------------------------------------------------------
1225 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1226 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1227 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1228 158142c2 bellard
| Arithmetic.
1229 158142c2 bellard
*----------------------------------------------------------------------------*/
1230 158142c2 bellard
1231 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint64) a : (uint64) a;
    /* The shift moves the leading one bit of the magnitude up to bit 63 of
       the 64-bit significand; the conversion is exact. */
    shiftCount = countLeadingZeros64( absA );
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1244 158142c2 bellard
1245 158142c2 bellard
#endif
1246 158142c2 bellard
1247 158142c2 bellard
#ifdef FLOAT128
1248 158142c2 bellard
1249 158142c2 bellard
/*----------------------------------------------------------------------------
1250 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1251 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1252 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253 158142c2 bellard
*----------------------------------------------------------------------------*/
1254 158142c2 bellard
1255 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    bits64 zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: negating the most negative
       input as a signed value would overflow, which is undefined in C. */
    absA = zSign ? - (uint64) a : (uint64) a;
    /* Total left shift that would bring the leading one bit to bit 48 of
       the high word when the magnitude starts in the low word. */
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A shift of 64 or more is done by placing the magnitude in the
           high word and shifting by the remainder. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1281 158142c2 bellard
1282 158142c2 bellard
#endif
1283 158142c2 bellard
1284 158142c2 bellard
/*----------------------------------------------------------------------------
1285 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1286 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1287 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1288 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1289 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1290 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1291 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1292 158142c2 bellard
*----------------------------------------------------------------------------*/
1293 158142c2 bellard
1294 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 fraction = extractFloat32Frac( a );
    int16 rightShift;
    bits64 wideSig;

    /* A NaN is treated as positive, so the "largest positive integer"
       rule applies to it regardless of its sign bit. */
    if ( ( exponent == 0xFF ) && ( fraction != 0 ) ) {
        sign = 0;
    }
    /* Normal numbers carry an implicit leading one bit. */
    if ( exponent != 0 ) {
        fraction |= 0x00800000;
    }
    wideSig = ( (bits64) fraction )<<32;
    rightShift = 0xAF - exponent;
    if ( rightShift > 0 ) {
        shift64RightJamming( wideSig, rightShift, &wideSig );
    }
    return roundAndPackInt32( sign, wideSig STATUS_VAR );

}
1313 158142c2 bellard
1314 158142c2 bellard
/*----------------------------------------------------------------------------
1315 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1316 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1317 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1318 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1319 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1320 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1321 158142c2 bellard
| returned.
1322 158142c2 bellard
*----------------------------------------------------------------------------*/
1323 158142c2 bellard
1324 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 sig = extractFloat32Frac( a );
    int16 expOffset = exponent - 0x9E;
    int32 result;

    if ( expOffset >= 0 ) {
        /* Exponent field of 0x9E or more: only the exact bit pattern
           0xCF000000 is accepted without raising the invalid flag. */
        if ( float32_val(a) == 0xCF000000 ) {
            return (sbits32) 0x80000000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exponent == 0xFF ) && sig ) ) {
            return (sbits32) 0x80000000;
        }
        return 0x7FFFFFFF;
    }
    if ( exponent <= 0x7E ) {
        /* Exponent field of 0x7E or less truncates to zero; anything
           other than a true zero is reported as inexact. */
        if ( exponent | sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* Left-justify the significand (implicit bit included) in 32 bits,
       then shift the integer part down; `expOffset' is negative here. */
    sig = ( sig | 0x00800000 )<<8;
    result = sig>>( - expOffset );
    /* Any bits below the integer part make the conversion inexact. */
    if ( (bits32) ( sig<<( expOffset & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1355 158142c2 bellard
1356 158142c2 bellard
/*----------------------------------------------------------------------------
1357 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1358 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1359 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1360 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1361 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1362 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1363 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1364 158142c2 bellard
*----------------------------------------------------------------------------*/
1365 158142c2 bellard
1366 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 sig = extractFloat32Frac( a );
    int16 rightShift = 0xBE - exponent;
    bits64 wideSig, extraBits;

    if ( rightShift < 0 ) {
        /* Exponent field above 0xBE (includes infinities and NaNs):
           raise invalid and saturate; a NaN saturates to the positive
           limit. */
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exponent == 0xFF ) && sig ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    /* Normal numbers carry an implicit leading one bit. */
    if ( exponent != 0 ) {
        sig |= 0x00800000;
    }
    wideSig = ( (bits64) sig )<<40;
    shift64ExtraRightJamming( wideSig, 0, rightShift, &wideSig, &extraBits );
    return roundAndPackInt64( sign, wideSig, extraBits STATUS_VAR );

}
1391 158142c2 bellard
1392 158142c2 bellard
/*----------------------------------------------------------------------------
1393 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1394 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1395 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1396 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1397 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1398 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1399 158142c2 bellard
| returned.
1400 158142c2 bellard
*----------------------------------------------------------------------------*/
1401 158142c2 bellard
1402 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 sig = extractFloat32Frac( a );
    int16 expOffset = exponent - 0xBE;
    bits64 wideSig;
    int64 result;

    if ( expOffset >= 0 ) {
        /* Exponent field of 0xBE or more: only the exact bit pattern
           0xDF000000 is accepted without raising the invalid flag. */
        if ( float32_val(a) == 0xDF000000 ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exponent == 0xFF ) && sig ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( exponent <= 0x7E ) {
        /* Exponent field of 0x7E or less truncates to zero; anything
           other than a true zero is reported as inexact. */
        if ( exponent | sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* Left-justify the significand (implicit bit included) in 64 bits,
       then shift the integer part down; `expOffset' is negative here. */
    wideSig = sig | 0x00800000;
    wideSig <<= 40;
    result = wideSig>>( - expOffset );
    /* Any bits below the integer part make the conversion inexact. */
    if ( (bits64) ( wideSig<<( expOffset & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1437 158142c2 bellard
1438 158142c2 bellard
/*----------------------------------------------------------------------------
1439 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1440 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1441 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1442 158142c2 bellard
| Arithmetic.
1443 158142c2 bellard
*----------------------------------------------------------------------------*/
1444 158142c2 bellard
1445 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 fraction = extractFloat32Frac( a );

    switch ( exponent ) {
     case 0xFF:
        /* NaN (non-zero fraction) or infinity. */
        if ( fraction ) {
            return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
        }
        return packFloat64( sign, 0x7FF, 0 );
     case 0:
        /* Zero, or a subnormal that is normalized before widening. */
        if ( fraction == 0 ) {
            return packFloat64( sign, 0, 0 );
        }
        normalizeFloat32Subnormal( fraction, &exponent, &fraction );
        --exponent;
        break;
     default:
        break;
    }
    /* Rebias the exponent and move the fraction to the top of the wider
       fraction field. */
    return packFloat64( sign, exponent + 0x380, ( (bits64) fraction )<<29 );

}
1466 158142c2 bellard
1467 158142c2 bellard
#ifdef FLOATX80
1468 158142c2 bellard
1469 158142c2 bellard
/*----------------------------------------------------------------------------
1470 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1471 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1472 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1473 158142c2 bellard
| Arithmetic.
1474 158142c2 bellard
*----------------------------------------------------------------------------*/
1475 158142c2 bellard
1476 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 sig = extractFloat32Frac( a );

    switch ( exponent ) {
     case 0xFF:
        /* NaN (non-zero fraction) or infinity. */
        if ( sig ) {
            return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     case 0:
        /* Zero, or a subnormal that is normalized before widening. */
        if ( sig == 0 ) {
            return packFloatx80( sign, 0, 0 );
        }
        normalizeFloat32Subnormal( sig, &exponent, &sig );
        break;
     default:
        break;
    }
    /* This format stores the leading one bit explicitly, so it is set
       here before the significand is moved to the top of 64 bits. */
    sig |= 0x00800000;
    return packFloatx80( sign, exponent + 0x3F80, ( (bits64) sig )<<40 );

}
1497 158142c2 bellard
1498 158142c2 bellard
#endif
1499 158142c2 bellard
1500 158142c2 bellard
#ifdef FLOAT128
1501 158142c2 bellard
1502 158142c2 bellard
/*----------------------------------------------------------------------------
1503 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1504 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1505 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1506 158142c2 bellard
| Arithmetic.
1507 158142c2 bellard
*----------------------------------------------------------------------------*/
1508 158142c2 bellard
1509 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign = extractFloat32Sign( a );
    int16 exponent = extractFloat32Exp( a );
    bits32 fraction = extractFloat32Frac( a );

    switch ( exponent ) {
     case 0xFF:
        /* NaN (non-zero fraction) or infinity. */
        if ( fraction ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat128( sign, 0x7FFF, 0, 0 );
     case 0:
        /* Zero, or a subnormal that is normalized before widening. */
        if ( fraction == 0 ) {
            return packFloat128( sign, 0, 0, 0 );
        }
        normalizeFloat32Subnormal( fraction, &exponent, &fraction );
        --exponent;
        break;
     default:
        break;
    }
    /* Rebias the exponent and place the fraction in the high significand
       word; the low word is zero. */
    return packFloat128(
        sign, exponent + 0x3F80, ( (bits64) fraction )<<25, 0 );

}
1530 158142c2 bellard
1531 158142c2 bellard
#endif
1532 158142c2 bellard
1533 158142c2 bellard
/*----------------------------------------------------------------------------
1534 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1535 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1536 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1537 158142c2 bellard
| Floating-Point Arithmetic.
1538 158142c2 bellard
*----------------------------------------------------------------------------*/
1539 158142c2 bellard
1540 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag sign;
    int16 exponent = extractFloat32Exp( a );
    bits32 unitBit, fractionMask;
    int8 mode;
    bits32 bitsOut;

    /* Exponent field of 0x96 or more: no fraction bits lie below the units
       place, so the value is returned as is (NaNs go through the usual
       propagation routine). */
    if ( exponent >= 0x96 ) {
        if ( ( exponent == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a STATUS_VAR );
        }
        return a;
    }

    /* Exponent field of 0x7E or less: the result is a zero or +/-1,
       chosen by the rounding mode. */
    if ( exponent <= 0x7E ) {
        /* Zeros of either sign are returned untouched and exact. */
        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat32Sign( a );
        mode = STATUS(float_rounding_mode);
        if ( mode == float_round_nearest_even ) {
            /* Rounds to 1 in magnitude only when the exponent field is
               0x7E and the fraction is non-zero. */
            if ( ( exponent == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( sign, 0x7F, 0 );
            }
        }
        else if ( mode == float_round_down ) {
            return make_float32(sign ? 0xBF800000 : 0);
        }
        else if ( mode == float_round_up ) {
            return make_float32(sign ? 0x80000000 : 0x3F800000);
        }
        return packFloat32( sign, 0, 0 );
    }

    /* General case: operate directly on the encoded bits.  `unitBit' marks
       the units position and `fractionMask' covers everything below it. */
    unitBit = 1;
    unitBit <<= 0x96 - exponent;
    fractionMask = unitBit - 1;
    bitsOut = float32_val(a);
    mode = STATUS(float_rounding_mode);
    switch ( mode ) {
     case float_round_nearest_even:
        /* Add half a unit; on an exact tie clear the units bit so the
           result is even. */
        bitsOut += unitBit>>1;
        if ( ( bitsOut & fractionMask ) == 0 ) {
            bitsOut &= ~ unitBit;
        }
        break;
     case float_round_to_zero:
        break;
     default:
        /* Directed rounding: bump the magnitude when the rounding
           direction points away from zero for this sign. */
        if ( extractFloat32Sign( make_float32(bitsOut) )
             ^ ( mode == float_round_up ) ) {
            bitsOut += fractionMask;
        }
        break;
    }
    bitsOut &= ~ fractionMask;
    if ( bitsOut != float32_val(a) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float32(bitsOut);

}
1591 158142c2 bellard
1592 158142c2 bellard
/*----------------------------------------------------------------------------
1593 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1594 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1595 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1596 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1597 158142c2 bellard
| Floating-Point Arithmetic.
1598 158142c2 bellard
*----------------------------------------------------------------------------*/
1599 158142c2 bellard
1600 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1601 158142c2 bellard
{
1602 158142c2 bellard
    int16 aExp, bExp, zExp;
1603 158142c2 bellard
    bits32 aSig, bSig, zSig;
1604 158142c2 bellard
    int16 expDiff;
1605 158142c2 bellard
1606 158142c2 bellard
    aSig = extractFloat32Frac( a );
1607 158142c2 bellard
    aExp = extractFloat32Exp( a );
1608 158142c2 bellard
    bSig = extractFloat32Frac( b );
1609 158142c2 bellard
    bExp = extractFloat32Exp( b );
1610 158142c2 bellard
    expDiff = aExp - bExp;
1611 158142c2 bellard
    aSig <<= 6;
1612 158142c2 bellard
    bSig <<= 6;
1613 158142c2 bellard
    if ( 0 < expDiff ) {
1614 158142c2 bellard
        if ( aExp == 0xFF ) {
1615 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1616 158142c2 bellard
            return a;
1617 158142c2 bellard
        }
1618 158142c2 bellard
        if ( bExp == 0 ) {
1619 158142c2 bellard
            --expDiff;
1620 158142c2 bellard
        }
1621 158142c2 bellard
        else {
1622 158142c2 bellard
            bSig |= 0x20000000;
1623 158142c2 bellard
        }
1624 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1625 158142c2 bellard
        zExp = aExp;
1626 158142c2 bellard
    }
1627 158142c2 bellard
    else if ( expDiff < 0 ) {
1628 158142c2 bellard
        if ( bExp == 0xFF ) {
1629 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1630 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1631 158142c2 bellard
        }
1632 158142c2 bellard
        if ( aExp == 0 ) {
1633 158142c2 bellard
            ++expDiff;
1634 158142c2 bellard
        }
1635 158142c2 bellard
        else {
1636 158142c2 bellard
            aSig |= 0x20000000;
1637 158142c2 bellard
        }
1638 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1639 158142c2 bellard
        zExp = bExp;
1640 158142c2 bellard
    }
1641 158142c2 bellard
    else {
1642 158142c2 bellard
        if ( aExp == 0xFF ) {
1643 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1644 158142c2 bellard
            return a;
1645 158142c2 bellard
        }
1646 fe76d976 pbrook
        if ( aExp == 0 ) {
1647 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
1648 fe76d976 pbrook
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1649 fe76d976 pbrook
        }
1650 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1651 158142c2 bellard
        zExp = aExp;
1652 158142c2 bellard
        goto roundAndPack;
1653 158142c2 bellard
    }
1654 158142c2 bellard
    aSig |= 0x20000000;
1655 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1656 158142c2 bellard
    --zExp;
1657 158142c2 bellard
    if ( (sbits32) zSig < 0 ) {
1658 158142c2 bellard
        zSig = aSig + bSig;
1659 158142c2 bellard
        ++zExp;
1660 158142c2 bellard
    }
1661 158142c2 bellard
 roundAndPack:
1662 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1663 158142c2 bellard
1664 158142c2 bellard
}
1665 158142c2 bellard
1666 158142c2 bellard
/*----------------------------------------------------------------------------
1667 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1668 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1669 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1670 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1671 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1672 158142c2 bellard
*----------------------------------------------------------------------------*/
1673 158142c2 bellard
1674 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1675 158142c2 bellard
{
1676 158142c2 bellard
    int16 aExp, bExp, zExp;
1677 158142c2 bellard
    bits32 aSig, bSig, zSig;
1678 158142c2 bellard
    int16 expDiff;
1679 158142c2 bellard
1680 158142c2 bellard
    aSig = extractFloat32Frac( a );
1681 158142c2 bellard
    aExp = extractFloat32Exp( a );
1682 158142c2 bellard
    bSig = extractFloat32Frac( b );
1683 158142c2 bellard
    bExp = extractFloat32Exp( b );
1684 158142c2 bellard
    expDiff = aExp - bExp;
1685 158142c2 bellard
    aSig <<= 7;
1686 158142c2 bellard
    bSig <<= 7;
1687 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1688 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1689 158142c2 bellard
    if ( aExp == 0xFF ) {
1690 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1691 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1692 158142c2 bellard
        return float32_default_nan;
1693 158142c2 bellard
    }
1694 158142c2 bellard
    if ( aExp == 0 ) {
1695 158142c2 bellard
        aExp = 1;
1696 158142c2 bellard
        bExp = 1;
1697 158142c2 bellard
    }
1698 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1699 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1700 158142c2 bellard
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1701 158142c2 bellard
 bExpBigger:
1702 158142c2 bellard
    if ( bExp == 0xFF ) {
1703 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1704 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1705 158142c2 bellard
    }
1706 158142c2 bellard
    if ( aExp == 0 ) {
1707 158142c2 bellard
        ++expDiff;
1708 158142c2 bellard
    }
1709 158142c2 bellard
    else {
1710 158142c2 bellard
        aSig |= 0x40000000;
1711 158142c2 bellard
    }
1712 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1713 158142c2 bellard
    bSig |= 0x40000000;
1714 158142c2 bellard
 bBigger:
1715 158142c2 bellard
    zSig = bSig - aSig;
1716 158142c2 bellard
    zExp = bExp;
1717 158142c2 bellard
    zSign ^= 1;
1718 158142c2 bellard
    goto normalizeRoundAndPack;
1719 158142c2 bellard
 aExpBigger:
1720 158142c2 bellard
    if ( aExp == 0xFF ) {
1721 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1722 158142c2 bellard
        return a;
1723 158142c2 bellard
    }
1724 158142c2 bellard
    if ( bExp == 0 ) {
1725 158142c2 bellard
        --expDiff;
1726 158142c2 bellard
    }
1727 158142c2 bellard
    else {
1728 158142c2 bellard
        bSig |= 0x40000000;
1729 158142c2 bellard
    }
1730 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1731 158142c2 bellard
    aSig |= 0x40000000;
1732 158142c2 bellard
 aBigger:
1733 158142c2 bellard
    zSig = aSig - bSig;
1734 158142c2 bellard
    zExp = aExp;
1735 158142c2 bellard
 normalizeRoundAndPack:
1736 158142c2 bellard
    --zExp;
1737 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1738 158142c2 bellard
1739 158142c2 bellard
}
1740 158142c2 bellard
1741 158142c2 bellard
/*----------------------------------------------------------------------------
1742 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1743 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1744 158142c2 bellard
| Binary Floating-Point Arithmetic.
1745 158142c2 bellard
*----------------------------------------------------------------------------*/
1746 158142c2 bellard
1747 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1748 158142c2 bellard
{
1749 158142c2 bellard
    flag aSign, bSign;
1750 158142c2 bellard
1751 158142c2 bellard
    aSign = extractFloat32Sign( a );
1752 158142c2 bellard
    bSign = extractFloat32Sign( b );
1753 158142c2 bellard
    if ( aSign == bSign ) {
1754 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1755 158142c2 bellard
    }
1756 158142c2 bellard
    else {
1757 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1758 158142c2 bellard
    }
1759 158142c2 bellard
1760 158142c2 bellard
}
1761 158142c2 bellard
1762 158142c2 bellard
/*----------------------------------------------------------------------------
1763 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1764 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1765 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1766 158142c2 bellard
*----------------------------------------------------------------------------*/
1767 158142c2 bellard
1768 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1769 158142c2 bellard
{
1770 158142c2 bellard
    flag aSign, bSign;
1771 158142c2 bellard
1772 158142c2 bellard
    aSign = extractFloat32Sign( a );
1773 158142c2 bellard
    bSign = extractFloat32Sign( b );
1774 158142c2 bellard
    if ( aSign == bSign ) {
1775 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1776 158142c2 bellard
    }
1777 158142c2 bellard
    else {
1778 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1779 158142c2 bellard
    }
1780 158142c2 bellard
1781 158142c2 bellard
}
1782 158142c2 bellard
1783 158142c2 bellard
/*----------------------------------------------------------------------------
1784 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1785 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1786 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1787 158142c2 bellard
*----------------------------------------------------------------------------*/
1788 158142c2 bellard
1789 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1790 158142c2 bellard
{
1791 158142c2 bellard
    flag aSign, bSign, zSign;
1792 158142c2 bellard
    int16 aExp, bExp, zExp;
1793 158142c2 bellard
    bits32 aSig, bSig;
1794 158142c2 bellard
    bits64 zSig64;
1795 158142c2 bellard
    bits32 zSig;
1796 158142c2 bellard
1797 158142c2 bellard
    aSig = extractFloat32Frac( a );
1798 158142c2 bellard
    aExp = extractFloat32Exp( a );
1799 158142c2 bellard
    aSign = extractFloat32Sign( a );
1800 158142c2 bellard
    bSig = extractFloat32Frac( b );
1801 158142c2 bellard
    bExp = extractFloat32Exp( b );
1802 158142c2 bellard
    bSign = extractFloat32Sign( b );
1803 158142c2 bellard
    zSign = aSign ^ bSign;
1804 158142c2 bellard
    if ( aExp == 0xFF ) {
1805 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1806 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1807 158142c2 bellard
        }
1808 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1809 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1810 158142c2 bellard
            return float32_default_nan;
1811 158142c2 bellard
        }
1812 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1813 158142c2 bellard
    }
1814 158142c2 bellard
    if ( bExp == 0xFF ) {
1815 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1816 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1817 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1818 158142c2 bellard
            return float32_default_nan;
1819 158142c2 bellard
        }
1820 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1821 158142c2 bellard
    }
1822 158142c2 bellard
    if ( aExp == 0 ) {
1823 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1824 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1825 158142c2 bellard
    }
1826 158142c2 bellard
    if ( bExp == 0 ) {
1827 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1828 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1829 158142c2 bellard
    }
1830 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1831 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1832 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1833 158142c2 bellard
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1834 158142c2 bellard
    zSig = zSig64;
1835 158142c2 bellard
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1836 158142c2 bellard
        zSig <<= 1;
1837 158142c2 bellard
        --zExp;
1838 158142c2 bellard
    }
1839 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1840 158142c2 bellard
1841 158142c2 bellard
}
1842 158142c2 bellard
1843 158142c2 bellard
/*----------------------------------------------------------------------------
1844 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1845 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1846 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1847 158142c2 bellard
*----------------------------------------------------------------------------*/
1848 158142c2 bellard
1849 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1850 158142c2 bellard
{
1851 158142c2 bellard
    flag aSign, bSign, zSign;
1852 158142c2 bellard
    int16 aExp, bExp, zExp;
1853 158142c2 bellard
    bits32 aSig, bSig, zSig;
1854 158142c2 bellard
1855 158142c2 bellard
    aSig = extractFloat32Frac( a );
1856 158142c2 bellard
    aExp = extractFloat32Exp( a );
1857 158142c2 bellard
    aSign = extractFloat32Sign( a );
1858 158142c2 bellard
    bSig = extractFloat32Frac( b );
1859 158142c2 bellard
    bExp = extractFloat32Exp( b );
1860 158142c2 bellard
    bSign = extractFloat32Sign( b );
1861 158142c2 bellard
    zSign = aSign ^ bSign;
1862 158142c2 bellard
    if ( aExp == 0xFF ) {
1863 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1864 158142c2 bellard
        if ( bExp == 0xFF ) {
1865 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1866 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1867 158142c2 bellard
            return float32_default_nan;
1868 158142c2 bellard
        }
1869 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1870 158142c2 bellard
    }
1871 158142c2 bellard
    if ( bExp == 0xFF ) {
1872 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1873 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
1874 158142c2 bellard
    }
1875 158142c2 bellard
    if ( bExp == 0 ) {
1876 158142c2 bellard
        if ( bSig == 0 ) {
1877 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
1878 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
1879 158142c2 bellard
                return float32_default_nan;
1880 158142c2 bellard
            }
1881 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
1882 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1883 158142c2 bellard
        }
1884 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1885 158142c2 bellard
    }
1886 158142c2 bellard
    if ( aExp == 0 ) {
1887 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1888 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1889 158142c2 bellard
    }
1890 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
1891 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1892 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1893 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
1894 158142c2 bellard
        aSig >>= 1;
1895 158142c2 bellard
        ++zExp;
1896 158142c2 bellard
    }
1897 158142c2 bellard
    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1898 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
1899 158142c2 bellard
        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1900 158142c2 bellard
    }
1901 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1902 158142c2 bellard
1903 158142c2 bellard
}
1904 158142c2 bellard
1905 158142c2 bellard
/*----------------------------------------------------------------------------
1906 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
1907 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
1908 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1909 158142c2 bellard
*----------------------------------------------------------------------------*/
1910 158142c2 bellard
1911 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
1912 158142c2 bellard
{
1913 158142c2 bellard
    flag aSign, bSign, zSign;
1914 158142c2 bellard
    int16 aExp, bExp, expDiff;
1915 158142c2 bellard
    bits32 aSig, bSig;
1916 158142c2 bellard
    bits32 q;
1917 158142c2 bellard
    bits64 aSig64, bSig64, q64;
1918 158142c2 bellard
    bits32 alternateASig;
1919 158142c2 bellard
    sbits32 sigMean;
1920 158142c2 bellard
1921 158142c2 bellard
    aSig = extractFloat32Frac( a );
1922 158142c2 bellard
    aExp = extractFloat32Exp( a );
1923 158142c2 bellard
    aSign = extractFloat32Sign( a );
1924 158142c2 bellard
    bSig = extractFloat32Frac( b );
1925 158142c2 bellard
    bExp = extractFloat32Exp( b );
1926 158142c2 bellard
    bSign = extractFloat32Sign( b );
1927 158142c2 bellard
    if ( aExp == 0xFF ) {
1928 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1929 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1930 158142c2 bellard
        }
1931 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1932 158142c2 bellard
        return float32_default_nan;
1933 158142c2 bellard
    }
1934 158142c2 bellard
    if ( bExp == 0xFF ) {
1935 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1936 158142c2 bellard
        return a;
1937 158142c2 bellard
    }
1938 158142c2 bellard
    if ( bExp == 0 ) {
1939 158142c2 bellard
        if ( bSig == 0 ) {
1940 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1941 158142c2 bellard
            return float32_default_nan;
1942 158142c2 bellard
        }
1943 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1944 158142c2 bellard
    }
1945 158142c2 bellard
    if ( aExp == 0 ) {
1946 158142c2 bellard
        if ( aSig == 0 ) return a;
1947 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1948 158142c2 bellard
    }
1949 158142c2 bellard
    expDiff = aExp - bExp;
1950 158142c2 bellard
    aSig |= 0x00800000;
1951 158142c2 bellard
    bSig |= 0x00800000;
1952 158142c2 bellard
    if ( expDiff < 32 ) {
1953 158142c2 bellard
        aSig <<= 8;
1954 158142c2 bellard
        bSig <<= 8;
1955 158142c2 bellard
        if ( expDiff < 0 ) {
1956 158142c2 bellard
            if ( expDiff < -1 ) return a;
1957 158142c2 bellard
            aSig >>= 1;
1958 158142c2 bellard
        }
1959 158142c2 bellard
        q = ( bSig <= aSig );
1960 158142c2 bellard
        if ( q ) aSig -= bSig;
1961 158142c2 bellard
        if ( 0 < expDiff ) {
1962 158142c2 bellard
            q = ( ( (bits64) aSig )<<32 ) / bSig;
1963 158142c2 bellard
            q >>= 32 - expDiff;
1964 158142c2 bellard
            bSig >>= 2;
1965 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1966 158142c2 bellard
        }
1967 158142c2 bellard
        else {
1968 158142c2 bellard
            aSig >>= 2;
1969 158142c2 bellard
            bSig >>= 2;
1970 158142c2 bellard
        }
1971 158142c2 bellard
    }
1972 158142c2 bellard
    else {
1973 158142c2 bellard
        if ( bSig <= aSig ) aSig -= bSig;
1974 158142c2 bellard
        aSig64 = ( (bits64) aSig )<<40;
1975 158142c2 bellard
        bSig64 = ( (bits64) bSig )<<40;
1976 158142c2 bellard
        expDiff -= 64;
1977 158142c2 bellard
        while ( 0 < expDiff ) {
1978 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1979 158142c2 bellard
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1980 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
1981 158142c2 bellard
            expDiff -= 62;
1982 158142c2 bellard
        }
1983 158142c2 bellard
        expDiff += 64;
1984 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1985 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1986 158142c2 bellard
        q = q64>>( 64 - expDiff );
1987 158142c2 bellard
        bSig <<= 6;
1988 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1989 158142c2 bellard
    }
1990 158142c2 bellard
    do {
1991 158142c2 bellard
        alternateASig = aSig;
1992 158142c2 bellard
        ++q;
1993 158142c2 bellard
        aSig -= bSig;
1994 158142c2 bellard
    } while ( 0 <= (sbits32) aSig );
1995 158142c2 bellard
    sigMean = aSig + alternateASig;
1996 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1997 158142c2 bellard
        aSig = alternateASig;
1998 158142c2 bellard
    }
1999 158142c2 bellard
    zSign = ( (sbits32) aSig < 0 );
2000 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2001 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2002 158142c2 bellard
2003 158142c2 bellard
}
2004 158142c2 bellard
2005 158142c2 bellard
/*----------------------------------------------------------------------------
2006 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2007 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2008 158142c2 bellard
| Floating-Point Arithmetic.
2009 158142c2 bellard
*----------------------------------------------------------------------------*/
2010 158142c2 bellard
2011 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2012 158142c2 bellard
{
2013 158142c2 bellard
    flag aSign;
2014 158142c2 bellard
    int16 aExp, zExp;
2015 158142c2 bellard
    bits32 aSig, zSig;
2016 158142c2 bellard
    bits64 rem, term;
2017 158142c2 bellard
2018 158142c2 bellard
    aSig = extractFloat32Frac( a );
2019 158142c2 bellard
    aExp = extractFloat32Exp( a );
2020 158142c2 bellard
    aSign = extractFloat32Sign( a );
2021 158142c2 bellard
    if ( aExp == 0xFF ) {
2022 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2023 158142c2 bellard
        if ( ! aSign ) return a;
2024 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2025 158142c2 bellard
        return float32_default_nan;
2026 158142c2 bellard
    }
2027 158142c2 bellard
    if ( aSign ) {
2028 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2029 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2030 158142c2 bellard
        return float32_default_nan;
2031 158142c2 bellard
    }
2032 158142c2 bellard
    if ( aExp == 0 ) {
2033 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2034 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2035 158142c2 bellard
    }
2036 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2037 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2038 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2039 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2040 158142c2 bellard
        if ( zSig < 2 ) {
2041 158142c2 bellard
            zSig = 0x7FFFFFFF;
2042 158142c2 bellard
            goto roundAndPack;
2043 158142c2 bellard
        }
2044 158142c2 bellard
        aSig >>= aExp & 1;
2045 158142c2 bellard
        term = ( (bits64) zSig ) * zSig;
2046 158142c2 bellard
        rem = ( ( (bits64) aSig )<<32 ) - term;
2047 158142c2 bellard
        while ( (sbits64) rem < 0 ) {
2048 158142c2 bellard
            --zSig;
2049 158142c2 bellard
            rem += ( ( (bits64) zSig )<<1 ) | 1;
2050 158142c2 bellard
        }
2051 158142c2 bellard
        zSig |= ( rem != 0 );
2052 158142c2 bellard
    }
2053 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2054 158142c2 bellard
 roundAndPack:
2055 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2056 158142c2 bellard
2057 158142c2 bellard
}
2058 158142c2 bellard
2059 158142c2 bellard
/*----------------------------------------------------------------------------
2060 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2061 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2062 374dfc33 aurel32
| Floating-Point Arithmetic.
2063 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2064 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2065 374dfc33 aurel32
{
2066 374dfc33 aurel32
    flag aSign, zSign;
2067 374dfc33 aurel32
    int16 aExp;
2068 374dfc33 aurel32
    bits32 aSig, zSig, i;
2069 374dfc33 aurel32
2070 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2071 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2072 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2073 374dfc33 aurel32
2074 374dfc33 aurel32
    if ( aExp == 0 ) {
2075 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2076 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2077 374dfc33 aurel32
    }
2078 374dfc33 aurel32
    if ( aSign ) {
2079 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2080 374dfc33 aurel32
        return float32_default_nan;
2081 374dfc33 aurel32
    }
2082 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2083 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2084 374dfc33 aurel32
        return a;
2085 374dfc33 aurel32
    }
2086 374dfc33 aurel32
2087 374dfc33 aurel32
    aExp -= 0x7F;
2088 374dfc33 aurel32
    aSig |= 0x00800000;
2089 374dfc33 aurel32
    zSign = aExp < 0;
2090 374dfc33 aurel32
    zSig = aExp << 23;
2091 374dfc33 aurel32
2092 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2093 374dfc33 aurel32
        aSig = ( (bits64)aSig * aSig ) >> 23;
2094 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2095 374dfc33 aurel32
            aSig >>= 1;
2096 374dfc33 aurel32
            zSig |= i;
2097 374dfc33 aurel32
        }
2098 374dfc33 aurel32
    }
2099 374dfc33 aurel32
2100 374dfc33 aurel32
    if ( zSign )
2101 374dfc33 aurel32
        zSig = -zSig;
2102 374dfc33 aurel32
2103 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2104 374dfc33 aurel32
}
2105 374dfc33 aurel32
2106 374dfc33 aurel32
/*----------------------------------------------------------------------------
2107 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2108 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2109 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2110 158142c2 bellard
*----------------------------------------------------------------------------*/
2111 158142c2 bellard
2112 750afe93 bellard
int float32_eq( float32 a, float32 b STATUS_PARAM )
2113 158142c2 bellard
{
2114 158142c2 bellard
2115 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2116 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2117 158142c2 bellard
       ) {
2118 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2119 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2120 158142c2 bellard
        }
2121 158142c2 bellard
        return 0;
2122 158142c2 bellard
    }
2123 f090c9d4 pbrook
    return ( float32_val(a) == float32_val(b) ) ||
2124 f090c9d4 pbrook
            ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2125 158142c2 bellard
2126 158142c2 bellard
}
2127 158142c2 bellard
2128 158142c2 bellard
/*----------------------------------------------------------------------------
2129 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2130 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
2131 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2132 158142c2 bellard
| Arithmetic.
2133 158142c2 bellard
*----------------------------------------------------------------------------*/
2134 158142c2 bellard
2135 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2136 158142c2 bellard
{
2137 158142c2 bellard
    flag aSign, bSign;
2138 f090c9d4 pbrook
    bits32 av, bv;
2139 158142c2 bellard
2140 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2141 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2142 158142c2 bellard
       ) {
2143 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2144 158142c2 bellard
        return 0;
2145 158142c2 bellard
    }
2146 158142c2 bellard
    aSign = extractFloat32Sign( a );
2147 158142c2 bellard
    bSign = extractFloat32Sign( b );
2148 f090c9d4 pbrook
    av = float32_val(a);
2149 f090c9d4 pbrook
    bv = float32_val(b);
2150 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2151 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2152 158142c2 bellard
2153 158142c2 bellard
}
2154 158142c2 bellard
2155 158142c2 bellard
/*----------------------------------------------------------------------------
2156 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2157 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2158 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2159 158142c2 bellard
*----------------------------------------------------------------------------*/
2160 158142c2 bellard
2161 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2162 158142c2 bellard
{
2163 158142c2 bellard
    flag aSign, bSign;
2164 f090c9d4 pbrook
    bits32 av, bv;
2165 158142c2 bellard
2166 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2167 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2168 158142c2 bellard
       ) {
2169 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2170 158142c2 bellard
        return 0;
2171 158142c2 bellard
    }
2172 158142c2 bellard
    aSign = extractFloat32Sign( a );
2173 158142c2 bellard
    bSign = extractFloat32Sign( b );
2174 f090c9d4 pbrook
    av = float32_val(a);
2175 f090c9d4 pbrook
    bv = float32_val(b);
2176 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2177 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2178 158142c2 bellard
2179 158142c2 bellard
}
2180 158142c2 bellard
2181 158142c2 bellard
/*----------------------------------------------------------------------------
2182 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2183 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2184 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2185 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2186 158142c2 bellard
*----------------------------------------------------------------------------*/
2187 158142c2 bellard
2188 750afe93 bellard
int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
2189 158142c2 bellard
{
2190 f090c9d4 pbrook
    bits32 av, bv;
2191 158142c2 bellard
2192 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2193 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2194 158142c2 bellard
       ) {
2195 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2196 158142c2 bellard
        return 0;
2197 158142c2 bellard
    }
2198 f090c9d4 pbrook
    av = float32_val(a);
2199 f090c9d4 pbrook
    bv = float32_val(b);
2200 f090c9d4 pbrook
    return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2201 158142c2 bellard
2202 158142c2 bellard
}
2203 158142c2 bellard
2204 158142c2 bellard
/*----------------------------------------------------------------------------
2205 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2206 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2207 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2208 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2209 158142c2 bellard
*----------------------------------------------------------------------------*/
2210 158142c2 bellard
2211 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2212 158142c2 bellard
{
2213 158142c2 bellard
    flag aSign, bSign;
2214 f090c9d4 pbrook
    bits32 av, bv;
2215 158142c2 bellard
2216 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2217 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2218 158142c2 bellard
       ) {
2219 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2220 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2221 158142c2 bellard
        }
2222 158142c2 bellard
        return 0;
2223 158142c2 bellard
    }
2224 158142c2 bellard
    aSign = extractFloat32Sign( a );
2225 158142c2 bellard
    bSign = extractFloat32Sign( b );
2226 f090c9d4 pbrook
    av = float32_val(a);
2227 f090c9d4 pbrook
    bv = float32_val(b);
2228 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2229 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2230 158142c2 bellard
2231 158142c2 bellard
}
2232 158142c2 bellard
2233 158142c2 bellard
/*----------------------------------------------------------------------------
2234 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2235 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2236 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2237 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2238 158142c2 bellard
*----------------------------------------------------------------------------*/
2239 158142c2 bellard
2240 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2241 158142c2 bellard
{
2242 158142c2 bellard
    flag aSign, bSign;
2243 f090c9d4 pbrook
    bits32 av, bv;
2244 158142c2 bellard
2245 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2246 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2247 158142c2 bellard
       ) {
2248 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2249 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2250 158142c2 bellard
        }
2251 158142c2 bellard
        return 0;
2252 158142c2 bellard
    }
2253 158142c2 bellard
    aSign = extractFloat32Sign( a );
2254 158142c2 bellard
    bSign = extractFloat32Sign( b );
2255 f090c9d4 pbrook
    av = float32_val(a);
2256 f090c9d4 pbrook
    bv = float32_val(b);
2257 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2258 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2259 158142c2 bellard
2260 158142c2 bellard
}
2261 158142c2 bellard
2262 158142c2 bellard
/*----------------------------------------------------------------------------
2263 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2264 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2265 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2266 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2267 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2268 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2269 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2270 158142c2 bellard
*----------------------------------------------------------------------------*/
2271 158142c2 bellard
2272 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2273 158142c2 bellard
{
2274 158142c2 bellard
    flag aSign;
2275 158142c2 bellard
    int16 aExp, shiftCount;
2276 158142c2 bellard
    bits64 aSig;
2277 158142c2 bellard
2278 158142c2 bellard
    aSig = extractFloat64Frac( a );
2279 158142c2 bellard
    aExp = extractFloat64Exp( a );
2280 158142c2 bellard
    aSign = extractFloat64Sign( a );
2281 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2282 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2283 158142c2 bellard
    shiftCount = 0x42C - aExp;
2284 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2285 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2286 158142c2 bellard
2287 158142c2 bellard
}
2288 158142c2 bellard
2289 158142c2 bellard
/*----------------------------------------------------------------------------
2290 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2291 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2292 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2293 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2294 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2295 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2296 158142c2 bellard
| returned.
2297 158142c2 bellard
*----------------------------------------------------------------------------*/
2298 158142c2 bellard
2299 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2300 158142c2 bellard
{
2301 158142c2 bellard
    flag aSign;
2302 158142c2 bellard
    int16 aExp, shiftCount;
2303 158142c2 bellard
    bits64 aSig, savedASig;
2304 158142c2 bellard
    int32 z;
2305 158142c2 bellard
2306 158142c2 bellard
    aSig = extractFloat64Frac( a );
2307 158142c2 bellard
    aExp = extractFloat64Exp( a );
2308 158142c2 bellard
    aSign = extractFloat64Sign( a );
2309 158142c2 bellard
    if ( 0x41E < aExp ) {
2310 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2311 158142c2 bellard
        goto invalid;
2312 158142c2 bellard
    }
2313 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2314 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2315 158142c2 bellard
        return 0;
2316 158142c2 bellard
    }
2317 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2318 158142c2 bellard
    shiftCount = 0x433 - aExp;
2319 158142c2 bellard
    savedASig = aSig;
2320 158142c2 bellard
    aSig >>= shiftCount;
2321 158142c2 bellard
    z = aSig;
2322 158142c2 bellard
    if ( aSign ) z = - z;
2323 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2324 158142c2 bellard
 invalid:
2325 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2326 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2327 158142c2 bellard
    }
2328 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2329 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2330 158142c2 bellard
    }
2331 158142c2 bellard
    return z;
2332 158142c2 bellard
2333 158142c2 bellard
}
2334 158142c2 bellard
2335 158142c2 bellard
/*----------------------------------------------------------------------------
2336 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2337 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2338 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2339 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2340 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2341 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2342 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2343 158142c2 bellard
*----------------------------------------------------------------------------*/
2344 158142c2 bellard
2345 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2346 158142c2 bellard
{
2347 158142c2 bellard
    flag aSign;
2348 158142c2 bellard
    int16 aExp, shiftCount;
2349 158142c2 bellard
    bits64 aSig, aSigExtra;
2350 158142c2 bellard
2351 158142c2 bellard
    aSig = extractFloat64Frac( a );
2352 158142c2 bellard
    aExp = extractFloat64Exp( a );
2353 158142c2 bellard
    aSign = extractFloat64Sign( a );
2354 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2355 158142c2 bellard
    shiftCount = 0x433 - aExp;
2356 158142c2 bellard
    if ( shiftCount <= 0 ) {
2357 158142c2 bellard
        if ( 0x43E < aExp ) {
2358 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2359 158142c2 bellard
            if (    ! aSign
2360 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2361 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2362 158142c2 bellard
               ) {
2363 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2364 158142c2 bellard
            }
2365 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2366 158142c2 bellard
        }
2367 158142c2 bellard
        aSigExtra = 0;
2368 158142c2 bellard
        aSig <<= - shiftCount;
2369 158142c2 bellard
    }
2370 158142c2 bellard
    else {
2371 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2372 158142c2 bellard
    }
2373 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2374 158142c2 bellard
2375 158142c2 bellard
}
2376 158142c2 bellard
2377 158142c2 bellard
/*----------------------------------------------------------------------------
2378 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2379 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2380 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2381 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2382 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2383 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2384 158142c2 bellard
| returned.
2385 158142c2 bellard
*----------------------------------------------------------------------------*/
2386 158142c2 bellard
2387 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2388 158142c2 bellard
{
2389 158142c2 bellard
    flag aSign;
2390 158142c2 bellard
    int16 aExp, shiftCount;
2391 158142c2 bellard
    bits64 aSig;
2392 158142c2 bellard
    int64 z;
2393 158142c2 bellard
2394 158142c2 bellard
    aSig = extractFloat64Frac( a );
2395 158142c2 bellard
    aExp = extractFloat64Exp( a );
2396 158142c2 bellard
    aSign = extractFloat64Sign( a );
2397 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2398 158142c2 bellard
    shiftCount = aExp - 0x433;
2399 158142c2 bellard
    if ( 0 <= shiftCount ) {
2400 158142c2 bellard
        if ( 0x43E <= aExp ) {
2401 f090c9d4 pbrook
            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2402 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2403 158142c2 bellard
                if (    ! aSign
2404 158142c2 bellard
                     || (    ( aExp == 0x7FF )
2405 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2406 158142c2 bellard
                   ) {
2407 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2408 158142c2 bellard
                }
2409 158142c2 bellard
            }
2410 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2411 158142c2 bellard
        }
2412 158142c2 bellard
        z = aSig<<shiftCount;
2413 158142c2 bellard
    }
2414 158142c2 bellard
    else {
2415 158142c2 bellard
        if ( aExp < 0x3FE ) {
2416 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2417 158142c2 bellard
            return 0;
2418 158142c2 bellard
        }
2419 158142c2 bellard
        z = aSig>>( - shiftCount );
2420 158142c2 bellard
        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2421 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
2422 158142c2 bellard
        }
2423 158142c2 bellard
    }
2424 158142c2 bellard
    if ( aSign ) z = - z;
2425 158142c2 bellard
    return z;
2426 158142c2 bellard
2427 158142c2 bellard
}
2428 158142c2 bellard
2429 158142c2 bellard
/*----------------------------------------------------------------------------
2430 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2431 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2432 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2433 158142c2 bellard
| Arithmetic.
2434 158142c2 bellard
*----------------------------------------------------------------------------*/
2435 158142c2 bellard
2436 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
2437 158142c2 bellard
{
2438 158142c2 bellard
    flag aSign;
2439 158142c2 bellard
    int16 aExp;
2440 158142c2 bellard
    bits64 aSig;
2441 158142c2 bellard
    bits32 zSig;
2442 158142c2 bellard
2443 158142c2 bellard
    aSig = extractFloat64Frac( a );
2444 158142c2 bellard
    aExp = extractFloat64Exp( a );
2445 158142c2 bellard
    aSign = extractFloat64Sign( a );
2446 158142c2 bellard
    if ( aExp == 0x7FF ) {
2447 158142c2 bellard
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
2448 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
2449 158142c2 bellard
    }
2450 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
2451 158142c2 bellard
    zSig = aSig;
2452 158142c2 bellard
    if ( aExp || zSig ) {
2453 158142c2 bellard
        zSig |= 0x40000000;
2454 158142c2 bellard
        aExp -= 0x381;
2455 158142c2 bellard
    }
2456 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2457 158142c2 bellard
2458 158142c2 bellard
}
2459 158142c2 bellard
2460 158142c2 bellard
#ifdef FLOATX80
2461 158142c2 bellard
2462 158142c2 bellard
/*----------------------------------------------------------------------------
2463 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2464 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2465 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2466 158142c2 bellard
| Arithmetic.
2467 158142c2 bellard
*----------------------------------------------------------------------------*/
2468 158142c2 bellard
2469 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2470 158142c2 bellard
{
2471 158142c2 bellard
    flag aSign;
2472 158142c2 bellard
    int16 aExp;
2473 158142c2 bellard
    bits64 aSig;
2474 158142c2 bellard
2475 158142c2 bellard
    aSig = extractFloat64Frac( a );
2476 158142c2 bellard
    aExp = extractFloat64Exp( a );
2477 158142c2 bellard
    aSign = extractFloat64Sign( a );
2478 158142c2 bellard
    if ( aExp == 0x7FF ) {
2479 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
2480 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2481 158142c2 bellard
    }
2482 158142c2 bellard
    if ( aExp == 0 ) {
2483 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2484 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2485 158142c2 bellard
    }
2486 158142c2 bellard
    return
2487 158142c2 bellard
        packFloatx80(
2488 158142c2 bellard
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2489 158142c2 bellard
2490 158142c2 bellard
}
2491 158142c2 bellard
2492 158142c2 bellard
#endif
2493 158142c2 bellard
2494 158142c2 bellard
#ifdef FLOAT128
2495 158142c2 bellard
2496 158142c2 bellard
/*----------------------------------------------------------------------------
2497 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2498 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2499 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2500 158142c2 bellard
| Arithmetic.
2501 158142c2 bellard
*----------------------------------------------------------------------------*/
2502 158142c2 bellard
2503 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
2504 158142c2 bellard
{
2505 158142c2 bellard
    flag aSign;
2506 158142c2 bellard
    int16 aExp;
2507 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
2508 158142c2 bellard
2509 158142c2 bellard
    aSig = extractFloat64Frac( a );
2510 158142c2 bellard
    aExp = extractFloat64Exp( a );
2511 158142c2 bellard
    aSign = extractFloat64Sign( a );
2512 158142c2 bellard
    if ( aExp == 0x7FF ) {
2513 158142c2 bellard
        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
2514 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
2515 158142c2 bellard
    }
2516 158142c2 bellard
    if ( aExp == 0 ) {
2517 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2518 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2519 158142c2 bellard
        --aExp;
2520 158142c2 bellard
    }
2521 158142c2 bellard
    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2522 158142c2 bellard
    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2523 158142c2 bellard
2524 158142c2 bellard
}
2525 158142c2 bellard
2526 158142c2 bellard
#endif
2527 158142c2 bellard
2528 158142c2 bellard
/*----------------------------------------------------------------------------
2529 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2530 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2531 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2532 158142c2 bellard
| Floating-Point Arithmetic.
2533 158142c2 bellard
*----------------------------------------------------------------------------*/
2534 158142c2 bellard
2535 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
2536 158142c2 bellard
{
2537 158142c2 bellard
    flag aSign;
2538 158142c2 bellard
    int16 aExp;
2539 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
2540 158142c2 bellard
    int8 roundingMode;
2541 f090c9d4 pbrook
    bits64 z;
2542 158142c2 bellard
2543 158142c2 bellard
    aExp = extractFloat64Exp( a );
2544 158142c2 bellard
    if ( 0x433 <= aExp ) {
2545 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2546 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
2547 158142c2 bellard
        }
2548 158142c2 bellard
        return a;
2549 158142c2 bellard
    }
2550 158142c2 bellard
    if ( aExp < 0x3FF ) {
2551 f090c9d4 pbrook
        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
2552 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2553 158142c2 bellard
        aSign = extractFloat64Sign( a );
2554 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
2555 158142c2 bellard
         case float_round_nearest_even:
2556 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2557 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
2558 158142c2 bellard
            }
2559 158142c2 bellard
            break;
2560 158142c2 bellard
         case float_round_down:
2561 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
2562 158142c2 bellard
         case float_round_up:
2563 f090c9d4 pbrook
            return make_float64(
2564 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
2565 158142c2 bellard
        }
2566 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
2567 158142c2 bellard
    }
2568 158142c2 bellard
    lastBitMask = 1;
2569 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
2570 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
2571 f090c9d4 pbrook
    z = float64_val(a);
2572 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
2573 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
2574 158142c2 bellard
        z += lastBitMask>>1;
2575 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2576 158142c2 bellard
    }
2577 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
2578 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
2579 158142c2 bellard
            z += roundBitsMask;
2580 158142c2 bellard
        }
2581 158142c2 bellard
    }
2582 158142c2 bellard
    z &= ~ roundBitsMask;
2583 f090c9d4 pbrook
    if ( z != float64_val(a) )
2584 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
2585 f090c9d4 pbrook
    return make_float64(z);
2586 158142c2 bellard
2587 158142c2 bellard
}
2588 158142c2 bellard
2589 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integral value
| using round-toward-zero, whatever the current rounding mode is, and returns
| the result as a double-precision floating-point value.  The rounding mode
| held in the status is switched temporarily and restored before returning.
*----------------------------------------------------------------------------*/

float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode;
    float64 truncated;

    savedMode = STATUS(float_rounding_mode);
    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int( a STATUS_VAR );
    STATUS(float_rounding_mode) = savedMode;

    return truncated;
}
2599 e6e5906b pbrook
2600 158142c2 bellard
/*----------------------------------------------------------------------------
2601 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
2602 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2603 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
2604 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
2605 158142c2 bellard
| Floating-Point Arithmetic.
2606 158142c2 bellard
*----------------------------------------------------------------------------*/
2607 158142c2 bellard
2608 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2609 158142c2 bellard
{
2610 158142c2 bellard
    int16 aExp, bExp, zExp;
2611 158142c2 bellard
    bits64 aSig, bSig, zSig;
2612 158142c2 bellard
    int16 expDiff;
2613 158142c2 bellard
2614 158142c2 bellard
    aSig = extractFloat64Frac( a );
2615 158142c2 bellard
    aExp = extractFloat64Exp( a );
2616 158142c2 bellard
    bSig = extractFloat64Frac( b );
2617 158142c2 bellard
    bExp = extractFloat64Exp( b );
2618 158142c2 bellard
    expDiff = aExp - bExp;
2619 158142c2 bellard
    aSig <<= 9;
2620 158142c2 bellard
    bSig <<= 9;
2621 158142c2 bellard
    if ( 0 < expDiff ) {
2622 158142c2 bellard
        if ( aExp == 0x7FF ) {
2623 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2624 158142c2 bellard
            return a;
2625 158142c2 bellard
        }
2626 158142c2 bellard
        if ( bExp == 0 ) {
2627 158142c2 bellard
            --expDiff;
2628 158142c2 bellard
        }
2629 158142c2 bellard
        else {
2630 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
2631 158142c2 bellard
        }
2632 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
2633 158142c2 bellard
        zExp = aExp;
2634 158142c2 bellard
    }
2635 158142c2 bellard
    else if ( expDiff < 0 ) {
2636 158142c2 bellard
        if ( bExp == 0x7FF ) {
2637 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2638 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2639 158142c2 bellard
        }
2640 158142c2 bellard
        if ( aExp == 0 ) {
2641 158142c2 bellard
            ++expDiff;
2642 158142c2 bellard
        }
2643 158142c2 bellard
        else {
2644 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
2645 158142c2 bellard
        }
2646 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
2647 158142c2 bellard
        zExp = bExp;
2648 158142c2 bellard
    }
2649 158142c2 bellard
    else {
2650 158142c2 bellard
        if ( aExp == 0x7FF ) {
2651 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2652 158142c2 bellard
            return a;
2653 158142c2 bellard
        }
2654 fe76d976 pbrook
        if ( aExp == 0 ) {
2655 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
2656 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2657 fe76d976 pbrook
        }
2658 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2659 158142c2 bellard
        zExp = aExp;
2660 158142c2 bellard
        goto roundAndPack;
2661 158142c2 bellard
    }
2662 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
2663 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
2664 158142c2 bellard
    --zExp;
2665 158142c2 bellard
    if ( (sbits64) zSig < 0 ) {
2666 158142c2 bellard
        zSig = aSig + bSig;
2667 158142c2 bellard
        ++zExp;
2668 158142c2 bellard
    }
2669 158142c2 bellard
 roundAndPack:
2670 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2671 158142c2 bellard
2672 158142c2 bellard
}
2673 158142c2 bellard
2674 158142c2 bellard
/*----------------------------------------------------------------------------
2675 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
2676 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2677 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
2678 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2679 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2680 158142c2 bellard
*----------------------------------------------------------------------------*/
2681 158142c2 bellard
2682 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2683 158142c2 bellard
{
2684 158142c2 bellard
    int16 aExp, bExp, zExp;
2685 158142c2 bellard
    bits64 aSig, bSig, zSig;
2686 158142c2 bellard
    int16 expDiff;
2687 158142c2 bellard
2688 158142c2 bellard
    aSig = extractFloat64Frac( a );
2689 158142c2 bellard
    aExp = extractFloat64Exp( a );
2690 158142c2 bellard
    bSig = extractFloat64Frac( b );
2691 158142c2 bellard
    bExp = extractFloat64Exp( b );
2692 158142c2 bellard
    expDiff = aExp - bExp;
2693 158142c2 bellard
    aSig <<= 10;
2694 158142c2 bellard
    bSig <<= 10;
2695 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
2696 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
2697 158142c2 bellard
    if ( aExp == 0x7FF ) {
2698 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2699 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2700 158142c2 bellard
        return float64_default_nan;
2701 158142c2 bellard
    }
2702 158142c2 bellard
    if ( aExp == 0 ) {
2703 158142c2 bellard
        aExp = 1;
2704 158142c2 bellard
        bExp = 1;
2705 158142c2 bellard
    }
2706 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
2707 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
2708 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2709 158142c2 bellard
 bExpBigger:
2710 158142c2 bellard
    if ( bExp == 0x7FF ) {
2711 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2712 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2713 158142c2 bellard
    }
2714 158142c2 bellard
    if ( aExp == 0 ) {
2715 158142c2 bellard
        ++expDiff;
2716 158142c2 bellard
    }
2717 158142c2 bellard
    else {
2718 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
2719 158142c2 bellard
    }
2720 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
2721 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
2722 158142c2 bellard
 bBigger:
2723 158142c2 bellard
    zSig = bSig - aSig;
2724 158142c2 bellard
    zExp = bExp;
2725 158142c2 bellard
    zSign ^= 1;
2726 158142c2 bellard
    goto normalizeRoundAndPack;
2727 158142c2 bellard
 aExpBigger:
2728 158142c2 bellard
    if ( aExp == 0x7FF ) {
2729 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2730 158142c2 bellard
        return a;
2731 158142c2 bellard
    }
2732 158142c2 bellard
    if ( bExp == 0 ) {
2733 158142c2 bellard
        --expDiff;
2734 158142c2 bellard
    }
2735 158142c2 bellard
    else {
2736 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
2737 158142c2 bellard
    }
2738 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
2739 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
2740 158142c2 bellard
 aBigger:
2741 158142c2 bellard
    zSig = aSig - bSig;
2742 158142c2 bellard
    zExp = aExp;
2743 158142c2 bellard
 normalizeRoundAndPack:
2744 158142c2 bellard
    --zExp;
2745 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2746 158142c2 bellard
2747 158142c2 bellard
}
2748 158142c2 bellard
2749 158142c2 bellard
/*----------------------------------------------------------------------------
2750 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
2751 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
2752 158142c2 bellard
| Binary Floating-Point Arithmetic.
2753 158142c2 bellard
*----------------------------------------------------------------------------*/
2754 158142c2 bellard
2755 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
2756 158142c2 bellard
{
2757 158142c2 bellard
    flag aSign, bSign;
2758 158142c2 bellard
2759 158142c2 bellard
    aSign = extractFloat64Sign( a );
2760 158142c2 bellard
    bSign = extractFloat64Sign( b );
2761 158142c2 bellard
    if ( aSign == bSign ) {
2762 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2763 158142c2 bellard
    }
2764 158142c2 bellard
    else {
2765 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2766 158142c2 bellard
    }
2767 158142c2 bellard
2768 158142c2 bellard
}
2769 158142c2 bellard
2770 158142c2 bellard
/*----------------------------------------------------------------------------
2771 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
2772 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2773 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2774 158142c2 bellard
*----------------------------------------------------------------------------*/
2775 158142c2 bellard
2776 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2777 158142c2 bellard
{
2778 158142c2 bellard
    flag aSign, bSign;
2779 158142c2 bellard
2780 158142c2 bellard
    aSign = extractFloat64Sign( a );
2781 158142c2 bellard
    bSign = extractFloat64Sign( b );
2782 158142c2 bellard
    if ( aSign == bSign ) {
2783 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2784 158142c2 bellard
    }
2785 158142c2 bellard
    else {
2786 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2787 158142c2 bellard
    }
2788 158142c2 bellard
2789 158142c2 bellard
}
2790 158142c2 bellard
2791 158142c2 bellard
/*----------------------------------------------------------------------------
2792 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
2793 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2794 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2795 158142c2 bellard
*----------------------------------------------------------------------------*/
2796 158142c2 bellard
2797 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
2798 158142c2 bellard
{
2799 158142c2 bellard
    flag aSign, bSign, zSign;
2800 158142c2 bellard
    int16 aExp, bExp, zExp;
2801 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
2802 158142c2 bellard
2803 158142c2 bellard
    aSig = extractFloat64Frac( a );
2804 158142c2 bellard
    aExp = extractFloat64Exp( a );
2805 158142c2 bellard
    aSign = extractFloat64Sign( a );
2806 158142c2 bellard
    bSig = extractFloat64Frac( b );
2807 158142c2 bellard
    bExp = extractFloat64Exp( b );
2808 158142c2 bellard
    bSign = extractFloat64Sign( b );
2809 158142c2 bellard
    zSign = aSign ^ bSign;
2810 158142c2 bellard
    if ( aExp == 0x7FF ) {
2811 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2812 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2813 158142c2 bellard
        }
2814 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
2815 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2816 158142c2 bellard
            return float64_default_nan;
2817 158142c2 bellard
        }
2818 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2819 158142c2 bellard
    }
2820 158142c2 bellard
    if ( bExp == 0x7FF ) {
2821 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2822 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
2823 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2824 158142c2 bellard
            return float64_default_nan;
2825 158142c2 bellard
        }
2826 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2827 158142c2 bellard
    }
2828 158142c2 bellard
    if ( aExp == 0 ) {
2829 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2830 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2831 158142c2 bellard
    }
2832 158142c2 bellard
    if ( bExp == 0 ) {
2833 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2834 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2835 158142c2 bellard
    }
2836 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
2837 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2838 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2839 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2840 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
2841 158142c2 bellard
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2842 158142c2 bellard
        zSig0 <<= 1;
2843 158142c2 bellard
        --zExp;
2844 158142c2 bellard
    }
2845 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
2846 158142c2 bellard
2847 158142c2 bellard
}
2848 158142c2 bellard
2849 158142c2 bellard
/*----------------------------------------------------------------------------
2850 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
2851 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
2852 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2853 158142c2 bellard
*----------------------------------------------------------------------------*/
2854 158142c2 bellard
2855 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
2856 158142c2 bellard
{
2857 158142c2 bellard
    flag aSign, bSign, zSign;
2858 158142c2 bellard
    int16 aExp, bExp, zExp;
2859 158142c2 bellard
    bits64 aSig, bSig, zSig;
2860 158142c2 bellard
    bits64 rem0, rem1;
2861 158142c2 bellard
    bits64 term0, term1;
2862 158142c2 bellard
2863 158142c2 bellard
    aSig = extractFloat64Frac( a );
2864 158142c2 bellard
    aExp = extractFloat64Exp( a );
2865 158142c2 bellard
    aSign = extractFloat64Sign( a );
2866 158142c2 bellard
    bSig = extractFloat64Frac( b );
2867 158142c2 bellard
    bExp = extractFloat64Exp( b );
2868 158142c2 bellard
    bSign = extractFloat64Sign( b );
2869 158142c2 bellard
    zSign = aSign ^ bSign;
2870 158142c2 bellard
    if ( aExp == 0x7FF ) {
2871 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2872 158142c2 bellard
        if ( bExp == 0x7FF ) {
2873 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2874 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2875 158142c2 bellard
            return float64_default_nan;
2876 158142c2 bellard
        }
2877 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2878 158142c2 bellard
    }
2879 158142c2 bellard
    if ( bExp == 0x7FF ) {
2880 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2881 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
2882 158142c2 bellard
    }
2883 158142c2 bellard
    if ( bExp == 0 ) {
2884 158142c2 bellard
        if ( bSig == 0 ) {
2885 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
2886 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2887 158142c2 bellard
                return float64_default_nan;
2888 158142c2 bellard
            }
2889 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
2890 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2891 158142c2 bellard
        }
2892 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2893 158142c2 bellard
    }
2894 158142c2 bellard
    if ( aExp == 0 ) {
2895 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2896 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2897 158142c2 bellard
    }
2898 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
2899 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2900 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2901 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
2902 158142c2 bellard
        aSig >>= 1;
2903 158142c2 bellard
        ++zExp;
2904 158142c2 bellard
    }
2905 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
2906 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
2907 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
2908 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2909 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
2910 158142c2 bellard
            --zSig;
2911 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2912 158142c2 bellard
        }
2913 158142c2 bellard
        zSig |= ( rem1 != 0 );
2914 158142c2 bellard
    }
2915 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2916 158142c2 bellard
2917 158142c2 bellard
}
2918 158142c2 bellard
2919 158142c2 bellard
/*----------------------------------------------------------------------------
2920 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
2921 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2922 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2923 158142c2 bellard
*----------------------------------------------------------------------------*/
2924 158142c2 bellard
2925 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
2926 158142c2 bellard
{
2927 158142c2 bellard
    flag aSign, bSign, zSign;
2928 158142c2 bellard
    int16 aExp, bExp, expDiff;
2929 158142c2 bellard
    bits64 aSig, bSig;
2930 158142c2 bellard
    bits64 q, alternateASig;
2931 158142c2 bellard
    sbits64 sigMean;
2932 158142c2 bellard
2933 158142c2 bellard
    aSig = extractFloat64Frac( a );
2934 158142c2 bellard
    aExp = extractFloat64Exp( a );
2935 158142c2 bellard
    aSign = extractFloat64Sign( a );
2936 158142c2 bellard
    bSig = extractFloat64Frac( b );
2937 158142c2 bellard
    bExp = extractFloat64Exp( b );
2938 158142c2 bellard
    bSign = extractFloat64Sign( b );
2939 158142c2 bellard
    if ( aExp == 0x7FF ) {
2940 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2941 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2942 158142c2 bellard
        }
2943 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2944 158142c2 bellard
        return float64_default_nan;
2945 158142c2 bellard
    }
2946 158142c2 bellard
    if ( bExp == 0x7FF ) {
2947 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2948 158142c2 bellard
        return a;
2949 158142c2 bellard
    }
2950 158142c2 bellard
    if ( bExp == 0 ) {
2951 158142c2 bellard
        if ( bSig == 0 ) {
2952 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2953 158142c2 bellard
            return float64_default_nan;
2954 158142c2 bellard
        }
2955 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2956 158142c2 bellard
    }
2957 158142c2 bellard
    if ( aExp == 0 ) {
2958 158142c2 bellard
        if ( aSig == 0 ) return a;
2959 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2960 158142c2 bellard
    }
2961 158142c2 bellard
    expDiff = aExp - bExp;
2962 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2963 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2964 158142c2 bellard
    if ( expDiff < 0 ) {
2965 158142c2 bellard
        if ( expDiff < -1 ) return a;
2966 158142c2 bellard
        aSig >>= 1;
2967 158142c2 bellard
    }
2968 158142c2 bellard
    q = ( bSig <= aSig );
2969 158142c2 bellard
    if ( q ) aSig -= bSig;
2970 158142c2 bellard
    expDiff -= 64;
2971 158142c2 bellard
    while ( 0 < expDiff ) {
2972 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2973 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
2974 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
2975 158142c2 bellard
        expDiff -= 62;
2976 158142c2 bellard
    }
2977 158142c2 bellard
    expDiff += 64;
2978 158142c2 bellard
    if ( 0 < expDiff ) {
2979 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2980 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
2981 158142c2 bellard
        q >>= 64 - expDiff;
2982 158142c2 bellard
        bSig >>= 2;
2983 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2984 158142c2 bellard
    }
2985 158142c2 bellard
    else {
2986 158142c2 bellard
        aSig >>= 2;
2987 158142c2 bellard
        bSig >>= 2;
2988 158142c2 bellard
    }
2989 158142c2 bellard
    do {
2990 158142c2 bellard
        alternateASig = aSig;
2991 158142c2 bellard
        ++q;
2992 158142c2 bellard
        aSig -= bSig;
2993 158142c2 bellard
    } while ( 0 <= (sbits64) aSig );
2994 158142c2 bellard
    sigMean = aSig + alternateASig;
2995 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2996 158142c2 bellard
        aSig = alternateASig;
2997 158142c2 bellard
    }
2998 158142c2 bellard
    zSign = ( (sbits64) aSig < 0 );
2999 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3000 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3001 158142c2 bellard
3002 158142c2 bellard
}
3003 158142c2 bellard
3004 158142c2 bellard
/*----------------------------------------------------------------------------
3005 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3006 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3007 158142c2 bellard
| Floating-Point Arithmetic.
3008 158142c2 bellard
*----------------------------------------------------------------------------*/
3009 158142c2 bellard
3010 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3011 158142c2 bellard
{
3012 158142c2 bellard
    flag aSign;
3013 158142c2 bellard
    int16 aExp, zExp;
3014 158142c2 bellard
    bits64 aSig, zSig, doubleZSig;
3015 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
3016 158142c2 bellard
3017 158142c2 bellard
    aSig = extractFloat64Frac( a );
3018 158142c2 bellard
    aExp = extractFloat64Exp( a );
3019 158142c2 bellard
    aSign = extractFloat64Sign( a );
3020 158142c2 bellard
    if ( aExp == 0x7FF ) {
3021 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3022 158142c2 bellard
        if ( ! aSign ) return a;
3023 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3024 158142c2 bellard
        return float64_default_nan;
3025 158142c2 bellard
    }
3026 158142c2 bellard
    if ( aSign ) {
3027 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3028 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3029 158142c2 bellard
        return float64_default_nan;
3030 158142c2 bellard
    }
3031 158142c2 bellard
    if ( aExp == 0 ) {
3032 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3033 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3034 158142c2 bellard
    }
3035 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3036 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
3037 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
3038 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
3039 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3040 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3041 158142c2 bellard
        doubleZSig = zSig<<1;
3042 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3043 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3044 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
3045 158142c2 bellard
            --zSig;
3046 158142c2 bellard
            doubleZSig -= 2;
3047 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3048 158142c2 bellard
        }
3049 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3050 158142c2 bellard
    }
3051 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3052 158142c2 bellard
3053 158142c2 bellard
}
3054 158142c2 bellard
3055 158142c2 bellard
/*----------------------------------------------------------------------------
3056 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3057 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3058 374dfc33 aurel32
| Floating-Point Arithmetic.
3059 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3060 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
3061 374dfc33 aurel32
{
3062 374dfc33 aurel32
    flag aSign, zSign;
3063 374dfc33 aurel32
    int16 aExp;
3064 374dfc33 aurel32
    bits64 aSig, aSig0, aSig1, zSig, i;
3065 374dfc33 aurel32
3066 374dfc33 aurel32
    aSig = extractFloat64Frac( a );
3067 374dfc33 aurel32
    aExp = extractFloat64Exp( a );
3068 374dfc33 aurel32
    aSign = extractFloat64Sign( a );
3069 374dfc33 aurel32
3070 374dfc33 aurel32
    if ( aExp == 0 ) {
3071 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3072 374dfc33 aurel32
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3073 374dfc33 aurel32
    }
3074 374dfc33 aurel32
    if ( aSign ) {
3075 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
3076 374dfc33 aurel32
        return float64_default_nan;
3077 374dfc33 aurel32
    }
3078 374dfc33 aurel32
    if ( aExp == 0x7FF ) {
3079 374dfc33 aurel32
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3080 374dfc33 aurel32
        return a;
3081 374dfc33 aurel32
    }
3082 374dfc33 aurel32
3083 374dfc33 aurel32
    aExp -= 0x3FF;
3084 374dfc33 aurel32
    aSig |= LIT64( 0x0010000000000000 );
3085 374dfc33 aurel32
    zSign = aExp < 0;
3086 374dfc33 aurel32
    zSig = (bits64)aExp << 52;
3087 374dfc33 aurel32
    for (i = 1LL << 51; i > 0; i >>= 1) {
3088 374dfc33 aurel32
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
3089 374dfc33 aurel32
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3090 374dfc33 aurel32
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
3091 374dfc33 aurel32
            aSig >>= 1;
3092 374dfc33 aurel32
            zSig |= i;
3093 374dfc33 aurel32
        }
3094 374dfc33 aurel32
    }
3095 374dfc33 aurel32
3096 374dfc33 aurel32
    if ( zSign )
3097 374dfc33 aurel32
        zSig = -zSig;
3098 374dfc33 aurel32
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3099 374dfc33 aurel32
}
3100 374dfc33 aurel32
3101 374dfc33 aurel32
/*----------------------------------------------------------------------------
3102 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3103 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The comparison is performed
3104 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3105 158142c2 bellard
*----------------------------------------------------------------------------*/
3106 158142c2 bellard
3107 750afe93 bellard
int float64_eq( float64 a, float64 b STATUS_PARAM )
3108 158142c2 bellard
{
3109 f090c9d4 pbrook
    bits64 av, bv;
3110 158142c2 bellard
3111 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3112 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3113 158142c2 bellard
       ) {
3114 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3115 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3116 158142c2 bellard
        }
3117 158142c2 bellard
        return 0;
3118 158142c2 bellard
    }
3119 f090c9d4 pbrook
    av = float64_val(a);
3120 a1b91bb4 pbrook
    bv = float64_val(b);
3121 f090c9d4 pbrook
    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3122 158142c2 bellard
3123 158142c2 bellard
}
3124 158142c2 bellard
3125 158142c2 bellard
/*----------------------------------------------------------------------------
3126 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3127 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3128 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3129 158142c2 bellard
| Arithmetic.
3130 158142c2 bellard
*----------------------------------------------------------------------------*/
3131 158142c2 bellard
3132 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
3133 158142c2 bellard
{
3134 158142c2 bellard
    flag aSign, bSign;
3135 f090c9d4 pbrook
    bits64 av, bv;
3136 158142c2 bellard
3137 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3138 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3139 158142c2 bellard
       ) {
3140 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3141 158142c2 bellard
        return 0;
3142 158142c2 bellard
    }
3143 158142c2 bellard
    aSign = extractFloat64Sign( a );
3144 158142c2 bellard
    bSign = extractFloat64Sign( b );
3145 f090c9d4 pbrook
    av = float64_val(a);
3146 a1b91bb4 pbrook
    bv = float64_val(b);
3147 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3148 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3149 158142c2 bellard
3150 158142c2 bellard
}
3151 158142c2 bellard
3152 158142c2 bellard
/*----------------------------------------------------------------------------
3153 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3154 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
3155 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3156 158142c2 bellard
*----------------------------------------------------------------------------*/
3157 158142c2 bellard
3158 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
3159 158142c2 bellard
{
3160 158142c2 bellard
    flag aSign, bSign;
3161 f090c9d4 pbrook
    bits64 av, bv;
3162 158142c2 bellard
3163 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3164 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3165 158142c2 bellard
       ) {
3166 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3167 158142c2 bellard
        return 0;
3168 158142c2 bellard
    }
3169 158142c2 bellard
    aSign = extractFloat64Sign( a );
3170 158142c2 bellard
    bSign = extractFloat64Sign( b );
3171 f090c9d4 pbrook
    av = float64_val(a);
3172 a1b91bb4 pbrook
    bv = float64_val(b);
3173 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3174 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3175 158142c2 bellard
3176 158142c2 bellard
}
3177 158142c2 bellard
3178 158142c2 bellard
/*----------------------------------------------------------------------------
3179 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3180 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3181 158142c2 bellard
| if either operand is a NaN.  Otherwise, the comparison is performed
3182 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3183 158142c2 bellard
*----------------------------------------------------------------------------*/
3184 158142c2 bellard
3185 750afe93 bellard
int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
3186 158142c2 bellard
{
3187 f090c9d4 pbrook
    bits64 av, bv;
3188 158142c2 bellard
3189 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3190 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3191 158142c2 bellard
       ) {
3192 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3193 158142c2 bellard
        return 0;
3194 158142c2 bellard
    }
3195 f090c9d4 pbrook
    av = float64_val(a);
3196 a1b91bb4 pbrook
    bv = float64_val(b);
3197 f090c9d4 pbrook
    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3198 158142c2 bellard
3199 158142c2 bellard
}
3200 158142c2 bellard
3201 158142c2 bellard
/*----------------------------------------------------------------------------
3202 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3203 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3204 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3205 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3206 158142c2 bellard
*----------------------------------------------------------------------------*/
3207 158142c2 bellard
3208 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3209 158142c2 bellard
{
3210 158142c2 bellard
    flag aSign, bSign;
3211 f090c9d4 pbrook
    bits64 av, bv;
3212 158142c2 bellard
3213 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3214 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3215 158142c2 bellard
       ) {
3216 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3217 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3218 158142c2 bellard
        }
3219 158142c2 bellard
        return 0;
3220 158142c2 bellard
    }
3221 158142c2 bellard
    aSign = extractFloat64Sign( a );
3222 158142c2 bellard
    bSign = extractFloat64Sign( b );
3223 f090c9d4 pbrook
    av = float64_val(a);
3224 a1b91bb4 pbrook
    bv = float64_val(b);
3225 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3226 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3227 158142c2 bellard
3228 158142c2 bellard
}
3229 158142c2 bellard
3230 158142c2 bellard
/*----------------------------------------------------------------------------
3231 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3232 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3233 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3234 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3235 158142c2 bellard
*----------------------------------------------------------------------------*/
3236 158142c2 bellard
3237 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3238 158142c2 bellard
{
3239 158142c2 bellard
    flag aSign, bSign;
3240 f090c9d4 pbrook
    bits64 av, bv;
3241 158142c2 bellard
3242 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3243 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3244 158142c2 bellard
       ) {
3245 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3246 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3247 158142c2 bellard
        }
3248 158142c2 bellard
        return 0;
3249 158142c2 bellard
    }
3250 158142c2 bellard
    aSign = extractFloat64Sign( a );
3251 158142c2 bellard
    bSign = extractFloat64Sign( b );
3252 f090c9d4 pbrook
    av = float64_val(a);
3253 a1b91bb4 pbrook
    bv = float64_val(b);
3254 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3255 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3256 158142c2 bellard
3257 158142c2 bellard
}
3258 158142c2 bellard
3259 158142c2 bellard
#ifdef FLOATX80
3260 158142c2 bellard
3261 158142c2 bellard
/*----------------------------------------------------------------------------
3262 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3263 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3264 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3265 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3266 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3267 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3268 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3269 158142c2 bellard
*----------------------------------------------------------------------------*/
3270 158142c2 bellard
3271 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    int32 rightShift;

    /* A NaN is handed on with its sign cleared. */
    if ( ( expA == 0x7FFF ) && (bits64) ( sigA<<1 ) ) signA = 0;

    /* The significand is always shifted right by at least one bit. */
    rightShift = ( expA < 0x4037 ) ? 0x4037 - expA : 1;
    shift64RightJamming( sigA, rightShift, &sigA );

    /* Rounding and packing are delegated to roundAndPackInt32. */
    return roundAndPackInt32( signA, sigA STATUS_VAR );

}
3287 158142c2 bellard
3288 158142c2 bellard
/*----------------------------------------------------------------------------
3289 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3290 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3291 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3292 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3293 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3294 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3295 158142c2 bellard
| sign as `a' is returned.
3296 158142c2 bellard
*----------------------------------------------------------------------------*/
3297 158142c2 bellard
3298 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    bits64 truncated;
    int32 rightShift, result;

    if ( expA > 0x401E ) {
        /* Exponent too large for the 32-bit range; a NaN is reported as
           positive overflow. */
        if ( ( expA == 0x7FFF ) && (bits64) ( sigA<<1 ) ) signA = 0;
        goto invalid;
    }
    if ( expA < 0x3FFF ) {
        /* Exponent below 0x3FFF: the result is 0, inexact unless `a' is
           itself zero. */
        if ( expA || sigA ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }

    /* Drop the fraction bits by shifting the significand right. */
    rightShift = 0x403E - expA;
    truncated = sigA>>rightShift;
    result = truncated;
    if ( signA ) result = - result;

    /* A result whose sign disagrees with the operand's sign has overflowed. */
    if ( ( result < 0 ) ^ signA ) goto invalid;

    /* Any bits lost in the shift make the conversion inexact. */
    if ( ( truncated<<rightShift ) != sigA ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;

 invalid:
    float_raise( float_flag_invalid STATUS_VAR);
    return signA ? (sbits32) 0x80000000 : 0x7FFFFFFF;

}
3332 158142c2 bellard
3333 158142c2 bellard
/*----------------------------------------------------------------------------
3334 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3335 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3336 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3337 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3338 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3339 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3340 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3341 158142c2 bellard
*----------------------------------------------------------------------------*/
3342 158142c2 bellard
3343 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    int32 rightShift = 0x403E - expA;
    bits64 sigExtra;

    if ( rightShift < 0 ) {
        /* Exponent above 0x403E: the value cannot fit, or is a NaN or an
           infinity. */
        float_raise( float_flag_invalid STATUS_VAR);
        /* Only a negative operand that is not a NaN saturates to the most
           negative integer; everything else yields the largest positive. */
        if (    signA
             && (    ( expA != 0x7FFF )
                  || ( sigA == LIT64( 0x8000000000000000 ) ) )
           ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    if ( rightShift == 0 ) {
        /* The significand is used unshifted, with no extra bits. */
        sigExtra = 0;
    }
    else {
        /* The extra word receives the shifted-out bits for use in rounding. */
        shift64ExtraRightJamming( sigA, 0, rightShift, &sigA, &sigExtra );
    }
    return roundAndPackInt64( signA, sigA, sigExtra STATUS_VAR );

}
3372 158142c2 bellard
3373 158142c2 bellard
/*----------------------------------------------------------------------------
3374 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3375 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3376 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3377 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3378 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3379 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3380 158142c2 bellard
| sign as `a' is returned.
3381 158142c2 bellard
*----------------------------------------------------------------------------*/
3382 158142c2 bellard
3383 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    int32 expOffset = expA - 0x403E;
    int64 result;

    if ( expOffset >= 0 ) {
        /* Discard the top (explicit integer) bit; only the fraction matters
           from here on. */
        sigA &= LIT64( 0x7FFFFFFFFFFFFFFF );
        /* Sign/exponent word 0xC03E with a zero fraction is the single
           operand in this range that converts without an exception. */
        if ( ( a.high == 0xC03E ) && ( sigA == 0 ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        /* Negative operands that are not NaNs saturate to the most negative
           integer; all others to the largest positive one. */
        if ( signA && ! ( ( expA == 0x7FFF ) && sigA ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( expA < 0x3FFF ) {
        /* Exponent below 0x3FFF: truncates to 0, inexact unless `a' is
           zero. */
        if ( expA | sigA ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }

    /* expOffset is negative here, so - expOffset is the right-shift count. */
    result = sigA>>( - expOffset );
    /* The bits removed by that shift, if any are set, make the result
       inexact. */
    if ( (bits64) ( sigA<<( expOffset & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if ( signA ) result = - result;
    return result;

}
3416 158142c2 bellard
3417 158142c2 bellard
/*----------------------------------------------------------------------------
3418 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3419 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3420 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3421 158142c2 bellard
| Floating-Point Arithmetic.
3422 158142c2 bellard
*----------------------------------------------------------------------------*/
3423 158142c2 bellard
3424 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    bits64 narrowSig;

    if ( expA == 0x7FFF ) {
        /* Nonzero bits below the top significand bit mean a NaN; otherwise
           the operand is an infinity. */
        if ( (bits64) ( sigA<<1 ) ) {
            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat32( signA, 0xFF, 0 );
    }

    /* Narrow the significand, folding the discarded bits into a sticky bit. */
    shift64RightJamming( sigA, 33, &narrowSig );
    /* The exponent is rebiased unless both it and the narrowed significand
       are zero. */
    if ( expA || narrowSig ) expA -= 0x3F81;
    return roundAndPackFloat32( signA, expA, narrowSig STATUS_VAR );

}
3444 158142c2 bellard
3445 158142c2 bellard
/*----------------------------------------------------------------------------
3446 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3447 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3448 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3449 158142c2 bellard
| Floating-Point Arithmetic.
3450 158142c2 bellard
*----------------------------------------------------------------------------*/
3451 158142c2 bellard
3452 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int32 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    bits64 narrowSig;

    if ( expA == 0x7FFF ) {
        /* Nonzero bits below the top significand bit mean a NaN; otherwise
           the operand is an infinity. */
        if ( (bits64) ( sigA<<1 ) ) {
            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat64( signA, 0x7FF, 0 );
    }

    /* One-bit right shift with jamming. */
    shift64RightJamming( sigA, 1, &narrowSig );
    /* The rebias test deliberately looks at the unshifted significand. */
    if ( expA || sigA ) expA -= 0x3C01;
    return roundAndPackFloat64( signA, expA, narrowSig STATUS_VAR );

}
3472 158142c2 bellard
3473 158142c2 bellard
#ifdef FLOAT128
3474 158142c2 bellard
3475 158142c2 bellard
/*----------------------------------------------------------------------------
3476 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3477 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3478 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3479 158142c2 bellard
| Floating-Point Arithmetic.
3480 158142c2 bellard
*----------------------------------------------------------------------------*/
3481 158142c2 bellard
3482 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    bits64 sigA = extractFloatx80Frac( a );
    int16 expA = extractFloatx80Exp( a );
    flag signA = extractFloatx80Sign( a );
    bits64 sigHigh, sigLow;

    if ( expA == 0x7FFF ) {
        /* Maximum exponent with nonzero fraction bits: convert as a NaN. */
        if ( (bits64) ( sigA<<1 ) ) {
            return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
        }
    }

    /* Shift out the top significand bit, then move the remaining bits down
       by 16 across the two result words; the exponent field is passed on
       unchanged. */
    shift128Right( sigA<<1, 0, 16, &sigHigh, &sigLow );
    return packFloat128( signA, expA, sigHigh, sigLow );

}
3498 158142c2 bellard
3499 158142c2 bellard
#endif
3500 158142c2 bellard
3501 158142c2 bellard
/*----------------------------------------------------------------------------
3502 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
3503 158142c2 bellard
| and returns the result as an extended double-precision floating-point
3504 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
3505 158142c2 bellard
| Binary Floating-Point Arithmetic.
3506 158142c2 bellard
*----------------------------------------------------------------------------*/
3507 158142c2 bellard
3508 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 expA;
    bits64 unitBit, fractionMask;
    int8 mode;
    floatx80 result;

    expA = extractFloatx80Exp( a );

    if ( 0x403E <= expA ) {
        /* At this exponent and above the operand is returned as is, except
           that a NaN goes through the usual NaN propagation. */
        if ( ( expA == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( expA < 0x3FFF ) {
        /* Exponent below 0x3FFF: the result is one of 0 and +/-1. */
        if (    ( expA == 0 )
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        negative = extractFloatx80Sign( a );
        mode = STATUS(float_rounding_mode);
        if ( mode == float_round_nearest_even ) {
            /* Exponent 0x3FFE with further fraction bits set rounds away
               to +/-1; everything else falls through to a signed zero. */
            if (    ( expA == 0x3FFE )
                 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
                return
                    packFloatx80(
                        negative, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
        }
        else if ( mode == float_round_down ) {
            if ( negative ) {
                return packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            return packFloatx80( 0, 0, 0 );
        }
        else if ( mode == float_round_up ) {
            if ( negative ) return packFloatx80( 1, 0, 0 );
            return packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        return packFloatx80( negative, 0, 0 );
    }

    /* unitBit selects the lowest significand bit that is kept; fractionMask
       covers every bit below it. */
    unitBit = (bits64) 1<<( 0x403E - expA );
    fractionMask = unitBit - 1;
    result = a;
    mode = STATUS(float_rounding_mode);
    switch ( mode ) {
     case float_round_nearest_even:
        /* Add half a unit; if the bits below the unit are then all zero,
           clear the unit bit as well. */
        result.low += unitBit>>1;
        if ( ( result.low & fractionMask ) == 0 ) result.low &= ~ unitBit;
        break;
     case float_round_to_zero:
        break;
     default:
        /* Directed rounding: add the full mask first when the sign flag and
           the round-up test differ. */
        if ( extractFloatx80Sign( result ) ^ ( mode == float_round_up ) ) {
            result.low += fractionMask;
        }
        break;
    }
    result.low &= ~ fractionMask;
    if ( result.low == 0 ) {
        /* The addition wrapped the 64-bit significand: bump the high word
           and reset the significand to its top bit. */
        ++result.high;
        result.low = LIT64( 0x8000000000000000 );
    }
    if ( result.low != a.low ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;

}
3573 158142c2 bellard
3574 158142c2 bellard
/*----------------------------------------------------------------------------
3575 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
3576 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3577 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
3578 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3579 158142c2 bellard
| Floating-Point Arithmetic.
3580 158142c2 bellard
*----------------------------------------------------------------------------*/
3581 158142c2 bellard
3582 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent: align `b' to it. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* With a zero exponent field on `b' the alignment shift is one
           less. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: align `a' to it. */
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ( zSig0 < aSig ) {
                /* The 64-bit sum wrapped, which with zero exponent fields
                   requires an operand whose top significand bit is set (a
                   pseudo-denormal).  Restore the lost carry through the
                   common shift-right path; both operands are taken at
                   exponent 1 here, as in the alignment adjustments above. */
                zExp = 1;
                goto shiftRight1;
            }
            if ( zSig0 == 0 ) {
                /* A zero sum (e.g. 0 + 0) has no set bit for
                   normalizeFloatx80Subnormal to normalize to (presumably
                   it shifts by the leading-zero count -- TODO confirm), so
                   return the signed zero directly. */
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* If the top bit of the sum is set, no renormalizing shift is needed. */
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Shift the sum right one place, set its top bit, and raise the
       exponent to match. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
3639 158142c2 bellard
3640 158142c2 bellard
/*----------------------------------------------------------------------------
3641 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
3642 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3643 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3644 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3645 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3646 158142c2 bellard
*----------------------------------------------------------------------------*/
3647 158142c2 bellard
3648 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;
    int bIsLarger;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;

    if ( expDiff > 0 ) {
        /* `a' has the larger exponent: align `b' to it. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        if ( bExp == 0 ) --expDiff;
        shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        bIsLarger = 0;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: align `a' to it. */
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            /* Finite minus infinity: infinity with the opposite sign. */
            return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) ++expDiff;
        shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        bIsLarger = 1;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            /* Infinity minus infinity is invalid: raise the flag and return
               the default NaN. */
            float_raise( float_flag_invalid STATUS_VAR);
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        if ( aExp == 0 ) {
            /* Operands with a zero exponent field are handled with
               exponent 1. */
            aExp = 1;
            bExp = 1;
        }
        zSig1 = 0;
        if ( aSig == bSig ) {
            /* Exact cancellation: the zero is negative only when rounding
               down. */
            return
                packFloatx80(
                    STATUS(float_rounding_mode) == float_round_down, 0, 0 );
        }
        bIsLarger = ( aSig < bSig );
    }

    /* Subtract the smaller magnitude from the larger; the sign flips when
       `b' is the larger one. */
    if ( bIsLarger ) {
        sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
        zExp = bExp;
        zSign ^= 1;
    }
    else {
        sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
        zExp = aExp;
    }
    return
        normalizeRoundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
3707 158142c2 bellard
3708 158142c2 bellard
/*----------------------------------------------------------------------------
3709 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
3710 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
3711 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3712 158142c2 bellard
*----------------------------------------------------------------------------*/
3713 158142c2 bellard
3714 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag signA = extractFloatx80Sign( a );
    flag signB = extractFloatx80Sign( b );

    /* Operands of opposite sign are added by subtracting magnitudes. */
    if ( signA != signB ) {
        return subFloatx80Sigs( a, b, signA STATUS_VAR );
    }
    return addFloatx80Sigs( a, b, signA STATUS_VAR );

}
3728 158142c2 bellard
3729 158142c2 bellard
/*----------------------------------------------------------------------------
3730 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
3731 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3732 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3733 158142c2 bellard
*----------------------------------------------------------------------------*/
3734 158142c2 bellard
3735 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag signA = extractFloatx80Sign( a );
    flag signB = extractFloatx80Sign( b );

    /* Operands of opposite sign are subtracted by adding magnitudes. */
    if ( signA != signB ) {
        return addFloatx80Sigs( a, b, signA STATUS_VAR );
    }
    return subFloatx80Sigs( a, b, signA STATUS_VAR );

}
3749 158142c2 bellard
3750 158142c2 bellard
/*----------------------------------------------------------------------------
3751 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
3752 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3753 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3754 158142c2 bellard
*----------------------------------------------------------------------------*/
3755 158142c2 bellard
3756 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int32 expA, expB, expZ;
    bits64 sigA, sigB, prodHigh, prodLow;
    floatx80 z;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );
    sigB = extractFloatx80Frac( b );
    expB = extractFloatx80Exp( b );
    signB = extractFloatx80Sign( b );
    signZ = signA ^ signB;

    if ( expA == 0x7FFF ) {
        /* `a' is a NaN or an infinity. */
        if (    (bits64) ( sigA<<1 )
             || ( ( expB == 0x7FFF ) && (bits64) ( sigB<<1 ) ) ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        /* Infinity times zero is invalid. */
        if ( ( expB | sigB ) == 0 ) goto invalid;
        return packFloatx80( signZ, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( expB == 0x7FFF ) {
        /* `b' is a NaN or an infinity, `a' is finite. */
        if ( (bits64) ( sigB<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
        if ( ( expA | sigA ) == 0 ) goto invalid;
        return packFloatx80( signZ, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    /* A zero factor gives a signed zero; any other operand with a zero
       exponent field is normalized first. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) return packFloatx80( signZ, 0, 0 );
        normalizeFloatx80Subnormal( sigA, &expA, &sigA );
    }
    if ( expB == 0 ) {
        if ( sigB == 0 ) return packFloatx80( signZ, 0, 0 );
        normalizeFloatx80Subnormal( sigB, &expB, &sigB );
    }

    expZ = expA + expB - 0x3FFE;
    mul64To128( sigA, sigB, &prodHigh, &prodLow );
    /* If the top bit of the product is clear, shift it left one place and
       lower the exponent to match. */
    if ( (sbits64) prodHigh > 0 ) {
        shortShift128Left( prodHigh, prodLow, 1, &prodHigh, &prodLow );
        --expZ;
    }
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), signZ, expZ, prodHigh, prodLow STATUS_VAR );

 invalid:
    /* Raise the invalid flag and return the default NaN. */
    float_raise( float_flag_invalid STATUS_VAR);
    z.low = floatx80_default_nan_low;
    z.high = floatx80_default_nan_high;
    return z;

}
3808 158142c2 bellard
3809 158142c2 bellard
/*----------------------------------------------------------------------------
3810 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
3811 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
3812 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3813 158142c2 bellard
*----------------------------------------------------------------------------*/
3814 158142c2 bellard
3815 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3816 158142c2 bellard
{
3817 158142c2 bellard
    flag aSign, bSign, zSign;
3818 158142c2 bellard
    int32 aExp, bExp, zExp;
3819 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3820 158142c2 bellard
    bits64 rem0, rem1, rem2, term0, term1, term2;
3821 158142c2 bellard
    floatx80 z;
3822 158142c2 bellard
3823 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3824 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3825 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3826 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3827 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3828 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3829 158142c2 bellard
    zSign = aSign ^ bSign;
3830 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3831 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3832 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3833 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3834 158142c2 bellard
            goto invalid;
3835 158142c2 bellard
        }
3836 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3837 158142c2 bellard
    }
3838 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3839 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3840 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
3841 158142c2 bellard
    }
3842 158142c2 bellard
    if ( bExp == 0 ) {
3843 158142c2 bellard
        if ( bSig == 0 ) {
3844 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3845 158142c2 bellard
 invalid:
3846 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3847 158142c2 bellard
                z.low = floatx80_default_nan_low;
3848 158142c2 bellard
                z.high = floatx80_default_nan_high;
3849 158142c2 bellard
                return z;
3850 158142c2 bellard
            }
3851 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3852 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3853 158142c2 bellard
        }
3854 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3855 158142c2 bellard
    }
3856 158142c2 bellard
    if ( aExp == 0 ) {
3857 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3858 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3859 158142c2 bellard
    }
3860 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
3861 158142c2 bellard
    rem1 = 0;
3862 158142c2 bellard
    if ( bSig <= aSig ) {
3863 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3864 158142c2 bellard
        ++zExp;
3865 158142c2 bellard
    }
3866 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3867 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
3868 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3869 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
3870 158142c2 bellard
        --zSig0;
3871 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3872 158142c2 bellard
    }
3873 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3874 158142c2 bellard
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3875 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
3876 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3877 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
3878 158142c2 bellard
            --zSig1;
3879 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3880 158142c2 bellard
        }
3881 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3882 158142c2 bellard
    }
3883 158142c2 bellard
    return
3884 158142c2 bellard
        roundAndPackFloatx80(
3885 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3886 158142c2 bellard
3887 158142c2 bellard
}
3888 158142c2 bellard
3889 158142c2 bellard
/*----------------------------------------------------------------------------
3890 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
3891 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
3892 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3893 158142c2 bellard
*----------------------------------------------------------------------------*/
3894 158142c2 bellard
3895 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
3896 158142c2 bellard
{
3897 158142c2 bellard
    flag aSign, bSign, zSign;
3898 158142c2 bellard
    int32 aExp, bExp, expDiff;
3899 158142c2 bellard
    bits64 aSig0, aSig1, bSig;
3900 158142c2 bellard
    bits64 q, term0, term1, alternateASig0, alternateASig1;
3901 158142c2 bellard
    floatx80 z;
3902 158142c2 bellard
3903 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
3904 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3905 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3906 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3907 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3908 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3909 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3910 158142c2 bellard
        if (    (bits64) ( aSig0<<1 )
3911 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3912 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3913 158142c2 bellard
        }
3914 158142c2 bellard
        goto invalid;
3915 158142c2 bellard
    }
3916 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3917 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3918 158142c2 bellard
        return a;
3919 158142c2 bellard
    }
3920 158142c2 bellard
    if ( bExp == 0 ) {
3921 158142c2 bellard
        if ( bSig == 0 ) {
3922 158142c2 bellard
 invalid:
3923 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3924 158142c2 bellard
            z.low = floatx80_default_nan_low;
3925 158142c2 bellard
            z.high = floatx80_default_nan_high;
3926 158142c2 bellard
            return z;
3927 158142c2 bellard
        }
3928 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3929 158142c2 bellard
    }
3930 158142c2 bellard
    if ( aExp == 0 ) {
3931 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3932 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3933 158142c2 bellard
    }
3934 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
3935 158142c2 bellard
    zSign = aSign;
3936 158142c2 bellard
    expDiff = aExp - bExp;
3937 158142c2 bellard
    aSig1 = 0;
3938 158142c2 bellard
    if ( expDiff < 0 ) {
3939 158142c2 bellard
        if ( expDiff < -1 ) return a;
3940 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3941 158142c2 bellard
        expDiff = 0;
3942 158142c2 bellard
    }
3943 158142c2 bellard
    q = ( bSig <= aSig0 );
3944 158142c2 bellard
    if ( q ) aSig0 -= bSig;
3945 158142c2 bellard
    expDiff -= 64;
3946 158142c2 bellard
    while ( 0 < expDiff ) {
3947 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3948 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3949 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
3950 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3951 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3952 158142c2 bellard
        expDiff -= 62;
3953 158142c2 bellard
    }
3954 158142c2 bellard
    expDiff += 64;
3955 158142c2 bellard
    if ( 0 < expDiff ) {
3956 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3957 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3958 158142c2 bellard
        q >>= 64 - expDiff;
3959 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3960 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3961 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3962 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
3963 158142c2 bellard
            ++q;
3964 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3965 158142c2 bellard
        }
3966 158142c2 bellard
    }
3967 158142c2 bellard
    else {
3968 158142c2 bellard
        term1 = 0;
3969 158142c2 bellard
        term0 = bSig;
3970 158142c2 bellard
    }
3971 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3972 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3973 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3974 158142c2 bellard
              && ( q & 1 ) )
3975 158142c2 bellard
       ) {
3976 158142c2 bellard
        aSig0 = alternateASig0;
3977 158142c2 bellard
        aSig1 = alternateASig1;
3978 158142c2 bellard
        zSign = ! zSign;
3979 158142c2 bellard
    }
3980 158142c2 bellard
    return
3981 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3982 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
3983 158142c2 bellard
3984 158142c2 bellard
}
3985 158142c2 bellard
3986 158142c2 bellard
/*----------------------------------------------------------------------------
3987 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
3988 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
3989 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3990 158142c2 bellard
*----------------------------------------------------------------------------*/
3991 158142c2 bellard
3992 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
3993 158142c2 bellard
{
3994 158142c2 bellard
    flag aSign;
3995 158142c2 bellard
    int32 aExp, zExp;
3996 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3997 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3998 158142c2 bellard
    floatx80 z;
3999 158142c2 bellard
4000 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4001 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4002 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4003 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4004 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4005 158142c2 bellard
        if ( ! aSign ) return a;
4006 158142c2 bellard
        goto invalid;
4007 158142c2 bellard
    }
4008 158142c2 bellard
    if ( aSign ) {
4009 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4010 158142c2 bellard
 invalid:
4011 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4012 158142c2 bellard
        z.low = floatx80_default_nan_low;
4013 158142c2 bellard
        z.high = floatx80_default_nan_high;
4014 158142c2 bellard
        return z;
4015 158142c2 bellard
    }
4016 158142c2 bellard
    if ( aExp == 0 ) {
4017 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4018 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4019 158142c2 bellard
    }
4020 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4021 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4022 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4023 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4024 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4025 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4026 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4027 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4028 158142c2 bellard
        --zSig0;
4029 158142c2 bellard
        doubleZSig0 -= 2;
4030 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4031 158142c2 bellard
    }
4032 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4033 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4034 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4035 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4036 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4037 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4038 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4039 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4040 158142c2 bellard
            --zSig1;
4041 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4042 158142c2 bellard
            term3 |= 1;
4043 158142c2 bellard
            term2 |= doubleZSig0;
4044 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4045 158142c2 bellard
        }
4046 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4047 158142c2 bellard
    }
4048 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4049 158142c2 bellard
    zSig0 |= doubleZSig0;
4050 158142c2 bellard
    return
4051 158142c2 bellard
        roundAndPackFloatx80(
4052 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4053 158142c2 bellard
4054 158142c2 bellard
}
4055 158142c2 bellard
4056 158142c2 bellard
/*----------------------------------------------------------------------------
4057 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4058 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
4059 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4060 158142c2 bellard
| Arithmetic.
4061 158142c2 bellard
*----------------------------------------------------------------------------*/
4062 158142c2 bellard
4063 750afe93 bellard
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4064 158142c2 bellard
{
4065 158142c2 bellard
4066 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4067 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4068 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4069 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4070 158142c2 bellard
       ) {
4071 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4072 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4073 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4074 158142c2 bellard
        }
4075 158142c2 bellard
        return 0;
4076 158142c2 bellard
    }
4077 158142c2 bellard
    return
4078 158142c2 bellard
           ( a.low == b.low )
4079 158142c2 bellard
        && (    ( a.high == b.high )
4080 158142c2 bellard
             || (    ( a.low == 0 )
4081 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4082 158142c2 bellard
           );
4083 158142c2 bellard
4084 158142c2 bellard
}
4085 158142c2 bellard
4086 158142c2 bellard
/*----------------------------------------------------------------------------
4087 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4088 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4089 158142c2 bellard
| comparison is performed according to the IEC/IEEE Standard for Binary
4090 158142c2 bellard
| Floating-Point Arithmetic.
4091 158142c2 bellard
*----------------------------------------------------------------------------*/
4092 158142c2 bellard
4093 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4094 158142c2 bellard
{
4095 158142c2 bellard
    flag aSign, bSign;
4096 158142c2 bellard
4097 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4098 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4099 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4100 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4101 158142c2 bellard
       ) {
4102 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4103 158142c2 bellard
        return 0;
4104 158142c2 bellard
    }
4105 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4106 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4107 158142c2 bellard
    if ( aSign != bSign ) {
4108 158142c2 bellard
        return
4109 158142c2 bellard
               aSign
4110 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4111 158142c2 bellard
                 == 0 );
4112 158142c2 bellard
    }
4113 158142c2 bellard
    return
4114 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4115 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4116 158142c2 bellard
4117 158142c2 bellard
}
4118 158142c2 bellard
4119 158142c2 bellard
/*----------------------------------------------------------------------------
4120 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4121 158142c2 bellard
| less than the corresponding value `b', and 0 otherwise.  The comparison
4122 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4123 158142c2 bellard
| Arithmetic.
4124 158142c2 bellard
*----------------------------------------------------------------------------*/
4125 158142c2 bellard
4126 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4127 158142c2 bellard
{
4128 158142c2 bellard
    flag aSign, bSign;
4129 158142c2 bellard
4130 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4131 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4132 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4133 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4134 158142c2 bellard
       ) {
4135 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4136 158142c2 bellard
        return 0;
4137 158142c2 bellard
    }
4138 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4139 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4140 158142c2 bellard
    if ( aSign != bSign ) {
4141 158142c2 bellard
        return
4142 158142c2 bellard
               aSign
4143 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4144 158142c2 bellard
                 != 0 );
4145 158142c2 bellard
    }
4146 158142c2 bellard
    return
4147 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4148 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4149 158142c2 bellard
4150 158142c2 bellard
}
4151 158142c2 bellard
4152 158142c2 bellard
/*----------------------------------------------------------------------------
4153 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is equal
4154 158142c2 bellard
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4155 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4156 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4157 158142c2 bellard
*----------------------------------------------------------------------------*/
4158 158142c2 bellard
4159 750afe93 bellard
int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
4160 158142c2 bellard
{
4161 158142c2 bellard
4162 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4163 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4164 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4165 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4166 158142c2 bellard
       ) {
4167 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4168 158142c2 bellard
        return 0;
4169 158142c2 bellard
    }
4170 158142c2 bellard
    return
4171 158142c2 bellard
           ( a.low == b.low )
4172 158142c2 bellard
        && (    ( a.high == b.high )
4173 158142c2 bellard
             || (    ( a.low == 0 )
4174 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4175 158142c2 bellard
           );
4176 158142c2 bellard
4177 158142c2 bellard
}
4178 158142c2 bellard
4179 158142c2 bellard
/*----------------------------------------------------------------------------
4180 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4181 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4182 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4183 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4184 158142c2 bellard
*----------------------------------------------------------------------------*/
4185 158142c2 bellard
4186 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4187 158142c2 bellard
{
4188 158142c2 bellard
    flag aSign, bSign;
4189 158142c2 bellard
4190 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4191 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4192 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4193 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4194 158142c2 bellard
       ) {
4195 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4196 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4197 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4198 158142c2 bellard
        }
4199 158142c2 bellard
        return 0;
4200 158142c2 bellard
    }
4201 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4202 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4203 158142c2 bellard
    if ( aSign != bSign ) {
4204 158142c2 bellard
        return
4205 158142c2 bellard
               aSign
4206 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4207 158142c2 bellard
                 == 0 );
4208 158142c2 bellard
    }
4209 158142c2 bellard
    return
4210 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4211 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4212 158142c2 bellard
4213 158142c2 bellard
}
4214 158142c2 bellard
4215 158142c2 bellard
/*----------------------------------------------------------------------------
4216 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4217 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4218 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4219 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4220 158142c2 bellard
*----------------------------------------------------------------------------*/
4221 158142c2 bellard
4222 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4223 158142c2 bellard
{
4224 158142c2 bellard
    flag aSign, bSign;
4225 158142c2 bellard
4226 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4227 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4228 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4229 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4230 158142c2 bellard
       ) {
4231 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4232 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4233 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4234 158142c2 bellard
        }
4235 158142c2 bellard
        return 0;
4236 158142c2 bellard
    }
4237 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4238 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4239 158142c2 bellard
    if ( aSign != bSign ) {
4240 158142c2 bellard
        return
4241 158142c2 bellard
               aSign
4242 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4243 158142c2 bellard
                 != 0 );
4244 158142c2 bellard
    }
4245 158142c2 bellard
    return
4246 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4247 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4248 158142c2 bellard
4249 158142c2 bellard
}
4250 158142c2 bellard
4251 158142c2 bellard
#endif
4252 158142c2 bellard
4253 158142c2 bellard
#ifdef FLOAT128
4254 158142c2 bellard
4255 158142c2 bellard
/*----------------------------------------------------------------------------
4256 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4257 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4258 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4259 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4260 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4261 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4262 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4263 158142c2 bellard
*----------------------------------------------------------------------------*/
4264 158142c2 bellard
4265 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4266 158142c2 bellard
{
4267 158142c2 bellard
    flag aSign;
4268 158142c2 bellard
    int32 aExp, shiftCount;
4269 158142c2 bellard
    bits64 aSig0, aSig1;
4270 158142c2 bellard
4271 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4272 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4273 158142c2 bellard
    aExp = extractFloat128Exp( a );
4274 158142c2 bellard
    aSign = extractFloat128Sign( a );
4275 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4276 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4277 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4278 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4279 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4280 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4281 158142c2 bellard
4282 158142c2 bellard
}
4283 158142c2 bellard
4284 158142c2 bellard
/*----------------------------------------------------------------------------
4285 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4286 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4287 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4288 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4289 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4290 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4291 158142c2 bellard
| returned.
4292 158142c2 bellard
*----------------------------------------------------------------------------*/
4293 158142c2 bellard
4294 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4295 158142c2 bellard
{
4296 158142c2 bellard
    flag aSign;
4297 158142c2 bellard
    int32 aExp, shiftCount;
4298 158142c2 bellard
    bits64 aSig0, aSig1, savedASig;
4299 158142c2 bellard
    int32 z;
4300 158142c2 bellard
4301 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4302 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4303 158142c2 bellard
    aExp = extractFloat128Exp( a );
4304 158142c2 bellard
    aSign = extractFloat128Sign( a );
4305 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4306 158142c2 bellard
    if ( 0x401E < aExp ) {
4307 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4308 158142c2 bellard
        goto invalid;
4309 158142c2 bellard
    }
4310 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4311 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4312 158142c2 bellard
        return 0;
4313 158142c2 bellard
    }
4314 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4315 158142c2 bellard
    shiftCount = 0x402F - aExp;
4316 158142c2 bellard
    savedASig = aSig0;
4317 158142c2 bellard
    aSig0 >>= shiftCount;
4318 158142c2 bellard
    z = aSig0;
4319 158142c2 bellard
    if ( aSign ) z = - z;
4320 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4321 158142c2 bellard
 invalid:
4322 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4323 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4324 158142c2 bellard
    }
4325 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4326 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4327 158142c2 bellard
    }
4328 158142c2 bellard
    return z;
4329 158142c2 bellard
4330 158142c2 bellard
}
4331 158142c2 bellard
4332 158142c2 bellard
/*----------------------------------------------------------------------------
4333 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4334 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4335 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4336 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4337 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4338 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4339 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4340 158142c2 bellard
*----------------------------------------------------------------------------*/
4341 158142c2 bellard
4342 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4343 158142c2 bellard
{
4344 158142c2 bellard
    flag aSign;
4345 158142c2 bellard
    int32 aExp, shiftCount;
4346 158142c2 bellard
    bits64 aSig0, aSig1;
4347 158142c2 bellard
4348 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4349 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4350 158142c2 bellard
    aExp = extractFloat128Exp( a );
4351 158142c2 bellard
    aSign = extractFloat128Sign( a );
4352 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4353 158142c2 bellard
    shiftCount = 0x402F - aExp;
4354 158142c2 bellard
    if ( shiftCount <= 0 ) {
4355 158142c2 bellard
        if ( 0x403E < aExp ) {
4356 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4357 158142c2 bellard
            if (    ! aSign
4358 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4359 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4360 158142c2 bellard
                    )
4361 158142c2 bellard
               ) {
4362 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4363 158142c2 bellard
            }
4364 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4365 158142c2 bellard
        }
4366 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4367 158142c2 bellard
    }
4368 158142c2 bellard
    else {
4369 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4370 158142c2 bellard
    }
4371 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4372 158142c2 bellard
4373 158142c2 bellard
}
4374 158142c2 bellard
4375 158142c2 bellard
/*----------------------------------------------------------------------------
4376 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4377 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4378 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4379 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4380 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4381 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4382 158142c2 bellard
| returned.
4383 158142c2 bellard
*----------------------------------------------------------------------------*/
4384 158142c2 bellard
4385 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4386 158142c2 bellard
{
4387 158142c2 bellard
    flag aSign;
4388 158142c2 bellard
    int32 aExp, shiftCount;
4389 158142c2 bellard
    bits64 aSig0, aSig1;
4390 158142c2 bellard
    int64 z;
4391 158142c2 bellard
4392 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4393 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4394 158142c2 bellard
    aExp = extractFloat128Exp( a );
4395 158142c2 bellard
    aSign = extractFloat128Sign( a );
4396 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4397 158142c2 bellard
    shiftCount = aExp - 0x402F;
4398 158142c2 bellard
    if ( 0 < shiftCount ) {
4399 158142c2 bellard
        if ( 0x403E <= aExp ) {
4400 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4401 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4402 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4403 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4404 158142c2 bellard
            }
4405 158142c2 bellard
            else {
4406 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4407 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4408 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4409 158142c2 bellard
                }
4410 158142c2 bellard
            }
4411 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4412 158142c2 bellard
        }
4413 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4414 158142c2 bellard
        if ( (bits64) ( aSig1<<shiftCount ) ) {
4415 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4416 158142c2 bellard
        }
4417 158142c2 bellard
    }
4418 158142c2 bellard
    else {
4419 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4420 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4421 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4422 158142c2 bellard
            }
4423 158142c2 bellard
            return 0;
4424 158142c2 bellard
        }
4425 158142c2 bellard
        z = aSig0>>( - shiftCount );
4426 158142c2 bellard
        if (    aSig1
4427 158142c2 bellard
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4428 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4429 158142c2 bellard
        }
4430 158142c2 bellard
    }
4431 158142c2 bellard
    if ( aSign ) z = - z;
4432 158142c2 bellard
    return z;
4433 158142c2 bellard
4434 158142c2 bellard
}
4435 158142c2 bellard
4436 158142c2 bellard
/*----------------------------------------------------------------------------
4437 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4438 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4439 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4440 158142c2 bellard
| Arithmetic.
4441 158142c2 bellard
*----------------------------------------------------------------------------*/
4442 158142c2 bellard
4443 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4444 158142c2 bellard
{
4445 158142c2 bellard
    flag aSign;
4446 158142c2 bellard
    int32 aExp;
4447 158142c2 bellard
    bits64 aSig0, aSig1;
4448 158142c2 bellard
    bits32 zSig;
4449 158142c2 bellard
4450 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4451 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4452 158142c2 bellard
    aExp = extractFloat128Exp( a );
4453 158142c2 bellard
    aSign = extractFloat128Sign( a );
4454 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4455 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4456 158142c2 bellard
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4457 158142c2 bellard
        }
4458 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
4459 158142c2 bellard
    }
4460 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4461 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
4462 158142c2 bellard
    zSig = aSig0;
4463 158142c2 bellard
    if ( aExp || zSig ) {
4464 158142c2 bellard
        zSig |= 0x40000000;
4465 158142c2 bellard
        aExp -= 0x3F81;
4466 158142c2 bellard
    }
4467 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4468 158142c2 bellard
4469 158142c2 bellard
}
4470 158142c2 bellard
4471 158142c2 bellard
/*----------------------------------------------------------------------------
4472 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4473 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
4474 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4475 158142c2 bellard
| Arithmetic.
4476 158142c2 bellard
*----------------------------------------------------------------------------*/
4477 158142c2 bellard
4478 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
4479 158142c2 bellard
{
4480 158142c2 bellard
    flag aSign;
4481 158142c2 bellard
    int32 aExp;
4482 158142c2 bellard
    bits64 aSig0, aSig1;
4483 158142c2 bellard
4484 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4485 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4486 158142c2 bellard
    aExp = extractFloat128Exp( a );
4487 158142c2 bellard
    aSign = extractFloat128Sign( a );
4488 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4489 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4490 158142c2 bellard
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4491 158142c2 bellard
        }
4492 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
4493 158142c2 bellard
    }
4494 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4495 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4496 158142c2 bellard
    if ( aExp || aSig0 ) {
4497 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4498 158142c2 bellard
        aExp -= 0x3C01;
4499 158142c2 bellard
    }
4500 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4501 158142c2 bellard
4502 158142c2 bellard
}
4503 158142c2 bellard
4504 158142c2 bellard
#ifdef FLOATX80
4505 158142c2 bellard
4506 158142c2 bellard
/*----------------------------------------------------------------------------
4507 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4508 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
4509 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4510 158142c2 bellard
| Floating-Point Arithmetic.
4511 158142c2 bellard
*----------------------------------------------------------------------------*/
4512 158142c2 bellard
4513 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4514 158142c2 bellard
{
4515 158142c2 bellard
    flag aSign;
4516 158142c2 bellard
    int32 aExp;
4517 158142c2 bellard
    bits64 aSig0, aSig1;
4518 158142c2 bellard
4519 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4520 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4521 158142c2 bellard
    aExp = extractFloat128Exp( a );
4522 158142c2 bellard
    aSign = extractFloat128Sign( a );
4523 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4524 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4525 158142c2 bellard
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4526 158142c2 bellard
        }
4527 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4528 158142c2 bellard
    }
4529 158142c2 bellard
    if ( aExp == 0 ) {
4530 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4531 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4532 158142c2 bellard
    }
4533 158142c2 bellard
    else {
4534 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
4535 158142c2 bellard
    }
4536 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4537 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4538 158142c2 bellard
4539 158142c2 bellard
}
4540 158142c2 bellard
4541 158142c2 bellard
#endif
4542 158142c2 bellard
4543 158142c2 bellard
/*----------------------------------------------------------------------------
4544 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
4545 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
4546 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
4547 158142c2 bellard
| Floating-Point Arithmetic.
4548 158142c2 bellard
*----------------------------------------------------------------------------*/
4549 158142c2 bellard
4550 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
4551 158142c2 bellard
{
4552 158142c2 bellard
    flag aSign;
4553 158142c2 bellard
    int32 aExp;
4554 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
4555 158142c2 bellard
    int8 roundingMode;
4556 158142c2 bellard
    float128 z;
4557 158142c2 bellard
4558 158142c2 bellard
    aExp = extractFloat128Exp( a );
4559 158142c2 bellard
    if ( 0x402F <= aExp ) {
4560 158142c2 bellard
        if ( 0x406F <= aExp ) {
4561 158142c2 bellard
            if (    ( aExp == 0x7FFF )
4562 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4563 158142c2 bellard
               ) {
4564 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
4565 158142c2 bellard
            }
4566 158142c2 bellard
            return a;
4567 158142c2 bellard
        }
4568 158142c2 bellard
        lastBitMask = 1;
4569 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4570 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4571 158142c2 bellard
        z = a;
4572 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4573 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4574 158142c2 bellard
            if ( lastBitMask ) {
4575 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4576 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4577 158142c2 bellard
            }
4578 158142c2 bellard
            else {
4579 158142c2 bellard
                if ( (sbits64) z.low < 0 ) {
4580 158142c2 bellard
                    ++z.high;
4581 158142c2 bellard
                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4582 158142c2 bellard
                }
4583 158142c2 bellard
            }
4584 158142c2 bellard
        }
4585 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4586 158142c2 bellard
            if (   extractFloat128Sign( z )
4587 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4588 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4589 158142c2 bellard
            }
4590 158142c2 bellard
        }
4591 158142c2 bellard
        z.low &= ~ roundBitsMask;
4592 158142c2 bellard
    }
4593 158142c2 bellard
    else {
4594 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4595 158142c2 bellard
            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4596 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4597 158142c2 bellard
            aSign = extractFloat128Sign( a );
4598 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
4599 158142c2 bellard
             case float_round_nearest_even:
4600 158142c2 bellard
                if (    ( aExp == 0x3FFE )
4601 158142c2 bellard
                     && (   extractFloat128Frac0( a )
4602 158142c2 bellard
                          | extractFloat128Frac1( a ) )
4603 158142c2 bellard
                   ) {
4604 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4605 158142c2 bellard
                }
4606 158142c2 bellard
                break;
4607 158142c2 bellard
             case float_round_down:
4608 158142c2 bellard
                return
4609 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4610 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
4611 158142c2 bellard
             case float_round_up:
4612 158142c2 bellard
                return
4613 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
4614 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
4615 158142c2 bellard
            }
4616 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
4617 158142c2 bellard
        }
4618 158142c2 bellard
        lastBitMask = 1;
4619 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
4620 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4621 158142c2 bellard
        z.low = 0;
4622 158142c2 bellard
        z.high = a.high;
4623 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4624 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4625 158142c2 bellard
            z.high += lastBitMask>>1;
4626 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4627 158142c2 bellard
                z.high &= ~ lastBitMask;
4628 158142c2 bellard
            }
4629 158142c2 bellard
        }
4630 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4631 158142c2 bellard
            if (   extractFloat128Sign( z )
4632 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4633 158142c2 bellard
                z.high |= ( a.low != 0 );
4634 158142c2 bellard
                z.high += roundBitsMask;
4635 158142c2 bellard
            }
4636 158142c2 bellard
        }
4637 158142c2 bellard
        z.high &= ~ roundBitsMask;
4638 158142c2 bellard
    }
4639 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4640 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4641 158142c2 bellard
    }
4642 158142c2 bellard
    return z;
4643 158142c2 bellard
4644 158142c2 bellard
}
4645 158142c2 bellard
4646 158142c2 bellard
/*----------------------------------------------------------------------------
4647 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
4648 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4649 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
4650 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4651 158142c2 bellard
| Floating-Point Arithmetic.
4652 158142c2 bellard
*----------------------------------------------------------------------------*/
4653 158142c2 bellard
4654 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4655 158142c2 bellard
{
4656 158142c2 bellard
    int32 aExp, bExp, zExp;
4657 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4658 158142c2 bellard
    int32 expDiff;
4659 158142c2 bellard
4660 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4661 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4662 158142c2 bellard
    aExp = extractFloat128Exp( a );
4663 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4664 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4665 158142c2 bellard
    bExp = extractFloat128Exp( b );
4666 158142c2 bellard
    expDiff = aExp - bExp;
4667 158142c2 bellard
    if ( 0 < expDiff ) {
4668 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4669 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4670 158142c2 bellard
            return a;
4671 158142c2 bellard
        }
4672 158142c2 bellard
        if ( bExp == 0 ) {
4673 158142c2 bellard
            --expDiff;
4674 158142c2 bellard
        }
4675 158142c2 bellard
        else {
4676 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
4677 158142c2 bellard
        }
4678 158142c2 bellard
        shift128ExtraRightJamming(
4679 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4680 158142c2 bellard
        zExp = aExp;
4681 158142c2 bellard
    }
4682 158142c2 bellard
    else if ( expDiff < 0 ) {
4683 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4684 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4685 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4686 158142c2 bellard
        }
4687 158142c2 bellard
        if ( aExp == 0 ) {
4688 158142c2 bellard
            ++expDiff;
4689 158142c2 bellard
        }
4690 158142c2 bellard
        else {
4691 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
4692 158142c2 bellard
        }
4693 158142c2 bellard
        shift128ExtraRightJamming(
4694 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4695 158142c2 bellard
        zExp = bExp;
4696 158142c2 bellard
    }
4697 158142c2 bellard
    else {
4698 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4699 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4700 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
4701 158142c2 bellard
            }
4702 158142c2 bellard
            return a;
4703 158142c2 bellard
        }
4704 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4705 fe76d976 pbrook
        if ( aExp == 0 ) {
4706 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
4707 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
4708 fe76d976 pbrook
        }
4709 158142c2 bellard
        zSig2 = 0;
4710 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
4711 158142c2 bellard
        zExp = aExp;
4712 158142c2 bellard
        goto shiftRight1;
4713 158142c2 bellard
    }
4714 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4715 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4716 158142c2 bellard
    --zExp;
4717 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4718 158142c2 bellard
    ++zExp;
4719 158142c2 bellard
 shiftRight1:
4720 158142c2 bellard
    shift128ExtraRightJamming(
4721 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4722 158142c2 bellard
 roundAndPack:
4723 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4724 158142c2 bellard
4725 158142c2 bellard
}
4726 158142c2 bellard
4727 158142c2 bellard
/*----------------------------------------------------------------------------
4728 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
4729 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
4730 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4731 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4732 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4733 158142c2 bellard
*----------------------------------------------------------------------------*/
4734 158142c2 bellard
4735 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4736 158142c2 bellard
{
4737 158142c2 bellard
    int32 aExp, bExp, zExp;
4738 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4739 158142c2 bellard
    int32 expDiff;
4740 158142c2 bellard
    float128 z;
4741 158142c2 bellard
4742 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4743 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4744 158142c2 bellard
    aExp = extractFloat128Exp( a );
4745 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4746 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4747 158142c2 bellard
    bExp = extractFloat128Exp( b );
4748 158142c2 bellard
    expDiff = aExp - bExp;
4749 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4750 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4751 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4752 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
4753 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4754 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4755 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4756 158142c2 bellard
        }
4757 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4758 158142c2 bellard
        z.low = float128_default_nan_low;
4759 158142c2 bellard
        z.high = float128_default_nan_high;
4760 158142c2 bellard
        return z;
4761 158142c2 bellard
    }
4762 158142c2 bellard
    if ( aExp == 0 ) {
4763 158142c2 bellard
        aExp = 1;
4764 158142c2 bellard
        bExp = 1;
4765 158142c2 bellard
    }
4766 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
4767 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
4768 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
4769 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
4770 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4771 158142c2 bellard
 bExpBigger:
4772 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4773 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4774 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4775 158142c2 bellard
    }
4776 158142c2 bellard
    if ( aExp == 0 ) {
4777 158142c2 bellard
        ++expDiff;
4778 158142c2 bellard
    }
4779 158142c2 bellard
    else {
4780 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4781 158142c2 bellard
    }
4782 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4783 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
4784 158142c2 bellard
 bBigger:
4785 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4786 158142c2 bellard
    zExp = bExp;
4787 158142c2 bellard
    zSign ^= 1;
4788 158142c2 bellard
    goto normalizeRoundAndPack;
4789 158142c2 bellard
 aExpBigger:
4790 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4791 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4792 158142c2 bellard
        return a;
4793 158142c2 bellard
    }
4794 158142c2 bellard
    if ( bExp == 0 ) {
4795 158142c2 bellard
        --expDiff;
4796 158142c2 bellard
    }
4797 158142c2 bellard
    else {
4798 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
4799 158142c2 bellard
    }
4800 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4801 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
4802 158142c2 bellard
 aBigger:
4803 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4804 158142c2 bellard
    zExp = aExp;
4805 158142c2 bellard
 normalizeRoundAndPack:
4806 158142c2 bellard
    --zExp;
4807 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4808 158142c2 bellard
4809 158142c2 bellard
}
4810 158142c2 bellard
4811 158142c2 bellard
/*----------------------------------------------------------------------------
4812 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
4813 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4814 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4815 158142c2 bellard
*----------------------------------------------------------------------------*/
4816 158142c2 bellard
4817 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
4818 158142c2 bellard
{
4819 158142c2 bellard
    flag aSign, bSign;
4820 158142c2 bellard
4821 158142c2 bellard
    aSign = extractFloat128Sign( a );
4822 158142c2 bellard
    bSign = extractFloat128Sign( b );
4823 158142c2 bellard
    if ( aSign == bSign ) {
4824 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4825 158142c2 bellard
    }
4826 158142c2 bellard
    else {
4827 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4828 158142c2 bellard
    }
4829 158142c2 bellard
4830 158142c2 bellard
}
4831 158142c2 bellard
4832 158142c2 bellard
/*----------------------------------------------------------------------------
4833 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
4834 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4835 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4836 158142c2 bellard
*----------------------------------------------------------------------------*/
4837 158142c2 bellard
4838 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
4839 158142c2 bellard
{
4840 158142c2 bellard
    flag aSign, bSign;
4841 158142c2 bellard
4842 158142c2 bellard
    aSign = extractFloat128Sign( a );
4843 158142c2 bellard
    bSign = extractFloat128Sign( b );
4844 158142c2 bellard
    if ( aSign == bSign ) {
4845 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4846 158142c2 bellard
    }
4847 158142c2 bellard
    else {
4848 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4849 158142c2 bellard
    }
4850 158142c2 bellard
4851 158142c2 bellard
}
4852 158142c2 bellard
4853 158142c2 bellard
/*----------------------------------------------------------------------------
4854 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
4855 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4856 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4857 158142c2 bellard
*----------------------------------------------------------------------------*/
4858 158142c2 bellard
4859 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
4860 158142c2 bellard
{
4861 158142c2 bellard
    flag aSign, bSign, zSign;
4862 158142c2 bellard
    int32 aExp, bExp, zExp;
4863 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4864 158142c2 bellard
    float128 z;
4865 158142c2 bellard
4866 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4867 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4868 158142c2 bellard
    aExp = extractFloat128Exp( a );
4869 158142c2 bellard
    aSign = extractFloat128Sign( a );
4870 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4871 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4872 158142c2 bellard
    bExp = extractFloat128Exp( b );
4873 158142c2 bellard
    bSign = extractFloat128Sign( b );
4874 158142c2 bellard
    zSign = aSign ^ bSign;
4875 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4876 158142c2 bellard
        if (    ( aSig0 | aSig1 )
4877 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4878 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4879 158142c2 bellard
        }
4880 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4881 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4882 158142c2 bellard
    }
4883 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4884 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4885 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4886 158142c2 bellard
 invalid:
4887 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4888 158142c2 bellard
            z.low = float128_default_nan_low;
4889 158142c2 bellard
            z.high = float128_default_nan_high;
4890 158142c2 bellard
            return z;
4891 158142c2 bellard
        }
4892 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4893 158142c2 bellard
    }
4894 158142c2 bellard
    if ( aExp == 0 ) {
4895 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4896 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4897 158142c2 bellard
    }
4898 158142c2 bellard
    if ( bExp == 0 ) {
4899 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4900 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4901 158142c2 bellard
    }
4902 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
4903 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4904 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4905 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4906 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4907 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
4908 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4909 158142c2 bellard
        shift128ExtraRightJamming(
4910 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4911 158142c2 bellard
        ++zExp;
4912 158142c2 bellard
    }
4913 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4914 158142c2 bellard
4915 158142c2 bellard
}
4916 158142c2 bellard
4917 158142c2 bellard
/*----------------------------------------------------------------------------
4918 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
4919 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
4920 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4921 158142c2 bellard
*----------------------------------------------------------------------------*/
4922 158142c2 bellard
4923 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
4924 158142c2 bellard
{
4925 158142c2 bellard
    flag aSign, bSign, zSign;
4926 158142c2 bellard
    int32 aExp, bExp, zExp;
4927 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4928 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4929 158142c2 bellard
    float128 z;
4930 158142c2 bellard
4931 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4932 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4933 158142c2 bellard
    aExp = extractFloat128Exp( a );
4934 158142c2 bellard
    aSign = extractFloat128Sign( a );
4935 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4936 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4937 158142c2 bellard
    bExp = extractFloat128Exp( b );
4938 158142c2 bellard
    bSign = extractFloat128Sign( b );
4939 158142c2 bellard
    zSign = aSign ^ bSign;
4940 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4941 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4942 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4943 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4944 158142c2 bellard
            goto invalid;
4945 158142c2 bellard
        }
4946 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4947 158142c2 bellard
    }
4948 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4949 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4950 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
4951 158142c2 bellard
    }
4952 158142c2 bellard
    if ( bExp == 0 ) {
4953 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
4954 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4955 158142c2 bellard
 invalid:
4956 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4957 158142c2 bellard
                z.low = float128_default_nan_low;
4958 158142c2 bellard
                z.high = float128_default_nan_high;
4959 158142c2 bellard
                return z;
4960 158142c2 bellard
            }
4961 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4962 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4963 158142c2 bellard
        }
4964 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4965 158142c2 bellard
    }
4966 158142c2 bellard
    if ( aExp == 0 ) {
4967 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4968 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4969 158142c2 bellard
    }
4970 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
4971 158142c2 bellard
    shortShift128Left(
4972 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4973 158142c2 bellard
    shortShift128Left(
4974 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4975 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4976 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4977 158142c2 bellard
        ++zExp;
4978 158142c2 bellard
    }
4979 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4980 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4981 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
4982 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4983 158142c2 bellard
        --zSig0;
4984 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
4985 158142c2 bellard
    }
4986 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
4987 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
4988 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
4989 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
4990 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4991 158142c2 bellard
            --zSig1;
4992 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
4993 158142c2 bellard
        }
4994 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4995 158142c2 bellard
    }
4996 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
4997 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4998 158142c2 bellard
4999 158142c2 bellard
}
5000 158142c2 bellard
5001 158142c2 bellard
/*----------------------------------------------------------------------------
5002 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5003 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5004 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5005 158142c2 bellard
*----------------------------------------------------------------------------*/
5006 158142c2 bellard
5007 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5008 158142c2 bellard
{
5009 158142c2 bellard
    flag aSign, bSign, zSign;
5010 158142c2 bellard
    int32 aExp, bExp, expDiff;
5011 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5012 158142c2 bellard
    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5013 158142c2 bellard
    sbits64 sigMean0;
5014 158142c2 bellard
    float128 z;
5015 158142c2 bellard
5016 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5017 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5018 158142c2 bellard
    aExp = extractFloat128Exp( a );
5019 158142c2 bellard
    aSign = extractFloat128Sign( a );
5020 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5021 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5022 158142c2 bellard
    bExp = extractFloat128Exp( b );
5023 158142c2 bellard
    bSign = extractFloat128Sign( b );
5024 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5025 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5026 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5027 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5028 158142c2 bellard
        }
5029 158142c2 bellard
        goto invalid;
5030 158142c2 bellard
    }
5031 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5032 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5033 158142c2 bellard
        return a;
5034 158142c2 bellard
    }
5035 158142c2 bellard
    if ( bExp == 0 ) {
5036 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5037 158142c2 bellard
 invalid:
5038 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5039 158142c2 bellard
            z.low = float128_default_nan_low;
5040 158142c2 bellard
            z.high = float128_default_nan_high;
5041 158142c2 bellard
            return z;
5042 158142c2 bellard
        }
5043 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5044 158142c2 bellard
    }
5045 158142c2 bellard
    if ( aExp == 0 ) {
5046 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5047 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5048 158142c2 bellard
    }
5049 158142c2 bellard
    expDiff = aExp - bExp;
5050 158142c2 bellard
    if ( expDiff < -1 ) return a;
5051 158142c2 bellard
    shortShift128Left(
5052 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
5053 158142c2 bellard
        aSig1,
5054 158142c2 bellard
        15 - ( expDiff < 0 ),
5055 158142c2 bellard
        &aSig0,
5056 158142c2 bellard
        &aSig1
5057 158142c2 bellard
    );
5058 158142c2 bellard
    shortShift128Left(
5059 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5060 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
5061 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5062 158142c2 bellard
    expDiff -= 64;
5063 158142c2 bellard
    while ( 0 < expDiff ) {
5064 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5065 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5066 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5067 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5068 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5069 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5070 158142c2 bellard
        expDiff -= 61;
5071 158142c2 bellard
    }
5072 158142c2 bellard
    if ( -64 < expDiff ) {
5073 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5074 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5075 158142c2 bellard
        q >>= - expDiff;
5076 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5077 158142c2 bellard
        expDiff += 52;
5078 158142c2 bellard
        if ( expDiff < 0 ) {
5079 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5080 158142c2 bellard
        }
5081 158142c2 bellard
        else {
5082 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5083 158142c2 bellard
        }
5084 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5085 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5086 158142c2 bellard
    }
5087 158142c2 bellard
    else {
5088 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5089 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5090 158142c2 bellard
    }
5091 158142c2 bellard
    do {
5092 158142c2 bellard
        alternateASig0 = aSig0;
5093 158142c2 bellard
        alternateASig1 = aSig1;
5094 158142c2 bellard
        ++q;
5095 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5096 158142c2 bellard
    } while ( 0 <= (sbits64) aSig0 );
5097 158142c2 bellard
    add128(
5098 b55266b5 blueswir1
        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5099 158142c2 bellard
    if (    ( sigMean0 < 0 )
5100 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5101 158142c2 bellard
        aSig0 = alternateASig0;
5102 158142c2 bellard
        aSig1 = alternateASig1;
5103 158142c2 bellard
    }
5104 158142c2 bellard
    zSign = ( (sbits64) aSig0 < 0 );
5105 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5106 158142c2 bellard
    return
5107 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5108 158142c2 bellard
5109 158142c2 bellard
}
5110 158142c2 bellard
5111 158142c2 bellard
/*----------------------------------------------------------------------------
5112 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5113 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5114 158142c2 bellard
| Floating-Point Arithmetic.
5115 158142c2 bellard
*----------------------------------------------------------------------------*/
5116 158142c2 bellard
5117 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5118 158142c2 bellard
{
5119 158142c2 bellard
    flag aSign;
5120 158142c2 bellard
    int32 aExp, zExp;
5121 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5122 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5123 158142c2 bellard
    float128 z;
5124 158142c2 bellard
5125 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5126 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5127 158142c2 bellard
    aExp = extractFloat128Exp( a );
5128 158142c2 bellard
    aSign = extractFloat128Sign( a );
5129 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5130 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5131 158142c2 bellard
        if ( ! aSign ) return a;
5132 158142c2 bellard
        goto invalid;
5133 158142c2 bellard
    }
5134 158142c2 bellard
    if ( aSign ) {
5135 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5136 158142c2 bellard
 invalid:
5137 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5138 158142c2 bellard
        z.low = float128_default_nan_low;
5139 158142c2 bellard
        z.high = float128_default_nan_high;
5140 158142c2 bellard
        return z;
5141 158142c2 bellard
    }
5142 158142c2 bellard
    if ( aExp == 0 ) {
5143 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5144 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5145 158142c2 bellard
    }
5146 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5147 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5148 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5149 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5150 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5151 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5152 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5153 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5154 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5155 158142c2 bellard
        --zSig0;
5156 158142c2 bellard
        doubleZSig0 -= 2;
5157 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5158 158142c2 bellard
    }
5159 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5160 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5161 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5162 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5163 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5164 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5165 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5166 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
5167 158142c2 bellard
            --zSig1;
5168 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5169 158142c2 bellard
            term3 |= 1;
5170 158142c2 bellard
            term2 |= doubleZSig0;
5171 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5172 158142c2 bellard
        }
5173 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5174 158142c2 bellard
    }
5175 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5176 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5177 158142c2 bellard
5178 158142c2 bellard
}
5179 158142c2 bellard
5180 158142c2 bellard
/*----------------------------------------------------------------------------
5181 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5182 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5183 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5184 158142c2 bellard
*----------------------------------------------------------------------------*/
5185 158142c2 bellard
5186 750afe93 bellard
int float128_eq( float128 a, float128 b STATUS_PARAM )
5187 158142c2 bellard
{
5188 158142c2 bellard
5189 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5190 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5191 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5192 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5193 158142c2 bellard
       ) {
5194 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5195 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5196 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5197 158142c2 bellard
        }
5198 158142c2 bellard
        return 0;
5199 158142c2 bellard
    }
5200 158142c2 bellard
    return
5201 158142c2 bellard
           ( a.low == b.low )
5202 158142c2 bellard
        && (    ( a.high == b.high )
5203 158142c2 bellard
             || (    ( a.low == 0 )
5204 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5205 158142c2 bellard
           );
5206 158142c2 bellard
5207 158142c2 bellard
}
5208 158142c2 bellard
5209 158142c2 bellard
/*----------------------------------------------------------------------------
5210 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5211 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
5212 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5213 158142c2 bellard
| Arithmetic.
5214 158142c2 bellard
*----------------------------------------------------------------------------*/
5215 158142c2 bellard
5216 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5217 158142c2 bellard
{
5218 158142c2 bellard
    flag aSign, bSign;
5219 158142c2 bellard
5220 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5221 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5222 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5223 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5224 158142c2 bellard
       ) {
5225 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5226 158142c2 bellard
        return 0;
5227 158142c2 bellard
    }
5228 158142c2 bellard
    aSign = extractFloat128Sign( a );
5229 158142c2 bellard
    bSign = extractFloat128Sign( b );
5230 158142c2 bellard
    if ( aSign != bSign ) {
5231 158142c2 bellard
        return
5232 158142c2 bellard
               aSign
5233 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5234 158142c2 bellard
                 == 0 );
5235 158142c2 bellard
    }
5236 158142c2 bellard
    return
5237 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5238 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5239 158142c2 bellard
5240 158142c2 bellard
}
5241 158142c2 bellard
5242 158142c2 bellard
/*----------------------------------------------------------------------------
5243 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5244 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5245 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5246 158142c2 bellard
*----------------------------------------------------------------------------*/
5247 158142c2 bellard
5248 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5249 158142c2 bellard
{
5250 158142c2 bellard
    flag aSign, bSign;
5251 158142c2 bellard
5252 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5253 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5254 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5255 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5256 158142c2 bellard
       ) {
5257 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5258 158142c2 bellard
        return 0;
5259 158142c2 bellard
    }
5260 158142c2 bellard
    aSign = extractFloat128Sign( a );
5261 158142c2 bellard
    bSign = extractFloat128Sign( b );
5262 158142c2 bellard
    if ( aSign != bSign ) {
5263 158142c2 bellard
        return
5264 158142c2 bellard
               aSign
5265 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5266 158142c2 bellard
                 != 0 );
5267 158142c2 bellard
    }
5268 158142c2 bellard
    return
5269 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5270 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5271 158142c2 bellard
5272 158142c2 bellard
}
5273 158142c2 bellard
5274 158142c2 bellard
/*----------------------------------------------------------------------------
5275 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5276 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5277 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5278 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5279 158142c2 bellard
*----------------------------------------------------------------------------*/
5280 158142c2 bellard
5281 750afe93 bellard
int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5282 158142c2 bellard
{
5283 158142c2 bellard
5284 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5285 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5286 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5287 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5288 158142c2 bellard
       ) {
5289 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5290 158142c2 bellard
        return 0;
5291 158142c2 bellard
    }
5292 158142c2 bellard
    return
5293 158142c2 bellard
           ( a.low == b.low )
5294 158142c2 bellard
        && (    ( a.high == b.high )
5295 158142c2 bellard
             || (    ( a.low == 0 )
5296 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5297 158142c2 bellard
           );
5298 158142c2 bellard
5299 158142c2 bellard
}
5300 158142c2 bellard
5301 158142c2 bellard
/*----------------------------------------------------------------------------
5302 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5303 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5304 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5305 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5306 158142c2 bellard
*----------------------------------------------------------------------------*/
5307 158142c2 bellard
5308 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5309 158142c2 bellard
{
5310 158142c2 bellard
    flag aSign, bSign;
5311 158142c2 bellard
5312 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5313 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5314 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5315 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5316 158142c2 bellard
       ) {
5317 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5318 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5319 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5320 158142c2 bellard
        }
5321 158142c2 bellard
        return 0;
5322 158142c2 bellard
    }
5323 158142c2 bellard
    aSign = extractFloat128Sign( a );
5324 158142c2 bellard
    bSign = extractFloat128Sign( b );
5325 158142c2 bellard
    if ( aSign != bSign ) {
5326 158142c2 bellard
        return
5327 158142c2 bellard
               aSign
5328 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5329 158142c2 bellard
                 == 0 );
5330 158142c2 bellard
    }
5331 158142c2 bellard
    return
5332 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5333 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5334 158142c2 bellard
5335 158142c2 bellard
}
5336 158142c2 bellard
5337 158142c2 bellard
/*----------------------------------------------------------------------------
5338 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5339 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5340 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5341 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5342 158142c2 bellard
*----------------------------------------------------------------------------*/
5343 158142c2 bellard
5344 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5345 158142c2 bellard
{
5346 158142c2 bellard
    flag aSign, bSign;
5347 158142c2 bellard
5348 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5349 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5350 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5351 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5352 158142c2 bellard
       ) {
5353 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5354 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5355 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5356 158142c2 bellard
        }
5357 158142c2 bellard
        return 0;
5358 158142c2 bellard
    }
5359 158142c2 bellard
    aSign = extractFloat128Sign( a );
5360 158142c2 bellard
    bSign = extractFloat128Sign( b );
5361 158142c2 bellard
    if ( aSign != bSign ) {
5362 158142c2 bellard
        return
5363 158142c2 bellard
               aSign
5364 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5365 158142c2 bellard
                 != 0 );
5366 158142c2 bellard
    }
5367 158142c2 bellard
    return
5368 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5369 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5370 158142c2 bellard
5371 158142c2 bellard
}
5372 158142c2 bellard
5373 158142c2 bellard
#endif
5374 158142c2 bellard
5375 1d6bda35 bellard
/* misc functions */
5376 1d6bda35 bellard
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    /* Widen to a signed 64-bit integer first so the existing signed
       conversion can be reused (assumes `unsigned int' is narrower than
       64 bits, as the function name implies). */
    int64_t widened = a;

    return int64_to_float32( widened STATUS_VAR );
}
5380 1d6bda35 bellard
5381 1d6bda35 bellard
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    /* Widen to a signed 64-bit integer first so the existing signed
       conversion can be reused (assumes `unsigned int' is narrower than
       64 bits, as the function name implies). */
    int64_t widened = a;

    return int64_to_float64( widened STATUS_VAR );
}
5385 1d6bda35 bellard
5386 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer by
   going through the signed 64-bit conversion.  Results below 0 saturate to 0
   and results above 0xffffffff saturate to 0xffffffff; in both cases the
   invalid exception is raised. */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    const int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: the narrowing conversion is value-preserving. */
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5403 1d6bda35 bellard
5404 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer by
   going through the round-to-zero signed 64-bit conversion.  Results below 0
   saturate to 0 and results above 0xffffffff saturate to 0xffffffff; in both
   cases the invalid exception is raised. */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    const int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: the narrowing conversion is value-preserving. */
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5421 1d6bda35 bellard
5422 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer by
   going through the signed 64-bit conversion.  Results below 0 saturate to 0
   and results above 0xffffffff saturate to 0xffffffff; in both cases the
   invalid exception is raised. */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    const int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: the narrowing conversion is value-preserving. */
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5439 1d6bda35 bellard
5440 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer by
   going through the round-to-zero signed 64-bit conversion.  Results below 0
   saturate to 0 and results above 0xffffffff saturate to 0xffffffff; in both
   cases the invalid exception is raised. */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    const int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        /* In range: the narrowing conversion is value-preserving. */
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5457 1d6bda35 bellard
5458 f090c9d4 pbrook
/* FIXME: This looks broken.  */
5459 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
5460 75d62a58 j_mayer
{
5461 75d62a58 j_mayer
    int64_t v;
5462 75d62a58 j_mayer
5463 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5464 f090c9d4 pbrook
    v += float64_val(a);
5465 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
5466 75d62a58 j_mayer
5467 75d62a58 j_mayer
    return v - INT64_MIN;
5468 75d62a58 j_mayer
}
5469 75d62a58 j_mayer
5470 75d62a58 j_mayer
/* Converts the double-precision value `a' to an unsigned 64-bit integer,
   rounding toward zero (via float64_to_int64_round_to_zero).

   Out-of-range inputs saturate and raise the invalid exception, following
   the convention of the float*_to_uint32 routines in this file: values that
   convert to a negative integer yield 0; values of 2^64 or more, +infinity
   and NaNs yield 0xFFFFFFFFFFFFFFFF; -infinity yields 0.

   The previous implementation added the raw IEEE bit patterns of `a' and
   (double) INT64_MIN as if they were integers, which does not perform a
   floating-point addition and produced meaningless results. */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    bits64 aSig;
    int64_t v;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( 0x43E <= aExp ) {
        /* |a| >= 2^63, or a is an infinity or a NaN. */
        if ( ( aExp == 0x43E ) && ! aSign ) {
            /* 2^63 <= a < 2^64: the value is already an integer, so the
               conversion is an exact left shift of the 53-bit significand
               (implicit bit included) by 0x43E - 0x433 = 11 places. */
            return ( aSig | LIT64( 0x0010000000000000 ) )<<11;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( aSign && ! ( ( aExp == 0x7FF ) && aSig ) ) {
            /* Negative and not a NaN: saturate at the low end. */
            return 0;
        }
        return LIT64( 0xFFFFFFFFFFFFFFFF );
    }

    /* |a| < 2^63: the signed conversion cannot overflow.  Any exception
       flags it raises are left set. */
    v = float64_to_int64_round_to_zero( a STATUS_VAR );
    if ( v < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    return v;
}
5480 75d62a58 j_mayer
5481 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
5482 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
5483 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
5484 1d6bda35 bellard
{                                                                            \
5485 1d6bda35 bellard
    flag aSign, bSign;                                                       \
5486 f090c9d4 pbrook
    bits ## s av, bv;                                                        \
5487 1d6bda35 bellard
                                                                             \
5488 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
5489 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
5490 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
5491 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
5492 1d6bda35 bellard
        if (!is_quiet ||                                                     \
5493 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
5494 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
5495 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
5496 1d6bda35 bellard
        }                                                                    \
5497 1d6bda35 bellard
        return float_relation_unordered;                                     \
5498 1d6bda35 bellard
    }                                                                        \
5499 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
5500 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
5501 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
5502 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
5503 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
5504 f090c9d4 pbrook
        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
5505 1d6bda35 bellard
            /* zero case */                                                  \
5506 1d6bda35 bellard
            return float_relation_equal;                                     \
5507 1d6bda35 bellard
        } else {                                                             \
5508 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
5509 1d6bda35 bellard
        }                                                                    \
5510 1d6bda35 bellard
    } else {                                                                 \
5511 f090c9d4 pbrook
        if (av == bv) {                                                      \
5512 1d6bda35 bellard
            return float_relation_equal;                                     \
5513 1d6bda35 bellard
        } else {                                                             \
5514 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
5515 1d6bda35 bellard
        }                                                                    \
5516 1d6bda35 bellard
    }                                                                        \
5517 1d6bda35 bellard
}                                                                            \
5518 1d6bda35 bellard
                                                                             \
5519 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
5520 1d6bda35 bellard
{                                                                            \
5521 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
5522 1d6bda35 bellard
}                                                                            \
5523 1d6bda35 bellard
                                                                             \
5524 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
5525 1d6bda35 bellard
{                                                                            \
5526 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
5527 1d6bda35 bellard
}
5528 1d6bda35 bellard
5529 1d6bda35 bellard
COMPARE(32, 0xff)
5530 1d6bda35 bellard
COMPARE(64, 0x7ff)
5531 9ee6e8bb pbrook
5532 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
5533 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
5534 1f587329 blueswir1
{
5535 1f587329 blueswir1
    flag aSign, bSign;
5536 1f587329 blueswir1
5537 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
5538 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
5539 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
5540 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
5541 1f587329 blueswir1
        if (!is_quiet ||
5542 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
5543 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
5544 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
5545 1f587329 blueswir1
        }
5546 1f587329 blueswir1
        return float_relation_unordered;
5547 1f587329 blueswir1
    }
5548 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
5549 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
5550 1f587329 blueswir1
    if ( aSign != bSign ) {
5551 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
5552 1f587329 blueswir1
            /* zero case */
5553 1f587329 blueswir1
            return float_relation_equal;
5554 1f587329 blueswir1
        } else {
5555 1f587329 blueswir1
            return 1 - (2 * aSign);
5556 1f587329 blueswir1
        }
5557 1f587329 blueswir1
    } else {
5558 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
5559 1f587329 blueswir1
            return float_relation_equal;
5560 1f587329 blueswir1
        } else {
5561 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
5562 1f587329 blueswir1
        }
5563 1f587329 blueswir1
    }
5564 1f587329 blueswir1
}
5565 1f587329 blueswir1
5566 1f587329 blueswir1
/* Signaling three-way comparison of two quadruple-precision values: any NaN
   operand raises the invalid exception.  See float128_compare_internal for
   the result encoding. */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 0;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
5570 1f587329 blueswir1
5571 1f587329 blueswir1
/* Quiet three-way comparison of two quadruple-precision values: only
   signaling NaN operands raise the invalid exception.  See
   float128_compare_internal for the result encoding. */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 1;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
5575 1f587329 blueswir1
5576 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
5577 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
5578 9ee6e8bb pbrook
{
5579 9ee6e8bb pbrook
    flag aSign;
5580 9ee6e8bb pbrook
    int16 aExp;
5581 9ee6e8bb pbrook
    bits32 aSig;
5582 9ee6e8bb pbrook
5583 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
5584 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
5585 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
5586 9ee6e8bb pbrook
5587 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
5588 9ee6e8bb pbrook
        return a;
5589 9ee6e8bb pbrook
    }
5590 69397542 pbrook
    if ( aExp != 0 )
5591 69397542 pbrook
        aSig |= 0x00800000;
5592 69397542 pbrook
    else if ( aSig == 0 )
5593 69397542 pbrook
        return a;
5594 69397542 pbrook
5595 69397542 pbrook
    aExp += n - 1;
5596 69397542 pbrook
    aSig <<= 7;
5597 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
5598 9ee6e8bb pbrook
}
5599 9ee6e8bb pbrook
5600 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (double precision).

   Infinities, NaNs and zeros are returned unchanged.  Any other value is
   rescaled and then rounded and packed by normalizeRoundAndPackFloat64, so
   overflow and underflow are handled by that routine. */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        /* Infinity or NaN. */
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        /* Zero. */
        return a;
    } else {
        /* Subnormal: an exponent field of 0 has the same scale as an
           exponent field of 1 (there is just no implicit bit).  Without
           this adjustment the result comes out a factor of two too small,
           even for n == 0. */
        aExp++;
    }

    /* Any |n| beyond 0x1000 already overflows or fully underflows every
       double-precision value, so clamping does not change the result; it
       keeps the exponent arithmetic below from overflowing for huge n. */
    if ( n > 0x1000 ) {
        n = 0x1000;
    } else if ( n < -0x1000 ) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
5622 9ee6e8bb pbrook
5623 9ee6e8bb pbrook
#ifdef FLOATX80
5624 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (extended double precision).

   Infinities, NaNs and zeros are returned unchanged.  Any other value is
   rescaled and then rounded and packed by normalizeRoundAndPackFloatx80
   using the current floatx80 rounding precision. */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    /* The extended format has a 15-bit exponent, so infinities and NaNs use
       0x7FFF.  The former test against 0x7FF (the double-precision value)
       let infinities and NaNs be rescaled and returned ordinary finite
       values with exponent 0x7FF unscaled. */
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Zero. */
            return a;
        }
        /* Subnormal (or pseudo-denormal): an exponent field of 0 has the
           same scale as an exponent field of 1.  Without this adjustment
           the result comes out a factor of two too small. */
        aExp++;
    }

    /* Any |n| beyond 0x10000 already overflows or fully underflows every
       extended-precision value, so clamping does not change the result; it
       keeps the exponent arithmetic below from overflowing for huge n. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
5644 9ee6e8bb pbrook
#endif
5645 9ee6e8bb pbrook
5646 9ee6e8bb pbrook
#ifdef FLOAT128
5647 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (quadruple precision).

   Infinities, NaNs and zeros are returned unchanged.  Any other value is
   rescaled and then rounded and packed by normalizeRoundAndPackFloat128, so
   overflow and underflow are handled by that routine. */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    bits64 aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* Infinity or NaN. */
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        /* Zero. */
        return a;
    } else {
        /* Subnormal: an exponent field of 0 has the same scale as an
           exponent field of 1 (there is just no implicit bit).  Without
           this adjustment the result comes out a factor of two too small,
           even for n == 0. */
        aExp++;
    }

    /* Any |n| beyond 0x10000 already overflows or fully underflows every
       quadruple-precision value, so clamping does not change the result; it
       keeps the exponent arithmetic below from overflowing for huge n. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}
5670 9ee6e8bb pbrook
#endif