Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ a63b5829

History | View | Annotate | Download (202 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 158142c2 bellard
Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 fe76d976 pbrook
/* FIXME: Flush-To-Zero only affects results.  Denormal inputs should also
34 fe76d976 pbrook
   be flushed to zero.  */
35 158142c2 bellard
#include "softfloat.h"
36 158142c2 bellard
37 158142c2 bellard
/*----------------------------------------------------------------------------
38 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
39 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
40 158142c2 bellard
| desired.)
41 158142c2 bellard
*----------------------------------------------------------------------------*/
42 158142c2 bellard
#include "softfloat-macros.h"
43 158142c2 bellard
44 158142c2 bellard
/*----------------------------------------------------------------------------
45 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
46 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
47 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
48 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
49 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
50 158142c2 bellard
| specific.
51 158142c2 bellard
*----------------------------------------------------------------------------*/
52 158142c2 bellard
#include "softfloat-specialize.h"
53 158142c2 bellard
54 158142c2 bellard
/* Stores `val' as the rounding mode recorded in the floating-point status. */
void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
58 158142c2 bellard
59 1d6bda35 bellard
/* Overwrites the exception flags recorded in the floating-point status with
   `val'; the previous flags are discarded rather than merged. */
void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
63 1d6bda35 bellard
64 158142c2 bellard
#ifdef FLOATX80
65 158142c2 bellard
/* Stores `val' as the extended double-precision rounding precision recorded
   in the floating-point status. */
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
69 158142c2 bellard
#endif
70 158142c2 bellard
71 158142c2 bellard
/*----------------------------------------------------------------------------
72 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
73 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
74 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
75 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
76 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
77 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
78 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
79 158142c2 bellard
| positive or negative integer is returned.
80 158142c2 bellard
*----------------------------------------------------------------------------*/
81 158142c2 bellard
82 158142c2 bellard
static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    int32 result;

    /* Choose the amount added below the binary point before the 7 fraction
       bits are discarded: half a unit for round-to-nearest-even, nothing
       when the mode must not grow the magnitude, and just under one unit
       when any nonzero fraction must grow the magnitude. */
    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up ) : ( mode == float_round_down ) ) {
        increment = 0;
    }
    else {
        increment = 0x7F;
    }

    lowBits = absZ & 0x7F;
    absZ = ( absZ + increment )>>7;
    /* An exact tie in round-to-nearest-even mode clears bit 0 of the result. */
    if ( nearestEven && ( lowBits == 0x40 ) ) absZ &= ~ (bits64) 1;

    result = absZ;
    if ( zSign ) result = - result;
    /* Out of range when the magnitude needs more than 32 bits, or when the
       sign of the converted value disagrees with the requested sign. */
    if ( ( absZ>>32 ) || ( result && ( ( result < 0 ) ^ zSign ) ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        if ( zSign ) return (sbits32) 0x80000000;
        return 0x7FFFFFFF;
    }
    /* Any discarded fraction bit makes the conversion inexact. */
    if ( lowBits ) STATUS(float_exception_flags) |= float_flag_inexact;
    return result;

}
119 158142c2 bellard
120 158142c2 bellard
/*----------------------------------------------------------------------------
121 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
122 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
123 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
124 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
125 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
126 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
127 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
128 158142c2 bellard
| exception is raised and the largest positive or negative integer is
129 158142c2 bellard
| returned.
130 158142c2 bellard
*----------------------------------------------------------------------------*/
131 158142c2 bellard
132 158142c2 bellard
static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
{
    int8 mode;
    flag nearestEven, roundUp;
    int64 result;

    /* Decide whether the integer word `absZ0' is bumped by one.  `absZ1'
       holds the fraction: its top bit is the half-unit bit, and any nonzero
       value means the input is not an exact integer. */
    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );
    if ( nearestEven ) {
        roundUp = ( (sbits64) absZ1 < 0 );
    }
    else if ( mode == float_round_to_zero ) {
        roundUp = 0;
    }
    else if ( zSign ) {
        roundUp = ( mode == float_round_down ) && absZ1;
    }
    else {
        roundUp = ( mode == float_round_up ) && absZ1;
    }

    if ( roundUp ) {
        ++absZ0;
        /* Wrapping to zero means the magnitude no longer fits in 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie (only the half-unit bit was set) in nearest-even mode:
           clear bit 0 of the result. */
        if ( nearestEven && ( (bits64) ( absZ1<<1 ) == 0 ) ) {
            absZ0 &= ~ (bits64) 1;
        }
    }

    result = absZ0;
    if ( zSign ) result = - result;
    /* A nonzero result whose sign disagrees with `zSign' did not fit. */
    if ( result && ( ( result < 0 ) ^ zSign ) ) goto overflow;
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
    return result;

 overflow:
    float_raise( float_flag_invalid STATUS_VAR);
    if ( zSign ) return (sbits64) LIT64( 0x8000000000000000 );
    return LIT64( 0x7FFFFFFFFFFFFFFF );

}
172 158142c2 bellard
173 158142c2 bellard
/*----------------------------------------------------------------------------
174 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
175 158142c2 bellard
*----------------------------------------------------------------------------*/
176 158142c2 bellard
177 158142c2 bellard
INLINE bits32 extractFloat32Frac( float32 a )
{
    bits32 raw;

    /* The fraction is the low 23 bits of the encoding. */
    raw = float32_val(a);
    return raw & 0x007FFFFF;

}
183 158142c2 bellard
184 158142c2 bellard
/*----------------------------------------------------------------------------
185 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
186 158142c2 bellard
*----------------------------------------------------------------------------*/
187 158142c2 bellard
188 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
{
    bits32 raw;

    /* The exponent is the 8-bit field starting at bit 23. */
    raw = float32_val(a);
    return ( raw>>23 ) & 0xFF;

}
194 158142c2 bellard
195 158142c2 bellard
/*----------------------------------------------------------------------------
196 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
197 158142c2 bellard
*----------------------------------------------------------------------------*/
198 158142c2 bellard
199 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
{
    bits32 raw;

    /* The sign is bit 31 of the encoding. */
    raw = float32_val(a);
    return raw>>31;

}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
208 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
209 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
210 158142c2 bellard
| `zSigPtr', respectively.
211 158142c2 bellard
*----------------------------------------------------------------------------*/
212 158142c2 bellard
213 158142c2 bellard
static void
214 158142c2 bellard
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
215 158142c2 bellard
{
216 158142c2 bellard
    int8 shiftCount;
217 158142c2 bellard
218 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
219 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
220 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
221 158142c2 bellard
222 158142c2 bellard
}
223 158142c2 bellard
224 158142c2 bellard
/*----------------------------------------------------------------------------
225 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
226 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
227 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
228 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
229 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
230 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
231 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
232 158142c2 bellard
| significand.
233 158142c2 bellard
*----------------------------------------------------------------------------*/
234 158142c2 bellard
235 158142c2 bellard
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
{
    bits32 signField, expField;

    /* The fields are added rather than OR-ed, so any bits of `zSig' at or
       above bit 23 carry into the exponent field. */
    signField = ( (bits32) zSign )<<31;
    expField = ( (bits32) zExp )<<23;
    return make_float32( signField + expField + zSig );

}
242 158142c2 bellard
243 158142c2 bellard
/*----------------------------------------------------------------------------
244 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
245 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
246 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
247 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
248 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
249 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
250 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
251 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
252 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
253 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
254 158142c2 bellard
| precision floating-point number.
255 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
256 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
257 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
258 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
259 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
260 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
261 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
262 158142c2 bellard
| Binary Floating-Point Arithmetic.
263 158142c2 bellard
*----------------------------------------------------------------------------*/
264 158142c2 bellard
265 158142c2 bellard
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    flag tiny;

    /* Choose the amount added to the 7 low (discarded) bits of `zSig':
       half a unit for round-to-nearest-even, nothing when the mode must not
       grow the magnitude, and just under one unit otherwise. */
    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up ) : ( mode == float_round_down ) ) {
        increment = 0;
    }
    else {
        increment = 0x7F;
    }

    lowBits = zSig & 0x7F;
    /* The unsigned 16-bit comparison catches both exponents of 0xFD or more
       and negative exponents in a single test. */
    if ( 0xFD <= (bits16) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (sbits32) ( zSig + increment ) < 0 ) )
           ) {
            /* Overflow: an all-ones significand is packed when the increment
               is zero, a zero significand otherwise. */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat32( zSign, 0xFF, - ( increment == 0 ));
        }
        else if ( zExp < 0 ) {
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + increment < 0x80000000 );
            /* Denormalize, then recompute the discarded bits from the
               shifted significand. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x7F;
            if ( tiny && lowBits ) float_raise( float_flag_underflow STATUS_VAR);
        }
    }
    if ( lowBits ) STATUS(float_exception_flags) |= float_flag_inexact;
    zSig = ( zSig + increment )>>7;
    /* An exact tie in round-to-nearest-even mode clears bit 0. */
    if ( nearestEven && ( lowBits == 0x40 ) ) zSig &= ~ (bits32) 1;
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
317 158142c2 bellard
318 158142c2 bellard
/*----------------------------------------------------------------------------
319 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
320 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
321 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
322 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
323 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
324 158142c2 bellard
| floating-point exponent.
325 158142c2 bellard
*----------------------------------------------------------------------------*/
326 158142c2 bellard
327 158142c2 bellard
static float32
328 158142c2 bellard
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
329 158142c2 bellard
{
330 158142c2 bellard
    int8 shiftCount;
331 158142c2 bellard
332 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
333 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
334 158142c2 bellard
335 158142c2 bellard
}
336 158142c2 bellard
337 158142c2 bellard
/*----------------------------------------------------------------------------
338 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
339 158142c2 bellard
*----------------------------------------------------------------------------*/
340 158142c2 bellard
341 158142c2 bellard
INLINE bits64 extractFloat64Frac( float64 a )
{
    bits64 raw;

    /* The fraction is the low 52 bits of the encoding. */
    raw = float64_val(a);
    return raw & LIT64( 0x000FFFFFFFFFFFFF );

}
347 158142c2 bellard
348 158142c2 bellard
/*----------------------------------------------------------------------------
349 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
350 158142c2 bellard
*----------------------------------------------------------------------------*/
351 158142c2 bellard
352 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
{
    bits64 raw;

    /* The exponent is the 11-bit field starting at bit 52. */
    raw = float64_val(a);
    return ( raw>>52 ) & 0x7FF;

}
358 158142c2 bellard
359 158142c2 bellard
/*----------------------------------------------------------------------------
360 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
361 158142c2 bellard
*----------------------------------------------------------------------------*/
362 158142c2 bellard
363 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
{
    bits64 raw;

    /* The sign is bit 63 of the encoding. */
    raw = float64_val(a);
    return raw>>63;

}
369 158142c2 bellard
370 158142c2 bellard
/*----------------------------------------------------------------------------
371 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
372 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
373 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
374 158142c2 bellard
| `zSigPtr', respectively.
375 158142c2 bellard
*----------------------------------------------------------------------------*/
376 158142c2 bellard
377 158142c2 bellard
static void
378 158142c2 bellard
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
379 158142c2 bellard
{
380 158142c2 bellard
    int8 shiftCount;
381 158142c2 bellard
382 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
383 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
384 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
385 158142c2 bellard
386 158142c2 bellard
}
387 158142c2 bellard
388 158142c2 bellard
/*----------------------------------------------------------------------------
389 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
390 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
391 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
392 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
393 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
394 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
395 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
396 158142c2 bellard
| significand.
397 158142c2 bellard
*----------------------------------------------------------------------------*/
398 158142c2 bellard
399 158142c2 bellard
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
{
    bits64 signField, expField;

    /* The fields are added rather than OR-ed, so any bits of `zSig' at or
       above bit 52 carry into the exponent field. */
    signField = ( (bits64) zSign )<<63;
    expField = ( (bits64) zExp )<<52;
    return make_float64( signField + expField + zSig );

}
406 158142c2 bellard
407 158142c2 bellard
/*----------------------------------------------------------------------------
408 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
409 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
410 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
411 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
412 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
413 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
414 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
415 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
416 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
417 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
418 158142c2 bellard
| precision floating-point number.
419 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
420 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
421 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
422 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
423 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
424 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
425 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
426 158142c2 bellard
| Binary Floating-Point Arithmetic.
427 158142c2 bellard
*----------------------------------------------------------------------------*/
428 158142c2 bellard
429 158142c2 bellard
static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int16 increment, lowBits;
    flag tiny;

    /* Choose the amount added to the 10 low (discarded) bits of `zSig':
       half a unit for round-to-nearest-even, nothing when the mode must not
       grow the magnitude, and just under one unit otherwise. */
    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );
    if ( nearestEven ) {
        increment = 0x200;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up ) : ( mode == float_round_down ) ) {
        increment = 0;
    }
    else {
        increment = 0x3FF;
    }

    lowBits = zSig & 0x3FF;
    /* The unsigned 16-bit comparison catches both exponents of 0x7FD or more
       and negative exponents in a single test. */
    if ( 0x7FD <= (bits16) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (sbits64) ( zSig + increment ) < 0 ) )
           ) {
            /* Overflow: an all-ones significand is packed when the increment
               is zero, a zero significand otherwise. */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat64( zSign, 0x7FF, - ( increment == 0 ));
        }
        else if ( zExp < 0 ) {
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + increment < LIT64( 0x8000000000000000 ) );
            /* Denormalize, then recompute the discarded bits from the
               shifted significand. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x3FF;
            if ( tiny && lowBits ) float_raise( float_flag_underflow STATUS_VAR);
        }
    }
    if ( lowBits ) STATUS(float_exception_flags) |= float_flag_inexact;
    zSig = ( zSig + increment )>>10;
    /* An exact tie in round-to-nearest-even mode clears bit 0. */
    if ( nearestEven && ( lowBits == 0x200 ) ) zSig &= ~ (bits64) 1;
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
481 158142c2 bellard
482 158142c2 bellard
/*----------------------------------------------------------------------------
483 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
484 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
485 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
486 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
487 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
488 158142c2 bellard
| floating-point exponent.
489 158142c2 bellard
*----------------------------------------------------------------------------*/
490 158142c2 bellard
491 158142c2 bellard
static float64
492 158142c2 bellard
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
493 158142c2 bellard
{
494 158142c2 bellard
    int8 shiftCount;
495 158142c2 bellard
496 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
497 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
498 158142c2 bellard
499 158142c2 bellard
}
500 158142c2 bellard
501 158142c2 bellard
#ifdef FLOATX80
502 158142c2 bellard
503 158142c2 bellard
/*----------------------------------------------------------------------------
504 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
505 158142c2 bellard
| value `a'.
506 158142c2 bellard
*----------------------------------------------------------------------------*/
507 158142c2 bellard
508 158142c2 bellard
INLINE bits64 extractFloatx80Frac( floatx80 a )
509 158142c2 bellard
{
510 158142c2 bellard
511 158142c2 bellard
    return a.low;
512 158142c2 bellard
513 158142c2 bellard
}
514 158142c2 bellard
515 158142c2 bellard
/*----------------------------------------------------------------------------
516 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
517 158142c2 bellard
| value `a'.
518 158142c2 bellard
*----------------------------------------------------------------------------*/
519 158142c2 bellard
520 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
521 158142c2 bellard
{
522 158142c2 bellard
523 158142c2 bellard
    return a.high & 0x7FFF;
524 158142c2 bellard
525 158142c2 bellard
}
526 158142c2 bellard
527 158142c2 bellard
/*----------------------------------------------------------------------------
528 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
529 158142c2 bellard
| `a'.
530 158142c2 bellard
*----------------------------------------------------------------------------*/
531 158142c2 bellard
532 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
533 158142c2 bellard
{
534 158142c2 bellard
535 158142c2 bellard
    return a.high>>15;
536 158142c2 bellard
537 158142c2 bellard
}
538 158142c2 bellard
539 158142c2 bellard
/*----------------------------------------------------------------------------
540 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
541 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
542 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
543 158142c2 bellard
| `zSigPtr', respectively.
544 158142c2 bellard
*----------------------------------------------------------------------------*/
545 158142c2 bellard
546 158142c2 bellard
static void
547 158142c2 bellard
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
548 158142c2 bellard
{
549 158142c2 bellard
    int8 shiftCount;
550 158142c2 bellard
551 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
552 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
553 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
554 158142c2 bellard
555 158142c2 bellard
}
556 158142c2 bellard
557 158142c2 bellard
/*----------------------------------------------------------------------------
558 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
559 158142c2 bellard
| extended double-precision floating-point value, returning the result.
560 158142c2 bellard
*----------------------------------------------------------------------------*/
561 158142c2 bellard
562 158142c2 bellard
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
563 158142c2 bellard
{
564 158142c2 bellard
    floatx80 z;
565 158142c2 bellard
566 158142c2 bellard
    z.low = zSig;
567 158142c2 bellard
    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
568 158142c2 bellard
    return z;
569 158142c2 bellard
570 158142c2 bellard
}
571 158142c2 bellard
572 158142c2 bellard
/*----------------------------------------------------------------------------
573 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
574 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
575 158142c2 bellard
| and returns the proper extended double-precision floating-point value
576 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
577 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
578 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
579 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
580 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
581 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
582 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
583 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
584 158142c2 bellard
| double-precision floating-point number.
585 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
586 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
587 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
588 158142c2 bellard
| format.
589 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
590 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
591 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
592 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
593 158142c2 bellard
| Floating-Point Arithmetic.
594 158142c2 bellard
*----------------------------------------------------------------------------*/
595 158142c2 bellard
596 158142c2 bellard
static floatx80
597 158142c2 bellard
 roundAndPackFloatx80(
598 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
599 158142c2 bellard
 STATUS_PARAM)
600 158142c2 bellard
{
601 158142c2 bellard
    int8 roundingMode;
602 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
603 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
604 158142c2 bellard
605 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
606 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
607 158142c2 bellard
    /* Only 64 and 32 select the reduced-precision path that follows; 80 and
       every other value are handled at `precision80' further down. */
    if ( roundingPrecision == 80 ) goto precision80;
608 158142c2 bellard
    if ( roundingPrecision == 64 ) {
609 158142c2 bellard
        /* The low 11 bits of zSig0 are rounded away; 0x400 is half of the
           lowest kept bit and serves as the round-to-nearest increment. */
        roundIncrement = LIT64( 0x0000000000000400 );
610 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
611 158142c2 bellard
    }
612 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
613 158142c2 bellard
        /* The low 40 bits of zSig0 are rounded away. */
        roundIncrement = LIT64( 0x0000008000000000 );
614 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
615 158142c2 bellard
    }
616 158142c2 bellard
    else {
617 158142c2 bellard
        goto precision80;
618 158142c2 bellard
    }
619 158142c2 bellard
    /* Fold any nonzero bits of zSig1 into bit 0 of zSig0 as a sticky bit;
       zSig1 is not read again on the reduced-precision path. */
    zSig0 |= ( zSig1 != 0 );
620 158142c2 bellard
    if ( ! roundNearestEven ) {
621 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
622 158142c2 bellard
            roundIncrement = 0;
623 158142c2 bellard
        }
624 158142c2 bellard
        else {
625 158142c2 bellard
            /* Directed rounding: adding the whole mask carries into the kept
               bits whenever any discarded bit is set.  The increment is
               dropped again when the mode rounds toward zero for this sign. */
            roundIncrement = roundMask;
626 158142c2 bellard
            if ( zSign ) {
627 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
628 158142c2 bellard
            }
629 158142c2 bellard
            else {
630 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
631 158142c2 bellard
            }
632 158142c2 bellard
        }
633 158142c2 bellard
    }
634 158142c2 bellard
    /* The bits that will be discarded; nonzero means the result is inexact. */
    roundBits = zSig0 & roundMask;
635 158142c2 bellard
    /* One range test for both extremes: true when zExp >= 0x7FFE and also,
       assuming bits32 is unsigned so that zExp - 1 wraps to a large value,
       when zExp <= 0. */
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
636 158142c2 bellard
        /* Overflow if the exponent is already too large, or if it is at the
           maximum and adding the increment carries out of the 64-bit
           significand. */
        if (    ( 0x7FFE < zExp )
637 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
638 158142c2 bellard
           ) {
639 158142c2 bellard
            /* The label sits inside the full-precision branch below;
               roundMask still holds the reduced-precision mask there. */
            goto overflow;
640 158142c2 bellard
        }
641 158142c2 bellard
        if ( zExp <= 0 ) {
642 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
643 158142c2 bellard
            /* Tiny unless tininess is detected after rounding, zExp is
               exactly 0, and adding the increment wraps zSig0. */
            isTiny =
644 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
645 158142c2 bellard
                || ( zExp < 0 )
646 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
647 158142c2 bellard
            /* Denormalize by 1 - zExp bit positions (helper defined
               elsewhere), then recompute the discarded bits. */
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
648 158142c2 bellard
            zExp = 0;
649 158142c2 bellard
            roundBits = zSig0 & roundMask;
650 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
651 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
652 158142c2 bellard
            zSig0 += roundIncrement;
653 158142c2 bellard
            /* Rounding carried into bit 63, so the exponent field becomes 1. */
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
654 158142c2 bellard
            /* roundMask + 1 is the lowest kept bit.  On an exact tie in
               nearest-even mode the mask is widened to clear that bit too,
               leaving an even result. */
            roundIncrement = roundMask + 1;
655 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
656 158142c2 bellard
                roundMask |= roundIncrement;
657 158142c2 bellard
            }
658 158142c2 bellard
            zSig0 &= ~ roundMask;
659 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
660 158142c2 bellard
        }
661 158142c2 bellard
    }
662 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
663 158142c2 bellard
    zSig0 += roundIncrement;
664 158142c2 bellard
    /* The addition wrapped past 64 bits: bump the exponent and reset the
       significand to its top bit only. */
    if ( zSig0 < roundIncrement ) {
665 158142c2 bellard
        ++zExp;
666 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
667 158142c2 bellard
    }
668 158142c2 bellard
    /* Same tie-to-even mask widening as in the subnormal case above. */
    roundIncrement = roundMask + 1;
669 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
670 158142c2 bellard
        roundMask |= roundIncrement;
671 158142c2 bellard
    }
672 158142c2 bellard
    zSig0 &= ~ roundMask;
673 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
674 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
675 158142c2 bellard
 precision80:
676 158142c2 bellard
    /* Full precision: zSig1 holds the bits below the result.  In
       nearest-even mode round up when its top bit is set. */
    increment = ( (sbits64) zSig1 < 0 );
677 158142c2 bellard
    if ( ! roundNearestEven ) {
678 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
679 158142c2 bellard
            increment = 0;
680 158142c2 bellard
        }
681 158142c2 bellard
        else {
682 158142c2 bellard
            /* Directed rounding: increment only when the mode rounds away
               from zero for this sign and zSig1 is nonzero. */
            if ( zSign ) {
683 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
684 158142c2 bellard
            }
685 158142c2 bellard
            else {
686 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
687 158142c2 bellard
            }
688 158142c2 bellard
        }
689 158142c2 bellard
    }
690 158142c2 bellard
    /* Same combined range test as on the reduced-precision path. */
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
691 158142c2 bellard
        if (    ( 0x7FFE < zExp )
692 158142c2 bellard
             || (    ( zExp == 0x7FFE )
693 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
694 158142c2 bellard
                  && increment
695 158142c2 bellard
                )
696 158142c2 bellard
           ) {
697 158142c2 bellard
            /* With a zero mask, ~roundMask below is an all-ones significand. */
            roundMask = 0;
698 158142c2 bellard
 overflow:
699 158142c2 bellard
            /* Also reached by `goto' from the reduced-precision path. */
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
700 158142c2 bellard
            /* When the mode rounds toward zero for this sign, return
               exponent 0x7FFE with all kept significand bits set instead of
               the 0x7FFF encoding below. */
            if (    ( roundingMode == float_round_to_zero )
701 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
702 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
703 158142c2 bellard
               ) {
704 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
705 158142c2 bellard
            }
706 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
707 158142c2 bellard
        }
708 158142c2 bellard
        if ( zExp <= 0 ) {
709 158142c2 bellard
            /* NOTE(review): unlike the reduced-precision path, this branch
               does not test STATUS(flush_to_zero) -- confirm that this is
               intentional. */
            isTiny =
710 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
711 158142c2 bellard
                || ( zExp < 0 )
712 158142c2 bellard
                || ! increment
713 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
714 158142c2 bellard
            /* Denormalize the zSig0:zSig1 pair by 1 - zExp bit positions
               (helper defined elsewhere). */
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
715 158142c2 bellard
            zExp = 0;
716 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
717 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
718 158142c2 bellard
            /* The shift changed zSig1, so the rounding decision is redone. */
            if ( roundNearestEven ) {
719 158142c2 bellard
                increment = ( (sbits64) zSig1 < 0 );
720 158142c2 bellard
            }
721 158142c2 bellard
            else {
722 158142c2 bellard
                if ( zSign ) {
723 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
724 158142c2 bellard
                }
725 158142c2 bellard
                else {
726 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
727 158142c2 bellard
                }
728 158142c2 bellard
            }
729 158142c2 bellard
            if ( increment ) {
730 158142c2 bellard
                ++zSig0;
731 158142c2 bellard
                /* Exact tie in nearest-even mode (no zSig1 bits below its
                   top bit): clear bit 0 so the result is even. */
                zSig0 &=
732 158142c2 bellard
                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
733 158142c2 bellard
                /* Rounding carried into bit 63, so the exponent becomes 1. */
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
734 158142c2 bellard
            }
735 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
736 158142c2 bellard
        }
737 158142c2 bellard
    }
738 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
739 158142c2 bellard
    if ( increment ) {
740 158142c2 bellard
        ++zSig0;
741 158142c2 bellard
        /* The increment wrapped zSig0 to zero: bump the exponent and reset
           the significand to its top bit only. */
        if ( zSig0 == 0 ) {
742 158142c2 bellard
            ++zExp;
743 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
744 158142c2 bellard
        }
745 158142c2 bellard
        else {
746 158142c2 bellard
            /* Tie in nearest-even mode: clear bit 0 to make the result even. */
            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
747 158142c2 bellard
        }
748 158142c2 bellard
    }
749 158142c2 bellard
    else {
750 158142c2 bellard
        /* A zero significand is packed with a zero exponent. */
        if ( zSig0 == 0 ) zExp = 0;
751 158142c2 bellard
    }
752 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
753 158142c2 bellard
754 158142c2 bellard
}
755 158142c2 bellard
756 158142c2 bellard
/*----------------------------------------------------------------------------
757 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
758 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
759 158142c2 bellard
| and returns the proper extended double-precision floating-point value
760 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
761 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
762 158142c2 bellard
| normalized.
763 158142c2 bellard
*----------------------------------------------------------------------------*/
764 158142c2 bellard
765 158142c2 bellard
static floatx80
766 158142c2 bellard
 normalizeRoundAndPackFloatx80(
767 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
768 158142c2 bellard
 STATUS_PARAM)
769 158142c2 bellard
{
770 158142c2 bellard
    int8 shiftCount;
771 158142c2 bellard
772 158142c2 bellard
    if ( zSig0 == 0 ) {
773 158142c2 bellard
        zSig0 = zSig1;
774 158142c2 bellard
        zSig1 = 0;
775 158142c2 bellard
        zExp -= 64;
776 158142c2 bellard
    }
777 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
778 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
779 158142c2 bellard
    zExp -= shiftCount;
780 158142c2 bellard
    return
781 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
782 158142c2 bellard
783 158142c2 bellard
}
784 158142c2 bellard
785 158142c2 bellard
#endif
786 158142c2 bellard
787 158142c2 bellard
#ifdef FLOAT128
788 158142c2 bellard
789 158142c2 bellard
/*----------------------------------------------------------------------------
790 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
791 158142c2 bellard
| floating-point value `a'.
792 158142c2 bellard
*----------------------------------------------------------------------------*/
793 158142c2 bellard
794 158142c2 bellard
INLINE bits64 extractFloat128Frac1( float128 a )
795 158142c2 bellard
{
796 158142c2 bellard
797 158142c2 bellard
    return a.low;
798 158142c2 bellard
799 158142c2 bellard
}
800 158142c2 bellard
801 158142c2 bellard
/*----------------------------------------------------------------------------
802 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
803 158142c2 bellard
| floating-point value `a'.
804 158142c2 bellard
*----------------------------------------------------------------------------*/
805 158142c2 bellard
806 158142c2 bellard
INLINE bits64 extractFloat128Frac0( float128 a )
807 158142c2 bellard
{
808 158142c2 bellard
809 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
810 158142c2 bellard
811 158142c2 bellard
}
812 158142c2 bellard
813 158142c2 bellard
/*----------------------------------------------------------------------------
814 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
815 158142c2 bellard
| `a'.
816 158142c2 bellard
*----------------------------------------------------------------------------*/
817 158142c2 bellard
818 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
819 158142c2 bellard
{
820 158142c2 bellard
821 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
822 158142c2 bellard
823 158142c2 bellard
}
824 158142c2 bellard
825 158142c2 bellard
/*----------------------------------------------------------------------------
826 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
827 158142c2 bellard
*----------------------------------------------------------------------------*/
828 158142c2 bellard
829 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
830 158142c2 bellard
{
831 158142c2 bellard
832 158142c2 bellard
    return a.high>>63;
833 158142c2 bellard
834 158142c2 bellard
}
835 158142c2 bellard
836 158142c2 bellard
/*----------------------------------------------------------------------------
837 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
838 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
839 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
840 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
841 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
842 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
843 158142c2 bellard
| location pointed to by `zSig1Ptr'.
844 158142c2 bellard
*----------------------------------------------------------------------------*/
845 158142c2 bellard
846 158142c2 bellard
static void
847 158142c2 bellard
 normalizeFloat128Subnormal(
848 158142c2 bellard
     bits64 aSig0,
849 158142c2 bellard
     bits64 aSig1,
850 158142c2 bellard
     int32 *zExpPtr,
851 158142c2 bellard
     bits64 *zSig0Ptr,
852 158142c2 bellard
     bits64 *zSig1Ptr
853 158142c2 bellard
 )
854 158142c2 bellard
{
855 158142c2 bellard
    int8 shiftCount;
856 158142c2 bellard
857 158142c2 bellard
    if ( aSig0 == 0 ) {
858 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
859 158142c2 bellard
        if ( shiftCount < 0 ) {
860 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
861 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
862 158142c2 bellard
        }
863 158142c2 bellard
        else {
864 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
865 158142c2 bellard
            *zSig1Ptr = 0;
866 158142c2 bellard
        }
867 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
868 158142c2 bellard
    }
869 158142c2 bellard
    else {
870 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
871 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
872 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
873 158142c2 bellard
    }
874 158142c2 bellard
875 158142c2 bellard
}
876 158142c2 bellard
877 158142c2 bellard
/*----------------------------------------------------------------------------
878 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
879 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
880 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
881 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
882 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
883 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
884 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
885 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
886 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
887 158142c2 bellard
| significand.
888 158142c2 bellard
*----------------------------------------------------------------------------*/
889 158142c2 bellard
890 158142c2 bellard
INLINE float128
891 158142c2 bellard
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
892 158142c2 bellard
{
893 158142c2 bellard
    float128 z;
894 158142c2 bellard
895 158142c2 bellard
    z.low = zSig1;
896 158142c2 bellard
    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
897 158142c2 bellard
    return z;
898 158142c2 bellard
899 158142c2 bellard
}
900 158142c2 bellard
901 158142c2 bellard
/*----------------------------------------------------------------------------
902 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
903 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
904 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
905 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
906 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
907 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
908 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
909 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
910 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
911 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
912 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
913 158142c2 bellard
| precision floating-point number.
914 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
915 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
916 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
917 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
918 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
919 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
920 158142c2 bellard
*----------------------------------------------------------------------------*/
921 158142c2 bellard
922 158142c2 bellard
static float128
923 158142c2 bellard
 roundAndPackFloat128(
924 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
925 158142c2 bellard
{
926 158142c2 bellard
    int8 roundingMode;
927 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
928 158142c2 bellard
929 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
930 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
931 158142c2 bellard
    increment = ( (sbits64) zSig2 < 0 );
932 158142c2 bellard
    if ( ! roundNearestEven ) {
933 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
934 158142c2 bellard
            increment = 0;
935 158142c2 bellard
        }
936 158142c2 bellard
        else {
937 158142c2 bellard
            if ( zSign ) {
938 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
939 158142c2 bellard
            }
940 158142c2 bellard
            else {
941 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
942 158142c2 bellard
            }
943 158142c2 bellard
        }
944 158142c2 bellard
    }
945 158142c2 bellard
    if ( 0x7FFD <= (bits32) zExp ) {
946 158142c2 bellard
        if (    ( 0x7FFD < zExp )
947 158142c2 bellard
             || (    ( zExp == 0x7FFD )
948 158142c2 bellard
                  && eq128(
949 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
950 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
951 158142c2 bellard
                         zSig0,
952 158142c2 bellard
                         zSig1
953 158142c2 bellard
                     )
954 158142c2 bellard
                  && increment
955 158142c2 bellard
                )
956 158142c2 bellard
           ) {
957 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
958 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
959 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
960 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
961 158142c2 bellard
               ) {
962 158142c2 bellard
                return
963 158142c2 bellard
                    packFloat128(
964 158142c2 bellard
                        zSign,
965 158142c2 bellard
                        0x7FFE,
966 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
967 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
968 158142c2 bellard
                    );
969 158142c2 bellard
            }
970 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
971 158142c2 bellard
        }
972 158142c2 bellard
        if ( zExp < 0 ) {
973 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
974 158142c2 bellard
            isTiny =
975 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
976 158142c2 bellard
                || ( zExp < -1 )
977 158142c2 bellard
                || ! increment
978 158142c2 bellard
                || lt128(
979 158142c2 bellard
                       zSig0,
980 158142c2 bellard
                       zSig1,
981 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
982 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
983 158142c2 bellard
                   );
984 158142c2 bellard
            shift128ExtraRightJamming(
985 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
986 158142c2 bellard
            zExp = 0;
987 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
988 158142c2 bellard
            if ( roundNearestEven ) {
989 158142c2 bellard
                increment = ( (sbits64) zSig2 < 0 );
990 158142c2 bellard
            }
991 158142c2 bellard
            else {
992 158142c2 bellard
                if ( zSign ) {
993 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
994 158142c2 bellard
                }
995 158142c2 bellard
                else {
996 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
997 158142c2 bellard
                }
998 158142c2 bellard
            }
999 158142c2 bellard
        }
1000 158142c2 bellard
    }
1001 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1002 158142c2 bellard
    if ( increment ) {
1003 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1004 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1005 158142c2 bellard
    }
1006 158142c2 bellard
    else {
1007 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1008 158142c2 bellard
    }
1009 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1010 158142c2 bellard
1011 158142c2 bellard
}
1012 158142c2 bellard
1013 158142c2 bellard
/*----------------------------------------------------------------------------
1014 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1015 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1016 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1017 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1018 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1019 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1020 158142c2 bellard
| point exponent.
1021 158142c2 bellard
*----------------------------------------------------------------------------*/
1022 158142c2 bellard
1023 158142c2 bellard
static float128
1024 158142c2 bellard
 normalizeRoundAndPackFloat128(
1025 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1026 158142c2 bellard
{
1027 158142c2 bellard
    int8 shiftCount;
1028 158142c2 bellard
    bits64 zSig2;
1029 158142c2 bellard
1030 158142c2 bellard
    if ( zSig0 == 0 ) {
1031 158142c2 bellard
        zSig0 = zSig1;
1032 158142c2 bellard
        zSig1 = 0;
1033 158142c2 bellard
        zExp -= 64;
1034 158142c2 bellard
    }
1035 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1036 158142c2 bellard
    if ( 0 <= shiftCount ) {
1037 158142c2 bellard
        zSig2 = 0;
1038 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1039 158142c2 bellard
    }
1040 158142c2 bellard
    else {
1041 158142c2 bellard
        shift128ExtraRightJamming(
1042 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1043 158142c2 bellard
    }
1044 158142c2 bellard
    zExp -= shiftCount;
1045 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1046 158142c2 bellard
1047 158142c2 bellard
}
1048 158142c2 bellard
1049 158142c2 bellard
#endif
1050 158142c2 bellard
1051 158142c2 bellard
/*----------------------------------------------------------------------------
1052 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1053 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1054 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1055 158142c2 bellard
*----------------------------------------------------------------------------*/
1056 158142c2 bellard
1057 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    flag zSign;

    if ( a == 0 ) {
        return float32_zero;
    }
    /* The most negative input is emitted directly (sign 1, exponent field
       0x9E, zero fraction) instead of being negated. */
    if ( a == (sbits32) 0x80000000 ) {
        return packFloat32( 1, 0x9E, 0 );
    }
    zSign = ( a < 0 );
    if ( zSign ) {
        a = - a;
    }
    return normalizeRoundAndPackFloat32( zSign, 0x9C, a STATUS_VAR );

}
1067 158142c2 bellard
1068 158142c2 bellard
/*----------------------------------------------------------------------------
1069 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1070 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1071 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1072 158142c2 bellard
*----------------------------------------------------------------------------*/
1073 158142c2 bellard
1074 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint32) 0 - (uint32) a : (uint32) a;
    /* Shift so that the leading 1 of a 32-bit magnitude lands on bit 52.
       Nothing is shifted out of the 64-bit word, so the value is packed
       without a rounding step. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1089 158142c2 bellard
1090 158142c2 bellard
#ifdef FLOATX80
1091 158142c2 bellard
1092 158142c2 bellard
/*----------------------------------------------------------------------------
1093 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1094 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1095 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1096 158142c2 bellard
| Arithmetic.
1097 158142c2 bellard
*----------------------------------------------------------------------------*/
1098 158142c2 bellard
1099 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint32) 0 - (uint32) a : (uint32) a;
    /* Shift so that the leading 1 of a 32-bit magnitude lands on bit 63,
       the top of the 64-bit significand word. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1114 158142c2 bellard
1115 158142c2 bellard
#endif
1116 158142c2 bellard
1117 158142c2 bellard
#ifdef FLOAT128
1118 158142c2 bellard
1119 158142c2 bellard
/*----------------------------------------------------------------------------
1120 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1121 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1122 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1123 158142c2 bellard
*----------------------------------------------------------------------------*/
1124 158142c2 bellard
1125 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint32) 0 - (uint32) a : (uint32) a;
    /* Shift so that the leading 1 of a 32-bit magnitude lands on bit 48 of
       the high significand word; the low word stays zero. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1140 158142c2 bellard
1141 158142c2 bellard
#endif
1142 158142c2 bellard
1143 158142c2 bellard
/*----------------------------------------------------------------------------
1144 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1145 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1146 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1147 158142c2 bellard
*----------------------------------------------------------------------------*/
1148 158142c2 bellard
1149 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint64) 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At most 24 significant bits: move the leading 1 to bit 23 and
           pack without a rounding step. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* More than 24 significant bits: place the leading 1 on bit 30 for
           the rounding routine.  A right shift "jams" the discarded bits
           into the result so they still influence rounding. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1174 158142c2 bellard
1175 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* At most 24 significant bits: move the leading 1 to bit 23 and
           pack without a rounding step.  The sign argument is 0 because an
           unsigned input can only produce a non-negative result. */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        /* More than 24 significant bits: place the leading 1 on bit 30 for
           the rounding routine.  A right shift "jams" the discarded bits
           into the result so they still influence rounding. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1195 75d62a58 j_mayer
1196 158142c2 bellard
/*----------------------------------------------------------------------------
1197 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1198 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1199 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1200 158142c2 bellard
*----------------------------------------------------------------------------*/
1201 158142c2 bellard
1202 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    flag zSign;

    if ( a == 0 ) {
        return float64_zero;
    }
    /* The most negative input is emitted directly (sign 1, exponent field
       0x43E, zero fraction) instead of being negated. */
    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
        return packFloat64( 1, 0x43E, 0 );
    }
    zSign = ( a < 0 );
    if ( zSign ) {
        a = - a;
    }
    return normalizeRoundAndPackFloat64( zSign, 0x43C, a STATUS_VAR );

}
1214 158142c2 bellard
1215 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float64_zero;
    /* Place the leading 1 on bit 62 for the rounding routine.  An input with
       bit 63 set needs a one-bit RIGHT shift; that case is handled here
       explicitly (with jamming, so the discarded bit still influences
       rounding) rather than being passed on as a negative left-shift
       count. */
    shiftCount = countLeadingZeros64( a ) - 1;
    if ( shiftCount < 0 ) {
        shift64RightJamming( a, - shiftCount, &a );
    }
    else {
        a <<= shiftCount;
    }
    return roundAndPackFloat64( 0, 0x43C - shiftCount, a STATUS_VAR );

}
1221 75d62a58 j_mayer
1222 158142c2 bellard
#ifdef FLOATX80
1223 158142c2 bellard
1224 158142c2 bellard
/*----------------------------------------------------------------------------
1225 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1226 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1227 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1228 158142c2 bellard
| Arithmetic.
1229 158142c2 bellard
*----------------------------------------------------------------------------*/
1230 158142c2 bellard
1231 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint64) 0 - (uint64) a : (uint64) a;
    /* Left-justify the magnitude so its leading 1 sits on bit 63; every
       64-bit magnitude fits, so nothing is shifted out. */
    shiftCount = countLeadingZeros64( absA );
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1244 158142c2 bellard
1245 158142c2 bellard
#endif
1246 158142c2 bellard
1247 158142c2 bellard
#ifdef FLOAT128
1248 158142c2 bellard
1249 158142c2 bellard
/*----------------------------------------------------------------------------
1250 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1251 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1252 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253 158142c2 bellard
*----------------------------------------------------------------------------*/
1254 158142c2 bellard
1255 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    bits64 zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       itself overflows for the most negative input, which C leaves
       undefined; the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? (uint64) 0 - (uint64) a : (uint64) a;
    /* Total left shift, counted over the 128-bit significand pair with the
       magnitude starting in the low word. */
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A shift of a whole word or more: start from the high word and
           apply only the remainder. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1281 158142c2 bellard
1282 158142c2 bellard
#endif
1283 158142c2 bellard
1284 158142c2 bellard
/*----------------------------------------------------------------------------
1285 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1286 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1287 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1288 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1289 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1290 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1291 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1292 158142c2 bellard
*----------------------------------------------------------------------------*/
1293 158142c2 bellard
1294 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 expField = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );
    int16 rightShift = 0xAF - expField;
    bits64 wideSig;

    /* A NaN is converted as if it were positive. */
    if ( frac && ( expField == 0xFF ) ) {
        negative = 0;
    }
    /* A nonzero exponent field means the implicit leading 1 is present. */
    if ( expField != 0 ) {
        frac |= 0x00800000;
    }
    wideSig = ( (bits64) frac )<<32;
    if ( rightShift > 0 ) {
        shift64RightJamming( wideSig, rightShift, &wideSig );
    }
    return roundAndPackInt32( negative, wideSig STATUS_VAR );

}
1313 158142c2 bellard
1314 158142c2 bellard
/*----------------------------------------------------------------------------
1315 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1316 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1317 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1318 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1319 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1320 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1321 158142c2 bellard
| returned.
1322 158142c2 bellard
*----------------------------------------------------------------------------*/
1323 158142c2 bellard
1324 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    bits32 aSig = extractFloat32Frac( a );
    int16 aExp = extractFloat32Exp( a );
    flag aSign = extractFloat32Sign( a );
    int16 shiftCount = aExp - 0x9E;
    int32 z;

    if ( shiftCount >= 0 ) {
        /* Exponent field of 0x9E or more.  The single bit pattern
           0xCF000000 is returned as the most negative integer without
           raising a flag; everything else in this range is invalid. */
        if ( float32_val(a) == 0xCF000000 ) {
            return (sbits32) 0x80000000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( aSign && ! ( ( aExp == 0xFF ) && aSig ) ) {
            return (sbits32) 0x80000000;
        }
        return 0x7FFFFFFF;
    }
    if ( aExp < 0x7F ) {
        /* Exponent field below 0x7F: the result is 0, inexact unless the
           input itself was a zero. */
        if ( aExp || aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* Left-justify the significand (implicit bit included) in 32 bits, then
       shift the integer part down into place. */
    aSig = ( aSig | 0x00800000 )<<8;
    z = aSig>>( - shiftCount );
    /* Any bit that the right shift discarded makes the result inexact. */
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? - z : z;

}
1355 158142c2 bellard
1356 158142c2 bellard
/*----------------------------------------------------------------------------
1357 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1358 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1359 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1360 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1361 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1362 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1363 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1364 158142c2 bellard
*----------------------------------------------------------------------------*/
1365 158142c2 bellard
1366 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 expField = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );
    int16 rightShift = 0xBE - expField;
    bits64 wideSig, extraBits;

    if ( rightShift < 0 ) {
        /* Exponent field above 0xBE: always invalid.  NaNs and positive
           inputs yield the largest positive integer, the rest the most
           negative one. */
        float_raise( float_flag_invalid STATUS_VAR);
        if ( negative && ! ( ( expField == 0xFF ) && frac ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    /* A nonzero exponent field means the implicit leading 1 is present. */
    if ( expField != 0 ) {
        frac |= 0x00800000;
    }
    wideSig = ( (bits64) frac )<<40;
    shift64ExtraRightJamming( wideSig, 0, rightShift, &wideSig, &extraBits );
    return roundAndPackInt64( negative, wideSig, extraBits STATUS_VAR );

}
1391 158142c2 bellard
1392 158142c2 bellard
/*----------------------------------------------------------------------------
1393 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1394 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1395 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1396 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1397 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1398 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1399 158142c2 bellard
| returned.
1400 158142c2 bellard
*----------------------------------------------------------------------------*/
1401 158142c2 bellard
1402 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    bits32 aSig = extractFloat32Frac( a );
    int16 aExp = extractFloat32Exp( a );
    flag aSign = extractFloat32Sign( a );
    int16 shiftCount = aExp - 0xBE;
    bits64 aSig64;
    int64 z;

    if ( shiftCount >= 0 ) {
        /* Exponent field of 0xBE or more.  The single bit pattern
           0xDF000000 is returned as the most negative integer without
           raising a flag; everything else in this range is invalid. */
        if ( float32_val(a) == 0xDF000000 ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( aSign && ! ( ( aExp == 0xFF ) && aSig ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( aExp < 0x7F ) {
        /* Exponent field below 0x7F: the result is 0, inexact unless the
           input itself was a zero. */
        if ( aExp || aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    /* Left-justify the significand (implicit bit included) in 64 bits, then
       shift the integer part down into place. */
    aSig64 = aSig | 0x00800000;
    aSig64 <<= 40;
    z = aSig64>>( - shiftCount );
    /* Any bit that the right shift discarded makes the result inexact. */
    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? - z : z;

}
1437 158142c2 bellard
1438 158142c2 bellard
/*----------------------------------------------------------------------------
1439 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1440 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1441 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1442 158142c2 bellard
| Arithmetic.
1443 158142c2 bellard
*----------------------------------------------------------------------------*/
1444 158142c2 bellard
1445 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 expField = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );

    if ( expField == 0xFF ) {
        /* All-ones exponent: a nonzero fraction is a NaN, otherwise an
           infinity. */
        if ( frac != 0 ) {
            return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
        }
        return packFloat64( negative, 0x7FF, 0 );
    }
    if ( expField == 0 ) {
        /* Zero exponent: either a signed zero or a subnormal, which is
           normalized before re-biasing. */
        if ( frac == 0 ) {
            return packFloat64( negative, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &expField, &frac );
        expField -= 1;
    }
    return packFloat64( negative, expField + 0x380, ( (bits64) frac )<<29 );

}
1466 158142c2 bellard
1467 158142c2 bellard
#ifdef FLOATX80
1468 158142c2 bellard
1469 158142c2 bellard
/*----------------------------------------------------------------------------
1470 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1471 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1472 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1473 158142c2 bellard
| Arithmetic.
1474 158142c2 bellard
*----------------------------------------------------------------------------*/
1475 158142c2 bellard
1476 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 expField = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );

    if ( expField == 0xFF ) {
        /* All-ones exponent: a nonzero fraction is a NaN, otherwise an
           infinity. */
        if ( frac != 0 ) {
            return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloatx80( negative, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( expField == 0 ) {
        /* Zero exponent: either a signed zero or a subnormal, which is
           normalized first. */
        if ( frac == 0 ) {
            return packFloatx80( negative, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &expField, &frac );
    }
    /* The leading 1 is set explicitly in the significand before it is
       moved to the top of the 64-bit word. */
    frac |= 0x00800000;
    return packFloatx80( negative, expField + 0x3F80, ( (bits64) frac )<<40 );

}
1497 158142c2 bellard
1498 158142c2 bellard
#endif
1499 158142c2 bellard
1500 158142c2 bellard
#ifdef FLOAT128
1501 158142c2 bellard
1502 158142c2 bellard
/*----------------------------------------------------------------------------
1503 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1504 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1505 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1506 158142c2 bellard
| Arithmetic.
1507 158142c2 bellard
*----------------------------------------------------------------------------*/
1508 158142c2 bellard
1509 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 expField = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );

    if ( expField == 0xFF ) {
        /* All-ones exponent: a nonzero fraction is a NaN, otherwise an
           infinity. */
        if ( frac != 0 ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat128( negative, 0x7FFF, 0, 0 );
    }
    if ( expField == 0 ) {
        /* Zero exponent: either a signed zero or a subnormal, which is
           normalized before re-biasing. */
        if ( frac == 0 ) {
            return packFloat128( negative, 0, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &expField, &frac );
        expField -= 1;
    }
    return packFloat128( negative, expField + 0x3F80, ( (bits64) frac )<<25, 0 );

}
1530 158142c2 bellard
1531 158142c2 bellard
#endif
1532 158142c2 bellard
1533 158142c2 bellard
/*----------------------------------------------------------------------------
1534 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1535 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1536 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1537 158142c2 bellard
| Floating-Point Arithmetic.
1538 158142c2 bellard
*----------------------------------------------------------------------------*/
1539 158142c2 bellard
1540 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    bits32 unitBit, fractionMask;
    int8 roundingMode;
    bits32 z;

    aExp = extractFloat32Exp( a );
    if ( aExp >= 0x96 ) {
        /* Exponent field of 0x96 or more: the input is returned as is,
           except that a NaN goes through NaN propagation. */
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp < 0x7F ) {
        /* Exponent field below 0x7F: a zero is returned unchanged; any
           other input is inexact and collapses to a signed zero or a
           signed one, depending on the rounding mode. */
        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        aSign = extractFloat32Sign( a );
        switch ( STATUS(float_rounding_mode) ) {
         case float_round_up:
            return make_float32(aSign ? 0x80000000 : 0x3F800000);
         case float_round_down:
            return make_float32(aSign ? 0xBF800000 : 0);
         case float_round_nearest_even:
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( aSign, 0x7F, 0 );
            }
            break;
        }
        return packFloat32( aSign, 0, 0 );
    }
    /* `unitBit' selects the lowest bit kept in the result; `fractionMask'
       covers every bit below it.  Rounding is done directly on the raw
       bit pattern of the input. */
    unitBit = (bits32) 1<<( 0x96 - aExp );
    fractionMask = unitBit - 1;
    z = float32_val(a);
    roundingMode = STATUS(float_rounding_mode);
    switch ( roundingMode ) {
     case float_round_nearest_even:
        /* Add half a unit; if that leaves no bits below the unit bit,
           clear the unit bit as well. */
        z += unitBit>>1;
        if ( ( z & fractionMask ) == 0 ) {
            z &= ~ unitBit;
        }
        break;
     case float_round_to_zero:
        break;
     default:
        /* Directed rounding: bump the pattern only when the sign bit and
           the "round up" condition differ. */
        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
            z += fractionMask;
        }
        break;
    }
    z &= ~ fractionMask;
    if ( z != float32_val(a) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float32(z);

}
1591 158142c2 bellard
1592 158142c2 bellard
/*----------------------------------------------------------------------------
1593 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1594 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1595 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1596 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1597 158142c2 bellard
| Floating-Point Arithmetic.
1598 158142c2 bellard
*----------------------------------------------------------------------------*/
1599 158142c2 bellard
1600 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1601 158142c2 bellard
{
1602 158142c2 bellard
    int16 aExp, bExp, zExp;
1603 158142c2 bellard
    bits32 aSig, bSig, zSig;
1604 158142c2 bellard
    int16 expDiff;
1605 158142c2 bellard
1606 158142c2 bellard
    aSig = extractFloat32Frac( a );
1607 158142c2 bellard
    aExp = extractFloat32Exp( a );
1608 158142c2 bellard
    bSig = extractFloat32Frac( b );
1609 158142c2 bellard
    bExp = extractFloat32Exp( b );
1610 158142c2 bellard
    expDiff = aExp - bExp;
1611 158142c2 bellard
    aSig <<= 6;
1612 158142c2 bellard
    bSig <<= 6;
1613 158142c2 bellard
    if ( 0 < expDiff ) {
1614 158142c2 bellard
        if ( aExp == 0xFF ) {
1615 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1616 158142c2 bellard
            return a;
1617 158142c2 bellard
        }
1618 158142c2 bellard
        if ( bExp == 0 ) {
1619 158142c2 bellard
            --expDiff;
1620 158142c2 bellard
        }
1621 158142c2 bellard
        else {
1622 158142c2 bellard
            bSig |= 0x20000000;
1623 158142c2 bellard
        }
1624 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1625 158142c2 bellard
        zExp = aExp;
1626 158142c2 bellard
    }
1627 158142c2 bellard
    else if ( expDiff < 0 ) {
1628 158142c2 bellard
        if ( bExp == 0xFF ) {
1629 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1630 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1631 158142c2 bellard
        }
1632 158142c2 bellard
        if ( aExp == 0 ) {
1633 158142c2 bellard
            ++expDiff;
1634 158142c2 bellard
        }
1635 158142c2 bellard
        else {
1636 158142c2 bellard
            aSig |= 0x20000000;
1637 158142c2 bellard
        }
1638 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1639 158142c2 bellard
        zExp = bExp;
1640 158142c2 bellard
    }
1641 158142c2 bellard
    else {
1642 158142c2 bellard
        if ( aExp == 0xFF ) {
1643 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1644 158142c2 bellard
            return a;
1645 158142c2 bellard
        }
1646 fe76d976 pbrook
        if ( aExp == 0 ) {
1647 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
1648 fe76d976 pbrook
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1649 fe76d976 pbrook
        }
1650 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1651 158142c2 bellard
        zExp = aExp;
1652 158142c2 bellard
        goto roundAndPack;
1653 158142c2 bellard
    }
1654 158142c2 bellard
    aSig |= 0x20000000;
1655 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1656 158142c2 bellard
    --zExp;
1657 158142c2 bellard
    if ( (sbits32) zSig < 0 ) {
1658 158142c2 bellard
        zSig = aSig + bSig;
1659 158142c2 bellard
        ++zExp;
1660 158142c2 bellard
    }
1661 158142c2 bellard
 roundAndPack:
1662 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1663 158142c2 bellard
1664 158142c2 bellard
}
1665 158142c2 bellard
1666 158142c2 bellard
/*----------------------------------------------------------------------------
1667 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1668 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1669 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1670 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1671 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1672 158142c2 bellard
*----------------------------------------------------------------------------*/
1673 158142c2 bellard
1674 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1675 158142c2 bellard
{
1676 158142c2 bellard
    int16 aExp, bExp, zExp;
1677 158142c2 bellard
    bits32 aSig, bSig, zSig;
1678 158142c2 bellard
    int16 expDiff;
1679 158142c2 bellard
1680 158142c2 bellard
    aSig = extractFloat32Frac( a );
1681 158142c2 bellard
    aExp = extractFloat32Exp( a );
1682 158142c2 bellard
    bSig = extractFloat32Frac( b );
1683 158142c2 bellard
    bExp = extractFloat32Exp( b );
1684 158142c2 bellard
    expDiff = aExp - bExp;
1685 158142c2 bellard
    aSig <<= 7;
1686 158142c2 bellard
    bSig <<= 7;
1687 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1688 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1689 158142c2 bellard
    if ( aExp == 0xFF ) {
1690 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1691 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1692 158142c2 bellard
        return float32_default_nan;
1693 158142c2 bellard
    }
1694 158142c2 bellard
    if ( aExp == 0 ) {
1695 158142c2 bellard
        aExp = 1;
1696 158142c2 bellard
        bExp = 1;
1697 158142c2 bellard
    }
1698 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1699 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1700 158142c2 bellard
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1701 158142c2 bellard
 bExpBigger:
1702 158142c2 bellard
    if ( bExp == 0xFF ) {
1703 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1704 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1705 158142c2 bellard
    }
1706 158142c2 bellard
    if ( aExp == 0 ) {
1707 158142c2 bellard
        ++expDiff;
1708 158142c2 bellard
    }
1709 158142c2 bellard
    else {
1710 158142c2 bellard
        aSig |= 0x40000000;
1711 158142c2 bellard
    }
1712 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1713 158142c2 bellard
    bSig |= 0x40000000;
1714 158142c2 bellard
 bBigger:
1715 158142c2 bellard
    zSig = bSig - aSig;
1716 158142c2 bellard
    zExp = bExp;
1717 158142c2 bellard
    zSign ^= 1;
1718 158142c2 bellard
    goto normalizeRoundAndPack;
1719 158142c2 bellard
 aExpBigger:
1720 158142c2 bellard
    if ( aExp == 0xFF ) {
1721 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1722 158142c2 bellard
        return a;
1723 158142c2 bellard
    }
1724 158142c2 bellard
    if ( bExp == 0 ) {
1725 158142c2 bellard
        --expDiff;
1726 158142c2 bellard
    }
1727 158142c2 bellard
    else {
1728 158142c2 bellard
        bSig |= 0x40000000;
1729 158142c2 bellard
    }
1730 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1731 158142c2 bellard
    aSig |= 0x40000000;
1732 158142c2 bellard
 aBigger:
1733 158142c2 bellard
    zSig = aSig - bSig;
1734 158142c2 bellard
    zExp = aExp;
1735 158142c2 bellard
 normalizeRoundAndPack:
1736 158142c2 bellard
    --zExp;
1737 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1738 158142c2 bellard
1739 158142c2 bellard
}
1740 158142c2 bellard
1741 158142c2 bellard
/*----------------------------------------------------------------------------
1742 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1743 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1744 158142c2 bellard
| Binary Floating-Point Arithmetic.
1745 158142c2 bellard
*----------------------------------------------------------------------------*/
1746 158142c2 bellard
1747 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1748 158142c2 bellard
{
1749 158142c2 bellard
    flag aSign, bSign;
1750 158142c2 bellard
1751 158142c2 bellard
    aSign = extractFloat32Sign( a );
1752 158142c2 bellard
    bSign = extractFloat32Sign( b );
1753 158142c2 bellard
    if ( aSign == bSign ) {
1754 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1755 158142c2 bellard
    }
1756 158142c2 bellard
    else {
1757 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1758 158142c2 bellard
    }
1759 158142c2 bellard
1760 158142c2 bellard
}
1761 158142c2 bellard
1762 158142c2 bellard
/*----------------------------------------------------------------------------
1763 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1764 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1765 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1766 158142c2 bellard
*----------------------------------------------------------------------------*/
1767 158142c2 bellard
1768 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1769 158142c2 bellard
{
1770 158142c2 bellard
    flag aSign, bSign;
1771 158142c2 bellard
1772 158142c2 bellard
    aSign = extractFloat32Sign( a );
1773 158142c2 bellard
    bSign = extractFloat32Sign( b );
1774 158142c2 bellard
    if ( aSign == bSign ) {
1775 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1776 158142c2 bellard
    }
1777 158142c2 bellard
    else {
1778 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1779 158142c2 bellard
    }
1780 158142c2 bellard
1781 158142c2 bellard
}
1782 158142c2 bellard
1783 158142c2 bellard
/*----------------------------------------------------------------------------
1784 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1785 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1786 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1787 158142c2 bellard
*----------------------------------------------------------------------------*/
1788 158142c2 bellard
1789 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1790 158142c2 bellard
{
1791 158142c2 bellard
    flag aSign, bSign, zSign;
1792 158142c2 bellard
    int16 aExp, bExp, zExp;
1793 158142c2 bellard
    bits32 aSig, bSig;
1794 158142c2 bellard
    bits64 zSig64;
1795 158142c2 bellard
    bits32 zSig;
1796 158142c2 bellard
1797 158142c2 bellard
    aSig = extractFloat32Frac( a );
1798 158142c2 bellard
    aExp = extractFloat32Exp( a );
1799 158142c2 bellard
    aSign = extractFloat32Sign( a );
1800 158142c2 bellard
    bSig = extractFloat32Frac( b );
1801 158142c2 bellard
    bExp = extractFloat32Exp( b );
1802 158142c2 bellard
    bSign = extractFloat32Sign( b );
1803 158142c2 bellard
    zSign = aSign ^ bSign;
1804 158142c2 bellard
    if ( aExp == 0xFF ) {
1805 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1806 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1807 158142c2 bellard
        }
1808 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1809 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1810 158142c2 bellard
            return float32_default_nan;
1811 158142c2 bellard
        }
1812 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1813 158142c2 bellard
    }
1814 158142c2 bellard
    if ( bExp == 0xFF ) {
1815 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1816 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1817 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1818 158142c2 bellard
            return float32_default_nan;
1819 158142c2 bellard
        }
1820 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1821 158142c2 bellard
    }
1822 158142c2 bellard
    if ( aExp == 0 ) {
1823 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1824 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1825 158142c2 bellard
    }
1826 158142c2 bellard
    if ( bExp == 0 ) {
1827 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1828 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1829 158142c2 bellard
    }
1830 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1831 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1832 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1833 158142c2 bellard
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1834 158142c2 bellard
    zSig = zSig64;
1835 158142c2 bellard
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1836 158142c2 bellard
        zSig <<= 1;
1837 158142c2 bellard
        --zExp;
1838 158142c2 bellard
    }
1839 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1840 158142c2 bellard
1841 158142c2 bellard
}
1842 158142c2 bellard
1843 158142c2 bellard
/*----------------------------------------------------------------------------
1844 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1845 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1846 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1847 158142c2 bellard
*----------------------------------------------------------------------------*/
1848 158142c2 bellard
1849 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1850 158142c2 bellard
{
1851 158142c2 bellard
    flag aSign, bSign, zSign;
1852 158142c2 bellard
    int16 aExp, bExp, zExp;
1853 158142c2 bellard
    bits32 aSig, bSig, zSig;
1854 158142c2 bellard
1855 158142c2 bellard
    aSig = extractFloat32Frac( a );
1856 158142c2 bellard
    aExp = extractFloat32Exp( a );
1857 158142c2 bellard
    aSign = extractFloat32Sign( a );
1858 158142c2 bellard
    bSig = extractFloat32Frac( b );
1859 158142c2 bellard
    bExp = extractFloat32Exp( b );
1860 158142c2 bellard
    bSign = extractFloat32Sign( b );
1861 158142c2 bellard
    zSign = aSign ^ bSign;
1862 158142c2 bellard
    if ( aExp == 0xFF ) {
1863 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1864 158142c2 bellard
        if ( bExp == 0xFF ) {
1865 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1866 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1867 158142c2 bellard
            return float32_default_nan;
1868 158142c2 bellard
        }
1869 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1870 158142c2 bellard
    }
1871 158142c2 bellard
    if ( bExp == 0xFF ) {
1872 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1873 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
1874 158142c2 bellard
    }
1875 158142c2 bellard
    if ( bExp == 0 ) {
1876 158142c2 bellard
        if ( bSig == 0 ) {
1877 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
1878 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
1879 158142c2 bellard
                return float32_default_nan;
1880 158142c2 bellard
            }
1881 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
1882 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1883 158142c2 bellard
        }
1884 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1885 158142c2 bellard
    }
1886 158142c2 bellard
    if ( aExp == 0 ) {
1887 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1888 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1889 158142c2 bellard
    }
1890 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
1891 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1892 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1893 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
1894 158142c2 bellard
        aSig >>= 1;
1895 158142c2 bellard
        ++zExp;
1896 158142c2 bellard
    }
1897 158142c2 bellard
    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1898 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
1899 158142c2 bellard
        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1900 158142c2 bellard
    }
1901 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1902 158142c2 bellard
1903 158142c2 bellard
}
1904 158142c2 bellard
1905 158142c2 bellard
/*----------------------------------------------------------------------------
1906 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
1907 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
1908 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1909 158142c2 bellard
*----------------------------------------------------------------------------*/
1910 158142c2 bellard
1911 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
1912 158142c2 bellard
{
1913 ed086f3d Blue Swirl
    flag aSign, zSign;
1914 158142c2 bellard
    int16 aExp, bExp, expDiff;
1915 158142c2 bellard
    bits32 aSig, bSig;
1916 158142c2 bellard
    bits32 q;
1917 158142c2 bellard
    bits64 aSig64, bSig64, q64;
1918 158142c2 bellard
    bits32 alternateASig;
1919 158142c2 bellard
    sbits32 sigMean;
1920 158142c2 bellard
1921 158142c2 bellard
    aSig = extractFloat32Frac( a );
1922 158142c2 bellard
    aExp = extractFloat32Exp( a );
1923 158142c2 bellard
    aSign = extractFloat32Sign( a );
1924 158142c2 bellard
    bSig = extractFloat32Frac( b );
1925 158142c2 bellard
    bExp = extractFloat32Exp( b );
1926 158142c2 bellard
    if ( aExp == 0xFF ) {
1927 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1928 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1929 158142c2 bellard
        }
1930 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1931 158142c2 bellard
        return float32_default_nan;
1932 158142c2 bellard
    }
1933 158142c2 bellard
    if ( bExp == 0xFF ) {
1934 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1935 158142c2 bellard
        return a;
1936 158142c2 bellard
    }
1937 158142c2 bellard
    if ( bExp == 0 ) {
1938 158142c2 bellard
        if ( bSig == 0 ) {
1939 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1940 158142c2 bellard
            return float32_default_nan;
1941 158142c2 bellard
        }
1942 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1943 158142c2 bellard
    }
1944 158142c2 bellard
    if ( aExp == 0 ) {
1945 158142c2 bellard
        if ( aSig == 0 ) return a;
1946 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1947 158142c2 bellard
    }
1948 158142c2 bellard
    expDiff = aExp - bExp;
1949 158142c2 bellard
    aSig |= 0x00800000;
1950 158142c2 bellard
    bSig |= 0x00800000;
1951 158142c2 bellard
    if ( expDiff < 32 ) {
1952 158142c2 bellard
        aSig <<= 8;
1953 158142c2 bellard
        bSig <<= 8;
1954 158142c2 bellard
        if ( expDiff < 0 ) {
1955 158142c2 bellard
            if ( expDiff < -1 ) return a;
1956 158142c2 bellard
            aSig >>= 1;
1957 158142c2 bellard
        }
1958 158142c2 bellard
        q = ( bSig <= aSig );
1959 158142c2 bellard
        if ( q ) aSig -= bSig;
1960 158142c2 bellard
        if ( 0 < expDiff ) {
1961 158142c2 bellard
            q = ( ( (bits64) aSig )<<32 ) / bSig;
1962 158142c2 bellard
            q >>= 32 - expDiff;
1963 158142c2 bellard
            bSig >>= 2;
1964 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1965 158142c2 bellard
        }
1966 158142c2 bellard
        else {
1967 158142c2 bellard
            aSig >>= 2;
1968 158142c2 bellard
            bSig >>= 2;
1969 158142c2 bellard
        }
1970 158142c2 bellard
    }
1971 158142c2 bellard
    else {
1972 158142c2 bellard
        if ( bSig <= aSig ) aSig -= bSig;
1973 158142c2 bellard
        aSig64 = ( (bits64) aSig )<<40;
1974 158142c2 bellard
        bSig64 = ( (bits64) bSig )<<40;
1975 158142c2 bellard
        expDiff -= 64;
1976 158142c2 bellard
        while ( 0 < expDiff ) {
1977 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1978 158142c2 bellard
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1979 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
1980 158142c2 bellard
            expDiff -= 62;
1981 158142c2 bellard
        }
1982 158142c2 bellard
        expDiff += 64;
1983 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1984 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1985 158142c2 bellard
        q = q64>>( 64 - expDiff );
1986 158142c2 bellard
        bSig <<= 6;
1987 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1988 158142c2 bellard
    }
1989 158142c2 bellard
    do {
1990 158142c2 bellard
        alternateASig = aSig;
1991 158142c2 bellard
        ++q;
1992 158142c2 bellard
        aSig -= bSig;
1993 158142c2 bellard
    } while ( 0 <= (sbits32) aSig );
1994 158142c2 bellard
    sigMean = aSig + alternateASig;
1995 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1996 158142c2 bellard
        aSig = alternateASig;
1997 158142c2 bellard
    }
1998 158142c2 bellard
    zSign = ( (sbits32) aSig < 0 );
1999 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2000 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2001 158142c2 bellard
2002 158142c2 bellard
}
2003 158142c2 bellard
2004 158142c2 bellard
/*----------------------------------------------------------------------------
2005 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2006 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2007 158142c2 bellard
| Floating-Point Arithmetic.
2008 158142c2 bellard
*----------------------------------------------------------------------------*/
2009 158142c2 bellard
2010 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2011 158142c2 bellard
{
2012 158142c2 bellard
    flag aSign;
2013 158142c2 bellard
    int16 aExp, zExp;
2014 158142c2 bellard
    bits32 aSig, zSig;
2015 158142c2 bellard
    bits64 rem, term;
2016 158142c2 bellard
2017 158142c2 bellard
    aSig = extractFloat32Frac( a );
2018 158142c2 bellard
    aExp = extractFloat32Exp( a );
2019 158142c2 bellard
    aSign = extractFloat32Sign( a );
2020 158142c2 bellard
    if ( aExp == 0xFF ) {
2021 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2022 158142c2 bellard
        if ( ! aSign ) return a;
2023 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2024 158142c2 bellard
        return float32_default_nan;
2025 158142c2 bellard
    }
2026 158142c2 bellard
    if ( aSign ) {
2027 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2028 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2029 158142c2 bellard
        return float32_default_nan;
2030 158142c2 bellard
    }
2031 158142c2 bellard
    if ( aExp == 0 ) {
2032 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2033 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2034 158142c2 bellard
    }
2035 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2036 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2037 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2038 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2039 158142c2 bellard
        if ( zSig < 2 ) {
2040 158142c2 bellard
            zSig = 0x7FFFFFFF;
2041 158142c2 bellard
            goto roundAndPack;
2042 158142c2 bellard
        }
2043 158142c2 bellard
        aSig >>= aExp & 1;
2044 158142c2 bellard
        term = ( (bits64) zSig ) * zSig;
2045 158142c2 bellard
        rem = ( ( (bits64) aSig )<<32 ) - term;
2046 158142c2 bellard
        while ( (sbits64) rem < 0 ) {
2047 158142c2 bellard
            --zSig;
2048 158142c2 bellard
            rem += ( ( (bits64) zSig )<<1 ) | 1;
2049 158142c2 bellard
        }
2050 158142c2 bellard
        zSig |= ( rem != 0 );
2051 158142c2 bellard
    }
2052 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2053 158142c2 bellard
 roundAndPack:
2054 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2055 158142c2 bellard
2056 158142c2 bellard
}
2057 158142c2 bellard
2058 158142c2 bellard
/*----------------------------------------------------------------------------
2059 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2060 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2061 374dfc33 aurel32
| Floating-Point Arithmetic.
2062 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2063 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2064 374dfc33 aurel32
{
2065 374dfc33 aurel32
    flag aSign, zSign;
2066 374dfc33 aurel32
    int16 aExp;
2067 374dfc33 aurel32
    bits32 aSig, zSig, i;
2068 374dfc33 aurel32
2069 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2070 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2071 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2072 374dfc33 aurel32
2073 374dfc33 aurel32
    if ( aExp == 0 ) {
2074 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2075 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2076 374dfc33 aurel32
    }
2077 374dfc33 aurel32
    if ( aSign ) {
2078 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2079 374dfc33 aurel32
        return float32_default_nan;
2080 374dfc33 aurel32
    }
2081 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2082 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2083 374dfc33 aurel32
        return a;
2084 374dfc33 aurel32
    }
2085 374dfc33 aurel32
2086 374dfc33 aurel32
    aExp -= 0x7F;
2087 374dfc33 aurel32
    aSig |= 0x00800000;
2088 374dfc33 aurel32
    zSign = aExp < 0;
2089 374dfc33 aurel32
    zSig = aExp << 23;
2090 374dfc33 aurel32
2091 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2092 374dfc33 aurel32
        aSig = ( (bits64)aSig * aSig ) >> 23;
2093 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2094 374dfc33 aurel32
            aSig >>= 1;
2095 374dfc33 aurel32
            zSig |= i;
2096 374dfc33 aurel32
        }
2097 374dfc33 aurel32
    }
2098 374dfc33 aurel32
2099 374dfc33 aurel32
    if ( zSign )
2100 374dfc33 aurel32
        zSig = -zSig;
2101 374dfc33 aurel32
2102 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2103 374dfc33 aurel32
}
2104 374dfc33 aurel32
2105 374dfc33 aurel32
/*----------------------------------------------------------------------------
2106 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2107 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2108 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2109 158142c2 bellard
*----------------------------------------------------------------------------*/
2110 158142c2 bellard
2111 750afe93 bellard
int float32_eq( float32 a, float32 b STATUS_PARAM )
2112 158142c2 bellard
{
2113 158142c2 bellard
2114 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2115 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2116 158142c2 bellard
       ) {
2117 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2118 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2119 158142c2 bellard
        }
2120 158142c2 bellard
        return 0;
2121 158142c2 bellard
    }
2122 f090c9d4 pbrook
    return ( float32_val(a) == float32_val(b) ) ||
2123 f090c9d4 pbrook
            ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2124 158142c2 bellard
2125 158142c2 bellard
}
2126 158142c2 bellard
2127 158142c2 bellard
/*----------------------------------------------------------------------------
2128 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2129 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
2130 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2131 158142c2 bellard
| Arithmetic.
2132 158142c2 bellard
*----------------------------------------------------------------------------*/
2133 158142c2 bellard
2134 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2135 158142c2 bellard
{
2136 158142c2 bellard
    flag aSign, bSign;
2137 f090c9d4 pbrook
    bits32 av, bv;
2138 158142c2 bellard
2139 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2140 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2141 158142c2 bellard
       ) {
2142 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2143 158142c2 bellard
        return 0;
2144 158142c2 bellard
    }
2145 158142c2 bellard
    aSign = extractFloat32Sign( a );
2146 158142c2 bellard
    bSign = extractFloat32Sign( b );
2147 f090c9d4 pbrook
    av = float32_val(a);
2148 f090c9d4 pbrook
    bv = float32_val(b);
2149 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2150 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2151 158142c2 bellard
2152 158142c2 bellard
}
2153 158142c2 bellard
2154 158142c2 bellard
/*----------------------------------------------------------------------------
2155 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2156 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2157 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2158 158142c2 bellard
*----------------------------------------------------------------------------*/
2159 158142c2 bellard
2160 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2161 158142c2 bellard
{
2162 158142c2 bellard
    flag aSign, bSign;
2163 f090c9d4 pbrook
    bits32 av, bv;
2164 158142c2 bellard
2165 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2166 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2167 158142c2 bellard
       ) {
2168 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2169 158142c2 bellard
        return 0;
2170 158142c2 bellard
    }
2171 158142c2 bellard
    aSign = extractFloat32Sign( a );
2172 158142c2 bellard
    bSign = extractFloat32Sign( b );
2173 f090c9d4 pbrook
    av = float32_val(a);
2174 f090c9d4 pbrook
    bv = float32_val(b);
2175 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2176 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2177 158142c2 bellard
2178 158142c2 bellard
}
2179 158142c2 bellard
2180 158142c2 bellard
/*----------------------------------------------------------------------------
2181 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2182 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2183 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2184 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185 158142c2 bellard
*----------------------------------------------------------------------------*/
2186 158142c2 bellard
2187 750afe93 bellard
int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
2188 158142c2 bellard
{
2189 f090c9d4 pbrook
    bits32 av, bv;
2190 158142c2 bellard
2191 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2192 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2193 158142c2 bellard
       ) {
2194 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2195 158142c2 bellard
        return 0;
2196 158142c2 bellard
    }
2197 f090c9d4 pbrook
    av = float32_val(a);
2198 f090c9d4 pbrook
    bv = float32_val(b);
2199 f090c9d4 pbrook
    return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2200 158142c2 bellard
2201 158142c2 bellard
}
2202 158142c2 bellard
2203 158142c2 bellard
/*----------------------------------------------------------------------------
2204 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2205 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2206 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2207 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2208 158142c2 bellard
*----------------------------------------------------------------------------*/
2209 158142c2 bellard
2210 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2211 158142c2 bellard
{
2212 158142c2 bellard
    flag aSign, bSign;
2213 f090c9d4 pbrook
    bits32 av, bv;
2214 158142c2 bellard
2215 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2216 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2217 158142c2 bellard
       ) {
2218 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2219 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2220 158142c2 bellard
        }
2221 158142c2 bellard
        return 0;
2222 158142c2 bellard
    }
2223 158142c2 bellard
    aSign = extractFloat32Sign( a );
2224 158142c2 bellard
    bSign = extractFloat32Sign( b );
2225 f090c9d4 pbrook
    av = float32_val(a);
2226 f090c9d4 pbrook
    bv = float32_val(b);
2227 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2228 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2229 158142c2 bellard
2230 158142c2 bellard
}
2231 158142c2 bellard
2232 158142c2 bellard
/*----------------------------------------------------------------------------
2233 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2234 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2235 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2236 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2237 158142c2 bellard
*----------------------------------------------------------------------------*/
2238 158142c2 bellard
2239 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2240 158142c2 bellard
{
2241 158142c2 bellard
    flag aSign, bSign;
2242 f090c9d4 pbrook
    bits32 av, bv;
2243 158142c2 bellard
2244 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2245 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2246 158142c2 bellard
       ) {
2247 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2248 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2249 158142c2 bellard
        }
2250 158142c2 bellard
        return 0;
2251 158142c2 bellard
    }
2252 158142c2 bellard
    aSign = extractFloat32Sign( a );
2253 158142c2 bellard
    bSign = extractFloat32Sign( b );
2254 f090c9d4 pbrook
    av = float32_val(a);
2255 f090c9d4 pbrook
    bv = float32_val(b);
2256 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2257 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2258 158142c2 bellard
2259 158142c2 bellard
}
2260 158142c2 bellard
2261 158142c2 bellard
/*----------------------------------------------------------------------------
2262 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2263 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2264 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2265 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2266 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2267 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2268 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2269 158142c2 bellard
*----------------------------------------------------------------------------*/
2270 158142c2 bellard
2271 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2272 158142c2 bellard
{
2273 158142c2 bellard
    flag aSign;
2274 158142c2 bellard
    int16 aExp, shiftCount;
2275 158142c2 bellard
    bits64 aSig;
2276 158142c2 bellard
2277 158142c2 bellard
    aSig = extractFloat64Frac( a );
2278 158142c2 bellard
    aExp = extractFloat64Exp( a );
2279 158142c2 bellard
    aSign = extractFloat64Sign( a );
2280 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2281 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2282 158142c2 bellard
    shiftCount = 0x42C - aExp;
2283 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2284 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2285 158142c2 bellard
2286 158142c2 bellard
}
2287 158142c2 bellard
2288 158142c2 bellard
/*----------------------------------------------------------------------------
2289 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2290 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2291 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2292 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2293 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2294 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2295 158142c2 bellard
| returned.
2296 158142c2 bellard
*----------------------------------------------------------------------------*/
2297 158142c2 bellard
2298 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2299 158142c2 bellard
{
2300 158142c2 bellard
    flag aSign;
2301 158142c2 bellard
    int16 aExp, shiftCount;
2302 158142c2 bellard
    bits64 aSig, savedASig;
2303 158142c2 bellard
    int32 z;
2304 158142c2 bellard
2305 158142c2 bellard
    aSig = extractFloat64Frac( a );
2306 158142c2 bellard
    aExp = extractFloat64Exp( a );
2307 158142c2 bellard
    aSign = extractFloat64Sign( a );
2308 158142c2 bellard
    if ( 0x41E < aExp ) {
2309 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2310 158142c2 bellard
        goto invalid;
2311 158142c2 bellard
    }
2312 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2313 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2314 158142c2 bellard
        return 0;
2315 158142c2 bellard
    }
2316 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2317 158142c2 bellard
    shiftCount = 0x433 - aExp;
2318 158142c2 bellard
    savedASig = aSig;
2319 158142c2 bellard
    aSig >>= shiftCount;
2320 158142c2 bellard
    z = aSig;
2321 158142c2 bellard
    if ( aSign ) z = - z;
2322 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2323 158142c2 bellard
 invalid:
2324 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2325 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2326 158142c2 bellard
    }
2327 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2328 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2329 158142c2 bellard
    }
2330 158142c2 bellard
    return z;
2331 158142c2 bellard
2332 158142c2 bellard
}
2333 158142c2 bellard
2334 158142c2 bellard
/*----------------------------------------------------------------------------
2335 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2336 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2337 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2338 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2339 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2340 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2341 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2342 158142c2 bellard
*----------------------------------------------------------------------------*/
2343 158142c2 bellard
2344 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2345 158142c2 bellard
{
2346 158142c2 bellard
    flag aSign;
2347 158142c2 bellard
    int16 aExp, shiftCount;
2348 158142c2 bellard
    bits64 aSig, aSigExtra;
2349 158142c2 bellard
2350 158142c2 bellard
    aSig = extractFloat64Frac( a );
2351 158142c2 bellard
    aExp = extractFloat64Exp( a );
2352 158142c2 bellard
    aSign = extractFloat64Sign( a );
2353 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2354 158142c2 bellard
    shiftCount = 0x433 - aExp;
2355 158142c2 bellard
    if ( shiftCount <= 0 ) {
2356 158142c2 bellard
        if ( 0x43E < aExp ) {
2357 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2358 158142c2 bellard
            if (    ! aSign
2359 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2360 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2361 158142c2 bellard
               ) {
2362 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2363 158142c2 bellard
            }
2364 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2365 158142c2 bellard
        }
2366 158142c2 bellard
        aSigExtra = 0;
2367 158142c2 bellard
        aSig <<= - shiftCount;
2368 158142c2 bellard
    }
2369 158142c2 bellard
    else {
2370 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2371 158142c2 bellard
    }
2372 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2373 158142c2 bellard
2374 158142c2 bellard
}
2375 158142c2 bellard
2376 158142c2 bellard
/*----------------------------------------------------------------------------
2377 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2378 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2379 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2380 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2381 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2382 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2383 158142c2 bellard
| returned.
2384 158142c2 bellard
*----------------------------------------------------------------------------*/
2385 158142c2 bellard
2386 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2387 158142c2 bellard
{
2388 158142c2 bellard
    flag aSign;
2389 158142c2 bellard
    int16 aExp, shiftCount;
2390 158142c2 bellard
    bits64 aSig;
2391 158142c2 bellard
    int64 z;
2392 158142c2 bellard
2393 158142c2 bellard
    aSig = extractFloat64Frac( a );
2394 158142c2 bellard
    aExp = extractFloat64Exp( a );
2395 158142c2 bellard
    aSign = extractFloat64Sign( a );
2396 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2397 158142c2 bellard
    shiftCount = aExp - 0x433;
2398 158142c2 bellard
    if ( 0 <= shiftCount ) {
2399 158142c2 bellard
        if ( 0x43E <= aExp ) {
2400 f090c9d4 pbrook
            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2401 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2402 158142c2 bellard
                if (    ! aSign
2403 158142c2 bellard
                     || (    ( aExp == 0x7FF )
2404 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2405 158142c2 bellard
                   ) {
2406 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2407 158142c2 bellard
                }
2408 158142c2 bellard
            }
2409 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2410 158142c2 bellard
        }
2411 158142c2 bellard
        z = aSig<<shiftCount;
2412 158142c2 bellard
    }
2413 158142c2 bellard
    else {
2414 158142c2 bellard
        if ( aExp < 0x3FE ) {
2415 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2416 158142c2 bellard
            return 0;
2417 158142c2 bellard
        }
2418 158142c2 bellard
        z = aSig>>( - shiftCount );
2419 158142c2 bellard
        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2420 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
2421 158142c2 bellard
        }
2422 158142c2 bellard
    }
2423 158142c2 bellard
    if ( aSign ) z = - z;
2424 158142c2 bellard
    return z;
2425 158142c2 bellard
2426 158142c2 bellard
}
2427 158142c2 bellard
2428 158142c2 bellard
/*----------------------------------------------------------------------------
2429 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2430 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2431 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2432 158142c2 bellard
| Arithmetic.
2433 158142c2 bellard
*----------------------------------------------------------------------------*/
2434 158142c2 bellard
2435 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
2436 158142c2 bellard
{
2437 158142c2 bellard
    flag aSign;
2438 158142c2 bellard
    int16 aExp;
2439 158142c2 bellard
    bits64 aSig;
2440 158142c2 bellard
    bits32 zSig;
2441 158142c2 bellard
2442 158142c2 bellard
    aSig = extractFloat64Frac( a );
2443 158142c2 bellard
    aExp = extractFloat64Exp( a );
2444 158142c2 bellard
    aSign = extractFloat64Sign( a );
2445 158142c2 bellard
    if ( aExp == 0x7FF ) {
2446 158142c2 bellard
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
2447 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
2448 158142c2 bellard
    }
2449 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
2450 158142c2 bellard
    zSig = aSig;
2451 158142c2 bellard
    if ( aExp || zSig ) {
2452 158142c2 bellard
        zSig |= 0x40000000;
2453 158142c2 bellard
        aExp -= 0x381;
2454 158142c2 bellard
    }
2455 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2456 158142c2 bellard
2457 158142c2 bellard
}
2458 158142c2 bellard
2459 60011498 Paul Brook
2460 60011498 Paul Brook
/*----------------------------------------------------------------------------
2461 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2462 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2463 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2464 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2465 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2466 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2467 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2468 60011498 Paul Brook
| significand.
2469 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2470 60011498 Paul Brook
static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
2471 60011498 Paul Brook
{
2472 60011498 Paul Brook
    return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig;
2473 60011498 Paul Brook
}
2474 60011498 Paul Brook
2475 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2476 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2477 60011498 Paul Brook
  
2478 60011498 Paul Brook
float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
2479 60011498 Paul Brook
{
2480 60011498 Paul Brook
    flag aSign;
2481 60011498 Paul Brook
    int16 aExp;
2482 60011498 Paul Brook
    bits32 aSig;
2483 60011498 Paul Brook
2484 60011498 Paul Brook
    aSign = a >> 15;
2485 60011498 Paul Brook
    aExp = (a >> 10) & 0x1f;
2486 60011498 Paul Brook
    aSig = a & 0x3ff;
2487 60011498 Paul Brook
2488 60011498 Paul Brook
    if (aExp == 0x1f && ieee) {
2489 60011498 Paul Brook
        if (aSig) {
2490 60011498 Paul Brook
            /* Make sure correct exceptions are raised.  */
2491 60011498 Paul Brook
            float32ToCommonNaN(a STATUS_VAR);
2492 60011498 Paul Brook
            aSig |= 0x200;
2493 60011498 Paul Brook
        }
2494 60011498 Paul Brook
        return packFloat32(aSign, 0xff, aSig << 13);
2495 60011498 Paul Brook
    }
2496 60011498 Paul Brook
    if (aExp == 0) {
2497 60011498 Paul Brook
        int8 shiftCount;
2498 60011498 Paul Brook
2499 60011498 Paul Brook
        if (aSig == 0) {
2500 60011498 Paul Brook
            return packFloat32(aSign, 0, 0);
2501 60011498 Paul Brook
        }
2502 60011498 Paul Brook
2503 60011498 Paul Brook
        shiftCount = countLeadingZeros32( aSig ) - 21;
2504 60011498 Paul Brook
        aSig = aSig << shiftCount;
2505 60011498 Paul Brook
        aExp = -shiftCount;
2506 60011498 Paul Brook
    }
2507 60011498 Paul Brook
    return packFloat32( aSign, aExp + 0x70, aSig << 13);
2508 60011498 Paul Brook
}
2509 60011498 Paul Brook
2510 60011498 Paul Brook
/*----------------------------------------------------------------------------
| Converts the single-precision floating-point value `a' to the
| half-precision format and returns the 16-bit encoding.  A nonzero `ieee'
| selects the standard IEEE encoding; zero selects the "ARM" encoding, which
| has no NaN/Inf encodings and therefore one more usable exponent value.  The
| result is rounded according to the current rounding mode.
*----------------------------------------------------------------------------*/
bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    flag belowHalfUlp;
    int16 aExp;
    bits32 aSig;
    bits32 mask;
    bits32 increment;
    int8 roundingMode;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NOTE(review): this path does not look at `ieee', so Inf/NaN inputs
           produce exponent 0x1f encodings in the ARM format as well --
           confirm the intended non-IEEE behaviour.  */
        if (aSig) {
            /* Make sure correct exceptions are raised.  */
            float32ToCommonNaN(a STATUS_VAR);
            aSig |= 0x00400000;
        }
        return packFloat16(aSign, 0x1f, aSig >> 13);
    }
    /* Only true zeros (exponent AND fraction zero) short-circuit here.  The
       test must be on the fraction, not the sign: otherwise positive
       subnormals are flushed to zero without any flag and -0 falls through
       into the rounding code.  */
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* Decimal point between bits 22 and 23.  */
    /* For a subnormal input the bit set here is not really present, but such
       values are far below the half-precision range, so only "tiny and
       nonzero" matters for them.  */
    aSig |= 0x00800000;
    aExp -= 0x7f;
    belowHalfUlp = 0;
    if (aExp < -14) {
        /* Result is subnormal or zero in half precision.  */
        if (aExp < -24) {
            /* The whole significand, leading bit included, lies below the
               smallest subnormal (2^-24).  Treat the value as sitting in
               [2^-25, 2^-24) so that directed rounding can still reach the
               smallest subnormal, and remember whether it is really below
               half of it so that round-to-nearest does not round up.  */
            belowHalfUlp = (aExp < -25);
            aExp = -25;
            mask = 0x00ffffff;
        } else {
            mask = 0x007fffff;
            mask >>= 24 + aExp;
        }
    } else {
        /* Normal result: the low 13 fraction bits are discarded.  */
        mask = 0x00001fff;
    }
    if (aSig & mask) {
        /* Inexact: some discarded bits are set.  */
        float_raise( float_flag_underflow STATUS_VAR );
        roundingMode = STATUS(float_rounding_mode);
        switch (roundingMode) {
        case float_round_nearest_even:
            if (belowHalfUlp) {
                /* Strictly less than half the smallest subnormal.  */
                increment = 0;
                break;
            }
            increment = (mask + 1) >> 1;
            if ((aSig & mask) == increment) {
                /* Exact tie: round up only if the kept lsb is odd.  */
                increment = aSig & (increment << 1);
            }
            break;
        case float_round_up:
            increment = aSign ? 0 : mask;
            break;
        case float_round_down:
            increment = aSign ? mask : 0;
            break;
        default: /* round_to_zero */
            increment = 0;
            break;
        }
        aSig += increment;
        if (aSig >= 0x01000000) {
            /* Rounding carried out of the significand: renormalize.  */
            aSig >>= 1;
            aExp++;
        }
    } else if (aExp < -14
          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
        float_raise( float_flag_underflow STATUS_VAR);
    }

    if (ieee) {
        if (aExp > 15) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        }
    } else {
        if (aExp > 16) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }
    if (aExp < -24) {
        /* Still below the smallest subnormal after rounding.  */
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        /* Denormalize: shift the significand down to exponent -14.  */
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
2596 60011498 Paul Brook
2597 158142c2 bellard
#ifdef FLOATX80
2598 158142c2 bellard
2599 158142c2 bellard
/*----------------------------------------------------------------------------
2600 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2601 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2602 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2603 158142c2 bellard
| Arithmetic.
2604 158142c2 bellard
*----------------------------------------------------------------------------*/
2605 158142c2 bellard
2606 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2607 158142c2 bellard
{
2608 158142c2 bellard
    flag aSign;
2609 158142c2 bellard
    int16 aExp;
2610 158142c2 bellard
    bits64 aSig;
2611 158142c2 bellard
2612 158142c2 bellard
    aSig = extractFloat64Frac( a );
2613 158142c2 bellard
    aExp = extractFloat64Exp( a );
2614 158142c2 bellard
    aSign = extractFloat64Sign( a );
2615 158142c2 bellard
    if ( aExp == 0x7FF ) {
2616 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
2617 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2618 158142c2 bellard
    }
2619 158142c2 bellard
    if ( aExp == 0 ) {
2620 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2621 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2622 158142c2 bellard
    }
2623 158142c2 bellard
    return
2624 158142c2 bellard
        packFloatx80(
2625 158142c2 bellard
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2626 158142c2 bellard
2627 158142c2 bellard
}
2628 158142c2 bellard
2629 158142c2 bellard
#endif
2630 158142c2 bellard
2631 158142c2 bellard
#ifdef FLOAT128
2632 158142c2 bellard
2633 158142c2 bellard
/*----------------------------------------------------------------------------
2634 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2635 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2636 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2637 158142c2 bellard
| Arithmetic.
2638 158142c2 bellard
*----------------------------------------------------------------------------*/
2639 158142c2 bellard
2640 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
2641 158142c2 bellard
{
2642 158142c2 bellard
    flag aSign;
2643 158142c2 bellard
    int16 aExp;
2644 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
2645 158142c2 bellard
2646 158142c2 bellard
    aSig = extractFloat64Frac( a );
2647 158142c2 bellard
    aExp = extractFloat64Exp( a );
2648 158142c2 bellard
    aSign = extractFloat64Sign( a );
2649 158142c2 bellard
    if ( aExp == 0x7FF ) {
2650 158142c2 bellard
        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
2651 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
2652 158142c2 bellard
    }
2653 158142c2 bellard
    if ( aExp == 0 ) {
2654 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2655 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2656 158142c2 bellard
        --aExp;
2657 158142c2 bellard
    }
2658 158142c2 bellard
    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2659 158142c2 bellard
    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2660 158142c2 bellard
2661 158142c2 bellard
}
2662 158142c2 bellard
2663 158142c2 bellard
#endif
2664 158142c2 bellard
2665 158142c2 bellard
/*----------------------------------------------------------------------------
2666 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2667 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2668 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2669 158142c2 bellard
| Floating-Point Arithmetic.
2670 158142c2 bellard
*----------------------------------------------------------------------------*/
2671 158142c2 bellard
2672 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
2673 158142c2 bellard
{
2674 158142c2 bellard
    flag aSign;
2675 158142c2 bellard
    int16 aExp;
2676 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
2677 158142c2 bellard
    int8 roundingMode;
2678 f090c9d4 pbrook
    bits64 z;
2679 158142c2 bellard
2680 158142c2 bellard
    aExp = extractFloat64Exp( a );
2681 158142c2 bellard
    if ( 0x433 <= aExp ) {
2682 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2683 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
2684 158142c2 bellard
        }
2685 158142c2 bellard
        return a;
2686 158142c2 bellard
    }
2687 158142c2 bellard
    if ( aExp < 0x3FF ) {
2688 f090c9d4 pbrook
        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
2689 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2690 158142c2 bellard
        aSign = extractFloat64Sign( a );
2691 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
2692 158142c2 bellard
         case float_round_nearest_even:
2693 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2694 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
2695 158142c2 bellard
            }
2696 158142c2 bellard
            break;
2697 158142c2 bellard
         case float_round_down:
2698 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
2699 158142c2 bellard
         case float_round_up:
2700 f090c9d4 pbrook
            return make_float64(
2701 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
2702 158142c2 bellard
        }
2703 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
2704 158142c2 bellard
    }
2705 158142c2 bellard
    lastBitMask = 1;
2706 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
2707 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
2708 f090c9d4 pbrook
    z = float64_val(a);
2709 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
2710 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
2711 158142c2 bellard
        z += lastBitMask>>1;
2712 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2713 158142c2 bellard
    }
2714 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
2715 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
2716 158142c2 bellard
            z += roundBitsMask;
2717 158142c2 bellard
        }
2718 158142c2 bellard
    }
2719 158142c2 bellard
    z &= ~ roundBitsMask;
2720 f090c9d4 pbrook
    if ( z != float64_val(a) )
2721 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
2722 f090c9d4 pbrook
    return make_float64(z);
2723 158142c2 bellard
2724 158142c2 bellard
}
2725 158142c2 bellard
2726 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integer using
| round-toward-zero, regardless of the current rounding mode, and returns the
| result as a double-precision floating-point value.  The rounding mode in
| the status is switched for the duration of the call and then restored.
*----------------------------------------------------------------------------*/
float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode;
    float64 truncated;

    savedMode = STATUS(float_rounding_mode);

    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);

    STATUS(float_rounding_mode) = savedMode;
    return truncated;
}
2736 e6e5906b pbrook
2737 158142c2 bellard
/*----------------------------------------------------------------------------
2738 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
2739 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2740 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
2741 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
2742 158142c2 bellard
| Floating-Point Arithmetic.
2743 158142c2 bellard
*----------------------------------------------------------------------------*/
2744 158142c2 bellard
2745 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2746 158142c2 bellard
{
2747 158142c2 bellard
    int16 aExp, bExp, zExp;
2748 158142c2 bellard
    bits64 aSig, bSig, zSig;
2749 158142c2 bellard
    int16 expDiff;
2750 158142c2 bellard
2751 158142c2 bellard
    aSig = extractFloat64Frac( a );
2752 158142c2 bellard
    aExp = extractFloat64Exp( a );
2753 158142c2 bellard
    bSig = extractFloat64Frac( b );
2754 158142c2 bellard
    bExp = extractFloat64Exp( b );
2755 158142c2 bellard
    expDiff = aExp - bExp;
2756 158142c2 bellard
    aSig <<= 9;
2757 158142c2 bellard
    bSig <<= 9;
2758 158142c2 bellard
    if ( 0 < expDiff ) {
2759 158142c2 bellard
        if ( aExp == 0x7FF ) {
2760 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2761 158142c2 bellard
            return a;
2762 158142c2 bellard
        }
2763 158142c2 bellard
        if ( bExp == 0 ) {
2764 158142c2 bellard
            --expDiff;
2765 158142c2 bellard
        }
2766 158142c2 bellard
        else {
2767 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
2768 158142c2 bellard
        }
2769 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
2770 158142c2 bellard
        zExp = aExp;
2771 158142c2 bellard
    }
2772 158142c2 bellard
    else if ( expDiff < 0 ) {
2773 158142c2 bellard
        if ( bExp == 0x7FF ) {
2774 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2775 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2776 158142c2 bellard
        }
2777 158142c2 bellard
        if ( aExp == 0 ) {
2778 158142c2 bellard
            ++expDiff;
2779 158142c2 bellard
        }
2780 158142c2 bellard
        else {
2781 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
2782 158142c2 bellard
        }
2783 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
2784 158142c2 bellard
        zExp = bExp;
2785 158142c2 bellard
    }
2786 158142c2 bellard
    else {
2787 158142c2 bellard
        if ( aExp == 0x7FF ) {
2788 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2789 158142c2 bellard
            return a;
2790 158142c2 bellard
        }
2791 fe76d976 pbrook
        if ( aExp == 0 ) {
2792 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
2793 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2794 fe76d976 pbrook
        }
2795 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2796 158142c2 bellard
        zExp = aExp;
2797 158142c2 bellard
        goto roundAndPack;
2798 158142c2 bellard
    }
2799 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
2800 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
2801 158142c2 bellard
    --zExp;
2802 158142c2 bellard
    if ( (sbits64) zSig < 0 ) {
2803 158142c2 bellard
        zSig = aSig + bSig;
2804 158142c2 bellard
        ++zExp;
2805 158142c2 bellard
    }
2806 158142c2 bellard
 roundAndPack:
2807 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2808 158142c2 bellard
2809 158142c2 bellard
}
2810 158142c2 bellard
2811 158142c2 bellard
/*----------------------------------------------------------------------------
2812 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
2813 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2814 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
2815 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2816 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2817 158142c2 bellard
*----------------------------------------------------------------------------*/
2818 158142c2 bellard
2819 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2820 158142c2 bellard
{
2821 158142c2 bellard
    int16 aExp, bExp, zExp;
2822 158142c2 bellard
    bits64 aSig, bSig, zSig;
2823 158142c2 bellard
    int16 expDiff;
2824 158142c2 bellard
2825 158142c2 bellard
    aSig = extractFloat64Frac( a );
2826 158142c2 bellard
    aExp = extractFloat64Exp( a );
2827 158142c2 bellard
    bSig = extractFloat64Frac( b );
2828 158142c2 bellard
    bExp = extractFloat64Exp( b );
2829 158142c2 bellard
    expDiff = aExp - bExp;
2830 158142c2 bellard
    aSig <<= 10;
2831 158142c2 bellard
    bSig <<= 10;
2832 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
2833 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
2834 158142c2 bellard
    if ( aExp == 0x7FF ) {
2835 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2836 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2837 158142c2 bellard
        return float64_default_nan;
2838 158142c2 bellard
    }
2839 158142c2 bellard
    if ( aExp == 0 ) {
2840 158142c2 bellard
        aExp = 1;
2841 158142c2 bellard
        bExp = 1;
2842 158142c2 bellard
    }
2843 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
2844 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
2845 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2846 158142c2 bellard
 bExpBigger:
2847 158142c2 bellard
    if ( bExp == 0x7FF ) {
2848 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2849 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2850 158142c2 bellard
    }
2851 158142c2 bellard
    if ( aExp == 0 ) {
2852 158142c2 bellard
        ++expDiff;
2853 158142c2 bellard
    }
2854 158142c2 bellard
    else {
2855 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
2856 158142c2 bellard
    }
2857 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
2858 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
2859 158142c2 bellard
 bBigger:
2860 158142c2 bellard
    zSig = bSig - aSig;
2861 158142c2 bellard
    zExp = bExp;
2862 158142c2 bellard
    zSign ^= 1;
2863 158142c2 bellard
    goto normalizeRoundAndPack;
2864 158142c2 bellard
 aExpBigger:
2865 158142c2 bellard
    if ( aExp == 0x7FF ) {
2866 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2867 158142c2 bellard
        return a;
2868 158142c2 bellard
    }
2869 158142c2 bellard
    if ( bExp == 0 ) {
2870 158142c2 bellard
        --expDiff;
2871 158142c2 bellard
    }
2872 158142c2 bellard
    else {
2873 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
2874 158142c2 bellard
    }
2875 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
2876 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
2877 158142c2 bellard
 aBigger:
2878 158142c2 bellard
    zSig = aSig - bSig;
2879 158142c2 bellard
    zExp = aExp;
2880 158142c2 bellard
 normalizeRoundAndPack:
2881 158142c2 bellard
    --zExp;
2882 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2883 158142c2 bellard
2884 158142c2 bellard
}
2885 158142c2 bellard
2886 158142c2 bellard
/*----------------------------------------------------------------------------
2887 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
2888 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
2889 158142c2 bellard
| Binary Floating-Point Arithmetic.
2890 158142c2 bellard
*----------------------------------------------------------------------------*/
2891 158142c2 bellard
2892 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
2893 158142c2 bellard
{
2894 158142c2 bellard
    flag aSign, bSign;
2895 158142c2 bellard
2896 158142c2 bellard
    aSign = extractFloat64Sign( a );
2897 158142c2 bellard
    bSign = extractFloat64Sign( b );
2898 158142c2 bellard
    if ( aSign == bSign ) {
2899 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2900 158142c2 bellard
    }
2901 158142c2 bellard
    else {
2902 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2903 158142c2 bellard
    }
2904 158142c2 bellard
2905 158142c2 bellard
}
2906 158142c2 bellard
2907 158142c2 bellard
/*----------------------------------------------------------------------------
2908 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
2909 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2910 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2911 158142c2 bellard
*----------------------------------------------------------------------------*/
2912 158142c2 bellard
2913 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2914 158142c2 bellard
{
2915 158142c2 bellard
    flag aSign, bSign;
2916 158142c2 bellard
2917 158142c2 bellard
    aSign = extractFloat64Sign( a );
2918 158142c2 bellard
    bSign = extractFloat64Sign( b );
2919 158142c2 bellard
    if ( aSign == bSign ) {
2920 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2921 158142c2 bellard
    }
2922 158142c2 bellard
    else {
2923 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2924 158142c2 bellard
    }
2925 158142c2 bellard
2926 158142c2 bellard
}
2927 158142c2 bellard
2928 158142c2 bellard
/*----------------------------------------------------------------------------
2929 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
2930 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2931 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2932 158142c2 bellard
*----------------------------------------------------------------------------*/
2933 158142c2 bellard
2934 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
2935 158142c2 bellard
{
2936 158142c2 bellard
    flag aSign, bSign, zSign;
2937 158142c2 bellard
    int16 aExp, bExp, zExp;
2938 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
2939 158142c2 bellard
2940 158142c2 bellard
    aSig = extractFloat64Frac( a );
2941 158142c2 bellard
    aExp = extractFloat64Exp( a );
2942 158142c2 bellard
    aSign = extractFloat64Sign( a );
2943 158142c2 bellard
    bSig = extractFloat64Frac( b );
2944 158142c2 bellard
    bExp = extractFloat64Exp( b );
2945 158142c2 bellard
    bSign = extractFloat64Sign( b );
2946 158142c2 bellard
    zSign = aSign ^ bSign;
2947 158142c2 bellard
    if ( aExp == 0x7FF ) {
2948 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2949 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2950 158142c2 bellard
        }
2951 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
2952 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2953 158142c2 bellard
            return float64_default_nan;
2954 158142c2 bellard
        }
2955 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2956 158142c2 bellard
    }
2957 158142c2 bellard
    if ( bExp == 0x7FF ) {
2958 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2959 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
2960 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2961 158142c2 bellard
            return float64_default_nan;
2962 158142c2 bellard
        }
2963 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2964 158142c2 bellard
    }
2965 158142c2 bellard
    if ( aExp == 0 ) {
2966 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2967 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2968 158142c2 bellard
    }
2969 158142c2 bellard
    if ( bExp == 0 ) {
2970 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2971 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2972 158142c2 bellard
    }
2973 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
2974 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2975 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2976 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2977 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
2978 158142c2 bellard
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2979 158142c2 bellard
        zSig0 <<= 1;
2980 158142c2 bellard
        --zExp;
2981 158142c2 bellard
    }
2982 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
2983 158142c2 bellard
2984 158142c2 bellard
}
2985 158142c2 bellard
2986 158142c2 bellard
/*----------------------------------------------------------------------------
2987 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
2988 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
2989 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2990 158142c2 bellard
*----------------------------------------------------------------------------*/
2991 158142c2 bellard
2992 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
2993 158142c2 bellard
{
2994 158142c2 bellard
    flag aSign, bSign, zSign;
2995 158142c2 bellard
    int16 aExp, bExp, zExp;
2996 158142c2 bellard
    bits64 aSig, bSig, zSig;
2997 158142c2 bellard
    bits64 rem0, rem1;
2998 158142c2 bellard
    bits64 term0, term1;
2999 158142c2 bellard
3000 158142c2 bellard
    aSig = extractFloat64Frac( a );
3001 158142c2 bellard
    aExp = extractFloat64Exp( a );
3002 158142c2 bellard
    aSign = extractFloat64Sign( a );
3003 158142c2 bellard
    bSig = extractFloat64Frac( b );
3004 158142c2 bellard
    bExp = extractFloat64Exp( b );
3005 158142c2 bellard
    bSign = extractFloat64Sign( b );
3006 158142c2 bellard
    zSign = aSign ^ bSign;
3007 158142c2 bellard
    if ( aExp == 0x7FF ) {
3008 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3009 158142c2 bellard
        if ( bExp == 0x7FF ) {
3010 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3011 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3012 158142c2 bellard
            return float64_default_nan;
3013 158142c2 bellard
        }
3014 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3015 158142c2 bellard
    }
3016 158142c2 bellard
    if ( bExp == 0x7FF ) {
3017 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3018 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
3019 158142c2 bellard
    }
3020 158142c2 bellard
    if ( bExp == 0 ) {
3021 158142c2 bellard
        if ( bSig == 0 ) {
3022 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3023 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3024 158142c2 bellard
                return float64_default_nan;
3025 158142c2 bellard
            }
3026 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3027 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3028 158142c2 bellard
        }
3029 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3030 158142c2 bellard
    }
3031 158142c2 bellard
    if ( aExp == 0 ) {
3032 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3033 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3034 158142c2 bellard
    }
3035 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
3036 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3037 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3038 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
3039 158142c2 bellard
        aSig >>= 1;
3040 158142c2 bellard
        ++zExp;
3041 158142c2 bellard
    }
3042 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
3043 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
3044 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
3045 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3046 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
3047 158142c2 bellard
            --zSig;
3048 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3049 158142c2 bellard
        }
3050 158142c2 bellard
        zSig |= ( rem1 != 0 );
3051 158142c2 bellard
    }
3052 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3053 158142c2 bellard
3054 158142c2 bellard
}
3055 158142c2 bellard
3056 158142c2 bellard
/*----------------------------------------------------------------------------
3057 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3058 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3059 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3060 158142c2 bellard
*----------------------------------------------------------------------------*/
3061 158142c2 bellard
3062 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3063 158142c2 bellard
{
3064 ed086f3d Blue Swirl
    flag aSign, zSign;
3065 158142c2 bellard
    int16 aExp, bExp, expDiff;
3066 158142c2 bellard
    bits64 aSig, bSig;
3067 158142c2 bellard
    bits64 q, alternateASig;
3068 158142c2 bellard
    sbits64 sigMean;
3069 158142c2 bellard
3070 158142c2 bellard
    aSig = extractFloat64Frac( a );
3071 158142c2 bellard
    aExp = extractFloat64Exp( a );
3072 158142c2 bellard
    aSign = extractFloat64Sign( a );
3073 158142c2 bellard
    bSig = extractFloat64Frac( b );
3074 158142c2 bellard
    bExp = extractFloat64Exp( b );
3075 158142c2 bellard
    if ( aExp == 0x7FF ) {
3076 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3077 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3078 158142c2 bellard
        }
3079 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3080 158142c2 bellard
        return float64_default_nan;
3081 158142c2 bellard
    }
3082 158142c2 bellard
    if ( bExp == 0x7FF ) {
3083 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3084 158142c2 bellard
        return a;
3085 158142c2 bellard
    }
3086 158142c2 bellard
    if ( bExp == 0 ) {
3087 158142c2 bellard
        if ( bSig == 0 ) {
3088 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3089 158142c2 bellard
            return float64_default_nan;
3090 158142c2 bellard
        }
3091 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3092 158142c2 bellard
    }
3093 158142c2 bellard
    if ( aExp == 0 ) {
3094 158142c2 bellard
        if ( aSig == 0 ) return a;
3095 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3096 158142c2 bellard
    }
3097 158142c2 bellard
    expDiff = aExp - bExp;
3098 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3099 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3100 158142c2 bellard
    if ( expDiff < 0 ) {
3101 158142c2 bellard
        if ( expDiff < -1 ) return a;
3102 158142c2 bellard
        aSig >>= 1;
3103 158142c2 bellard
    }
3104 158142c2 bellard
    q = ( bSig <= aSig );
3105 158142c2 bellard
    if ( q ) aSig -= bSig;
3106 158142c2 bellard
    expDiff -= 64;
3107 158142c2 bellard
    while ( 0 < expDiff ) {
3108 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3109 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3110 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
3111 158142c2 bellard
        expDiff -= 62;
3112 158142c2 bellard
    }
3113 158142c2 bellard
    expDiff += 64;
3114 158142c2 bellard
    if ( 0 < expDiff ) {
3115 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3116 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3117 158142c2 bellard
        q >>= 64 - expDiff;
3118 158142c2 bellard
        bSig >>= 2;
3119 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3120 158142c2 bellard
    }
3121 158142c2 bellard
    else {
3122 158142c2 bellard
        aSig >>= 2;
3123 158142c2 bellard
        bSig >>= 2;
3124 158142c2 bellard
    }
3125 158142c2 bellard
    do {
3126 158142c2 bellard
        alternateASig = aSig;
3127 158142c2 bellard
        ++q;
3128 158142c2 bellard
        aSig -= bSig;
3129 158142c2 bellard
    } while ( 0 <= (sbits64) aSig );
3130 158142c2 bellard
    sigMean = aSig + alternateASig;
3131 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3132 158142c2 bellard
        aSig = alternateASig;
3133 158142c2 bellard
    }
3134 158142c2 bellard
    zSign = ( (sbits64) aSig < 0 );
3135 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3136 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3137 158142c2 bellard
3138 158142c2 bellard
}
3139 158142c2 bellard
3140 158142c2 bellard
/*----------------------------------------------------------------------------
3141 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3142 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3143 158142c2 bellard
| Floating-Point Arithmetic.
3144 158142c2 bellard
*----------------------------------------------------------------------------*/
3145 158142c2 bellard
3146 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3147 158142c2 bellard
{
3148 158142c2 bellard
    flag aSign;
3149 158142c2 bellard
    int16 aExp, zExp;
3150 158142c2 bellard
    bits64 aSig, zSig, doubleZSig;
3151 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
3152 158142c2 bellard
3153 158142c2 bellard
    aSig = extractFloat64Frac( a );
3154 158142c2 bellard
    aExp = extractFloat64Exp( a );
3155 158142c2 bellard
    aSign = extractFloat64Sign( a );
3156 158142c2 bellard
    if ( aExp == 0x7FF ) {
3157 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3158 158142c2 bellard
        if ( ! aSign ) return a;
3159 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3160 158142c2 bellard
        return float64_default_nan;
3161 158142c2 bellard
    }
3162 158142c2 bellard
    if ( aSign ) {
3163 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3164 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3165 158142c2 bellard
        return float64_default_nan;
3166 158142c2 bellard
    }
3167 158142c2 bellard
    if ( aExp == 0 ) {
3168 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3169 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3170 158142c2 bellard
    }
3171 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3172 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
3173 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
3174 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
3175 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3176 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3177 158142c2 bellard
        doubleZSig = zSig<<1;
3178 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3179 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3180 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
3181 158142c2 bellard
            --zSig;
3182 158142c2 bellard
            doubleZSig -= 2;
3183 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3184 158142c2 bellard
        }
3185 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3186 158142c2 bellard
    }
3187 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3188 158142c2 bellard
3189 158142c2 bellard
}
3190 158142c2 bellard
3191 158142c2 bellard
/*----------------------------------------------------------------------------
3192 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3193 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3194 374dfc33 aurel32
| Floating-Point Arithmetic.
3195 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3196 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
3197 374dfc33 aurel32
{
3198 374dfc33 aurel32
    flag aSign, zSign;
3199 374dfc33 aurel32
    int16 aExp;
3200 374dfc33 aurel32
    bits64 aSig, aSig0, aSig1, zSig, i;
3201 374dfc33 aurel32
3202 374dfc33 aurel32
    aSig = extractFloat64Frac( a );
3203 374dfc33 aurel32
    aExp = extractFloat64Exp( a );
3204 374dfc33 aurel32
    aSign = extractFloat64Sign( a );
3205 374dfc33 aurel32
3206 374dfc33 aurel32
    if ( aExp == 0 ) {
3207 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3208 374dfc33 aurel32
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3209 374dfc33 aurel32
    }
3210 374dfc33 aurel32
    if ( aSign ) {
3211 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
3212 374dfc33 aurel32
        return float64_default_nan;
3213 374dfc33 aurel32
    }
3214 374dfc33 aurel32
    if ( aExp == 0x7FF ) {
3215 374dfc33 aurel32
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3216 374dfc33 aurel32
        return a;
3217 374dfc33 aurel32
    }
3218 374dfc33 aurel32
3219 374dfc33 aurel32
    aExp -= 0x3FF;
3220 374dfc33 aurel32
    aSig |= LIT64( 0x0010000000000000 );
3221 374dfc33 aurel32
    zSign = aExp < 0;
3222 374dfc33 aurel32
    zSig = (bits64)aExp << 52;
3223 374dfc33 aurel32
    for (i = 1LL << 51; i > 0; i >>= 1) {
3224 374dfc33 aurel32
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
3225 374dfc33 aurel32
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3226 374dfc33 aurel32
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
3227 374dfc33 aurel32
            aSig >>= 1;
3228 374dfc33 aurel32
            zSig |= i;
3229 374dfc33 aurel32
        }
3230 374dfc33 aurel32
    }
3231 374dfc33 aurel32
3232 374dfc33 aurel32
    if ( zSign )
3233 374dfc33 aurel32
        zSig = -zSig;
3234 374dfc33 aurel32
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3235 374dfc33 aurel32
}
3236 374dfc33 aurel32
3237 374dfc33 aurel32
/*----------------------------------------------------------------------------
3238 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3239 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The comparison is performed
3240 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3241 158142c2 bellard
*----------------------------------------------------------------------------*/
3242 158142c2 bellard
3243 750afe93 bellard
int float64_eq( float64 a, float64 b STATUS_PARAM )
3244 158142c2 bellard
{
3245 f090c9d4 pbrook
    bits64 av, bv;
3246 158142c2 bellard
3247 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3248 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3249 158142c2 bellard
       ) {
3250 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3251 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3252 158142c2 bellard
        }
3253 158142c2 bellard
        return 0;
3254 158142c2 bellard
    }
3255 f090c9d4 pbrook
    av = float64_val(a);
3256 a1b91bb4 pbrook
    bv = float64_val(b);
3257 f090c9d4 pbrook
    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3258 158142c2 bellard
3259 158142c2 bellard
}
3260 158142c2 bellard
3261 158142c2 bellard
/*----------------------------------------------------------------------------
3262 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3263 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3264 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3265 158142c2 bellard
| Arithmetic.
3266 158142c2 bellard
*----------------------------------------------------------------------------*/
3267 158142c2 bellard
3268 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
3269 158142c2 bellard
{
3270 158142c2 bellard
    flag aSign, bSign;
3271 f090c9d4 pbrook
    bits64 av, bv;
3272 158142c2 bellard
3273 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3274 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3275 158142c2 bellard
       ) {
3276 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3277 158142c2 bellard
        return 0;
3278 158142c2 bellard
    }
3279 158142c2 bellard
    aSign = extractFloat64Sign( a );
3280 158142c2 bellard
    bSign = extractFloat64Sign( b );
3281 f090c9d4 pbrook
    av = float64_val(a);
3282 a1b91bb4 pbrook
    bv = float64_val(b);
3283 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3284 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3285 158142c2 bellard
3286 158142c2 bellard
}
3287 158142c2 bellard
3288 158142c2 bellard
/*----------------------------------------------------------------------------
3289 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3290 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
3291 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3292 158142c2 bellard
*----------------------------------------------------------------------------*/
3293 158142c2 bellard
3294 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
3295 158142c2 bellard
{
3296 158142c2 bellard
    flag aSign, bSign;
3297 f090c9d4 pbrook
    bits64 av, bv;
3298 158142c2 bellard
3299 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3300 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3301 158142c2 bellard
       ) {
3302 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3303 158142c2 bellard
        return 0;
3304 158142c2 bellard
    }
3305 158142c2 bellard
    aSign = extractFloat64Sign( a );
3306 158142c2 bellard
    bSign = extractFloat64Sign( b );
3307 f090c9d4 pbrook
    av = float64_val(a);
3308 a1b91bb4 pbrook
    bv = float64_val(b);
3309 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3310 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3311 158142c2 bellard
3312 158142c2 bellard
}
3313 158142c2 bellard
3314 158142c2 bellard
/*----------------------------------------------------------------------------
3315 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3316 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3317 158142c2 bellard
| if either operand is a NaN.  Otherwise, the comparison is performed
3318 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3319 158142c2 bellard
*----------------------------------------------------------------------------*/
3320 158142c2 bellard
3321 750afe93 bellard
int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
{
    bits64 rawA, rawB;
    int aIsNaN, bIsNaN;

    /* Exponent field 0x7FF together with a nonzero fraction is a NaN. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Every NaN operand raises invalid in this variant. */
        float_raise( float_flag_invalid STATUS_VAR );
        return 0;
    }

    rawA = float64_val( a );
    rawB = float64_val( b );

    /* Equal bit patterns compare equal. */
    if ( rawA == rawB ) return 1;

    /* Otherwise equal only if neither operand has any bit set below the
       top bit (the shift discards the top bit of the combined pattern). */
    return (bits64) ( ( rawA | rawB )<<1 ) == 0;
}
3336 158142c2 bellard
3337 158142c2 bellard
/*----------------------------------------------------------------------------
3338 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3339 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3340 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3341 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3342 158142c2 bellard
*----------------------------------------------------------------------------*/
3343 158142c2 bellard
3344 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    bits64 rawA, rawB;
    int aIsNaN, bIsNaN;

    /* Exponent field 0x7FF together with a nonzero fraction is a NaN. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Only a signaling NaN raises invalid; the result is false for
           any NaN operand. */
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR );
        }
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    rawA = float64_val( a );
    rawB = float64_val( b );

    if ( signA != signB ) {
        /* Signs differ: true when `a' is the negative one, or when no bit
           other than the top bit is set in either operand. */
        if ( signA ) return 1;
        return (bits64) ( ( rawA | rawB )<<1 ) == 0;
    }

    /* Same sign: equal patterns satisfy <=; otherwise the unsigned
       ordering of the raw values is flipped for negatives. */
    if ( rawA == rawB ) return 1;
    return signA ^ ( rawA < rawB );
}
3365 158142c2 bellard
3366 158142c2 bellard
/*----------------------------------------------------------------------------
3367 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3368 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3369 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3370 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3371 158142c2 bellard
*----------------------------------------------------------------------------*/
3372 158142c2 bellard
3373 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    bits64 rawA, rawB;
    int aIsNaN, bIsNaN;

    /* Exponent field 0x7FF together with a nonzero fraction is a NaN. */
    aIsNaN = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    bIsNaN = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Only a signaling NaN raises invalid; the result is false for
           any NaN operand. */
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR );
        }
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    rawA = float64_val( a );
    rawB = float64_val( b );

    if ( signA != signB ) {
        /* Signs differ: `a' is smaller only if it is the negative one and
           some bit other than the top bit is set in either operand. */
        if ( ! signA ) return 0;
        return (bits64) ( ( rawA | rawB )<<1 ) != 0;
    }

    /* Same sign: identical patterns are never "less than"; otherwise the
       unsigned ordering of the raw values is flipped for negatives. */
    if ( rawA == rawB ) return 0;
    return signA ^ ( rawA < rawB );
}
3394 158142c2 bellard
3395 158142c2 bellard
#ifdef FLOATX80
3396 158142c2 bellard
3397 158142c2 bellard
/*----------------------------------------------------------------------------
3398 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3399 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3400 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3401 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3402 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3403 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3404 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3405 158142c2 bellard
*----------------------------------------------------------------------------*/
3406 158142c2 bellard
3407 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA, rightShift;
    bits64 sigA;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    /* A NaN (maximum exponent, nonzero bits below the top significand bit)
       is handled as a positive value. */
    if ( expA == 0x7FFF ) {
        if ( (bits64) ( sigA<<1 ) ) signA = 0;
    }

    /* The shift distance is never allowed to drop below 1. */
    rightShift = 0x4037 - expA;
    if ( rightShift < 1 ) rightShift = 1;

    shift64RightJamming( sigA, rightShift, &sigA );
    return roundAndPackInt32( signA, sigA STATUS_VAR );
}
3423 158142c2 bellard
3424 158142c2 bellard
/*----------------------------------------------------------------------------
3425 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3426 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3427 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3428 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3429 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3430 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3431 158142c2 bellard
| sign as `a' is returned.
3432 158142c2 bellard
*----------------------------------------------------------------------------*/
3433 158142c2 bellard
3434 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA, rightShift;
    bits64 sigA, truncated;
    int32 z;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    if ( expA > 0x401E ) {
        /* Exponent too large for the result; a NaN is reported as a
           positive overflow. */
        if ( ( expA == 0x7FFF ) && (bits64) ( sigA<<1 ) ) signA = 0;
        goto invalid;
    }
    if ( expA < 0x3FFF ) {
        /* Result truncates to 0; inexact unless the input was a zero. */
        if ( expA || sigA ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }

    rightShift = 0x403E - expA;
    truncated = sigA>>rightShift;
    z = truncated;
    if ( signA ) z = - z;

    /* A result whose sign disagrees with the input sign is an overflow. */
    if ( ( z < 0 ) ^ signA ) goto invalid;

    /* Shifting back reveals whether any bits were discarded. */
    if ( ( truncated<<rightShift ) != sigA ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

 invalid:
    float_raise( float_flag_invalid STATUS_VAR );
    return signA ? (sbits32) 0x80000000 : 0x7FFFFFFF;
}
3468 158142c2 bellard
3469 158142c2 bellard
/*----------------------------------------------------------------------------
3470 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3471 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3472 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3473 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3474 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3475 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3476 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3477 158142c2 bellard
*----------------------------------------------------------------------------*/
3478 158142c2 bellard
3479 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA, rightShift;
    bits64 sigA, sigExtra;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );
    rightShift = 0x403E - expA;

    if ( rightShift < 0 ) {
        /* Exponent too large for the result: raise invalid and saturate.
           Positive inputs, and inputs with the maximum exponent whose
           significand is anything other than just the top bit, give the
           largest positive value. */
        float_raise( float_flag_invalid STATUS_VAR );
        if ( ! signA ) return LIT64( 0x7FFFFFFFFFFFFFFF );
        if ( ( expA == 0x7FFF ) && ( sigA != LIT64( 0x8000000000000000 ) ) ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (sbits64) LIT64( 0x8000000000000000 );
    }

    if ( rightShift == 0 ) {
        /* No shift needed, so there are no extra bits to round on. */
        sigExtra = 0;
    }
    else {
        shift64ExtraRightJamming( sigA, 0, rightShift, &sigA, &sigExtra );
    }
    return roundAndPackInt64( signA, sigA, sigExtra STATUS_VAR );
}
3508 158142c2 bellard
3509 158142c2 bellard
/*----------------------------------------------------------------------------
3510 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3511 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3512 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3513 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3514 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3515 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3516 158142c2 bellard
| sign as `a' is returned.
3517 158142c2 bellard
*----------------------------------------------------------------------------*/
3518 158142c2 bellard
3519 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA, rightShift;
    bits64 sigA;
    int64 z;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    if ( 0x403E <= expA ) {
        /* Drop the top significand bit; what remains is the fraction. */
        sigA &= LIT64( 0x7FFFFFFFFFFFFFFF );
        if ( ( a.high == 0xC03E ) && ( sigA == 0 ) ) {
            /* The single input in this exponent range that converts
               without an exception. */
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR );
        if ( ! signA || ( ( expA == 0x7FFF ) && sigA ) ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (sbits64) LIT64( 0x8000000000000000 );
    }
    if ( expA < 0x3FFF ) {
        /* Result truncates to 0; inexact unless the input was a zero. */
        if ( expA || sigA ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }

    /* Here 0x3FFF <= expA <= 0x403D, so the shift is in 1..63. */
    rightShift = 0x403E - expA;
    z = sigA>>rightShift;

    /* Any bits shifted out on the right make the result inexact. */
    if ( (bits64) ( sigA<<( 64 - rightShift ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if ( signA ) z = - z;
    return z;
}
3552 158142c2 bellard
3553 158142c2 bellard
/*----------------------------------------------------------------------------
3554 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3555 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3556 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3557 158142c2 bellard
| Floating-Point Arithmetic.
3558 158142c2 bellard
*----------------------------------------------------------------------------*/
3559 158142c2 bellard
3560 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA;
    bits64 sigA;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    if ( expA != 0x7FFF ) {
        /* Finite input: narrow the significand (keeping a sticky bit),
           adjust the exponent unless the value is a zero, then round. */
        shift64RightJamming( sigA, 33, &sigA );
        if ( expA || sigA ) expA -= 0x3F81;
        return roundAndPackFloat32( signA, expA, sigA STATUS_VAR );
    }

    /* Maximum exponent: infinity when nothing is set below the top
       significand bit, otherwise a NaN. */
    if ( (bits64) ( sigA<<1 ) == 0 ) {
        return packFloat32( signA, 0xFF, 0 );
    }
    return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
}
3580 158142c2 bellard
3581 158142c2 bellard
/*----------------------------------------------------------------------------
3582 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3583 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3584 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3585 158142c2 bellard
| Floating-Point Arithmetic.
3586 158142c2 bellard
*----------------------------------------------------------------------------*/
3587 158142c2 bellard
3588 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA;
    bits64 sigA, narrowedSig;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    if ( expA != 0x7FFF ) {
        /* Finite input: shift the significand right by one (keeping a
           sticky bit), adjust the exponent unless the value is a zero,
           then round. */
        shift64RightJamming( sigA, 1, &narrowedSig );
        if ( expA || sigA ) expA -= 0x3C01;
        return roundAndPackFloat64( signA, expA, narrowedSig STATUS_VAR );
    }

    /* Maximum exponent: infinity when nothing is set below the top
       significand bit, otherwise a NaN. */
    if ( (bits64) ( sigA<<1 ) == 0 ) {
        return packFloat64( signA, 0x7FF, 0 );
    }
    return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
}
3608 158142c2 bellard
3609 158142c2 bellard
#ifdef FLOAT128
3610 158142c2 bellard
3611 158142c2 bellard
/*----------------------------------------------------------------------------
3612 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3613 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3614 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3615 158142c2 bellard
| Floating-Point Arithmetic.
3616 158142c2 bellard
*----------------------------------------------------------------------------*/
3617 158142c2 bellard
3618 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    flag signA;
    int16 expA;
    bits64 sigA, zHigh, zLow;

    sigA = extractFloatx80Frac( a );
    expA = extractFloatx80Exp( a );
    signA = extractFloatx80Sign( a );

    /* Maximum exponent with bits set below the top significand bit: NaN. */
    if ( expA == 0x7FFF ) {
        if ( (bits64) ( sigA<<1 ) ) {
            return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
        }
    }

    /* Discard the top significand bit, then move the remaining bits into
       position across the two result words; the exponent is reused as is. */
    shift128Right( sigA<<1, 0, 16, &zHigh, &zLow );
    return packFloat128( signA, expA, zHigh, zLow );
}
3634 158142c2 bellard
3635 158142c2 bellard
#endif
3636 158142c2 bellard
3637 158142c2 bellard
/*----------------------------------------------------------------------------
3638 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
3639 158142c2 bellard
| and returns the result as an extended double-precision floating-point
3640 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
3641 158142c2 bellard
| Binary Floating-Point Arithmetic.
3642 158142c2 bellard
*----------------------------------------------------------------------------*/
3643 158142c2 bellard
3644 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag signA;
    int32 expA;
    bits64 fracBits, unitMask, belowUnitMask;
    int8 mode;
    floatx80 z;

    expA = extractFloatx80Exp( a );
    /* Significand with its top bit shifted out. */
    fracBits = (bits64) ( extractFloatx80Frac( a )<<1 );

    if ( 0x403E <= expA ) {
        /* Returned unchanged, apart from NaNs, which go through the usual
           NaN propagation. */
        if ( ( expA == 0x7FFF ) && fracBits ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    mode = STATUS(float_rounding_mode);

    if ( expA < 0x3FFF ) {
        /* The result is a zero or +/-1 (exponent 0x3FFF, top bit only). */
        if ( ( expA == 0 ) && ( fracBits == 0 ) ) return a;
        STATUS(float_exception_flags) |= float_flag_inexact;
        signA = extractFloatx80Sign( a );
        if ( mode == float_round_nearest_even ) {
            /* Exponent 0x3FFE with fraction bits set rounds to +/-1. */
            if ( ( expA == 0x3FFE ) && fracBits ) {
                return packFloatx80( signA, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
        }
        else if ( mode == float_round_down ) {
            if ( signA ) {
                return packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            return packFloatx80( 0, 0, 0 );
        }
        else if ( mode == float_round_up ) {
            if ( signA ) return packFloatx80( 1, 0, 0 );
            return packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        return packFloatx80( signA, 0, 0 );
    }

    /* 0x3FFF <= expA <= 0x403D: unitMask selects the lowest significand
       bit that is kept; belowUnitMask covers the bits to be cleared. */
    unitMask = (bits64) 1 << ( 0x403E - expA );
    belowUnitMask = unitMask - 1;
    z = a;
    if ( mode == float_round_nearest_even ) {
        /* Add half a unit; if the discarded bits are then all zero the
           input was exactly halfway, so the unit bit is cleared. */
        z.low += unitMask>>1;
        if ( ( z.low & belowUnitMask ) == 0 ) z.low &= ~ unitMask;
    }
    else if (    ( mode != float_round_to_zero )
              && ( extractFloatx80Sign( a ) ^ ( mode == float_round_up ) ) ) {
        /* Directed rounding away from zero: bump past any discarded bits. */
        z.low += belowUnitMask;
    }
    z.low &= ~ belowUnitMask;
    if ( z.low == 0 ) {
        /* The significand wrapped around: step the exponent up and
           restore the top bit. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;
}
3709 158142c2 bellard
3710 158142c2 bellard
/*----------------------------------------------------------------------------
3711 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
3712 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3713 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
3714 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3715 158142c2 bellard
| Floating-Point Arithmetic.
3716 158142c2 bellard
*----------------------------------------------------------------------------*/
3717 158142c2 bellard
3718 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent: shift `b' right to line up with it. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* An exponent field of 0 is aligned as if it were 1. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: shift `a' right to line up with it. */
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        /* An exponent field of 0 is aligned as if it were 1. */
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponent fields: no alignment shift is needed. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ( zSig0 < aSig ) {
                /* The 64-bit sum wrapped around, which requires the top
                   significand bit to be set in an operand even though the
                   exponent field is 0.  Take the carry path with the
                   effective exponent of 1 so the carry is not lost. */
                zExp = 1;
                goto shiftRight1;
            }
            if ( zSig0 == 0 ) {
                /* Both significands are zero: the sum is a zero carrying
                   the requested sign.  A zero has no leading 1 bit to
                   normalize, so it must not be passed on below. */
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit still set after the add: already in normalized position. */
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Shift right one place, set the top bit, and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
3775 158142c2 bellard
3776 158142c2 bellard
/*----------------------------------------------------------------------------
3777 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
3778 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3779 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3780 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3781 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3782 158142c2 bellard
*----------------------------------------------------------------------------*/
3783 158142c2 bellard
3784 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
3785 158142c2 bellard
{
3786 158142c2 bellard
    int32 aExp, bExp, zExp;
3787 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3788 158142c2 bellard
    int32 expDiff;
3789 158142c2 bellard
    floatx80 z;
3790 158142c2 bellard
3791 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3792 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3793 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3794 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3795 158142c2 bellard
    expDiff = aExp - bExp;
3796 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3797 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3798 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3799 158142c2 bellard
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3800 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3801 158142c2 bellard
        }
3802 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3803 158142c2 bellard
        z.low = floatx80_default_nan_low;
3804 158142c2 bellard
        z.high = floatx80_default_nan_high;
3805 158142c2 bellard
        return z;
3806 158142c2 bellard
    }
3807 158142c2 bellard
    if ( aExp == 0 ) {
3808 158142c2 bellard
        aExp = 1;
3809 158142c2 bellard
        bExp = 1;
3810 158142c2 bellard
    }
3811 158142c2 bellard
    zSig1 = 0;
3812 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3813 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3814 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3815 158142c2 bellard
 bExpBigger:
3816 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3817 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3818 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3819 158142c2 bellard
    }
3820 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
3821 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3822 158142c2 bellard
 bBigger:
3823 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3824 158142c2 bellard
    zExp = bExp;
3825 158142c2 bellard
    zSign ^= 1;
3826 158142c2 bellard
    goto normalizeRoundAndPack;
3827 158142c2 bellard
 aExpBigger:
3828 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3829 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3830 158142c2 bellard
        return a;
3831 158142c2 bellard
    }
3832 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
3833 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3834 158142c2 bellard
 aBigger:
3835 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3836 158142c2 bellard
    zExp = aExp;
3837 158142c2 bellard
 normalizeRoundAndPack:
3838 158142c2 bellard
    return
3839 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3840 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3841 158142c2 bellard
3842 158142c2 bellard
}
3843 158142c2 bellard
3844 158142c2 bellard
/*----------------------------------------------------------------------------
3845 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
3846 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
3847 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3848 158142c2 bellard
*----------------------------------------------------------------------------*/
3849 158142c2 bellard
3850 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
3851 158142c2 bellard
{
3852 158142c2 bellard
    flag aSign, bSign;
3853 158142c2 bellard
3854 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3855 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3856 158142c2 bellard
    if ( aSign == bSign ) {
3857 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3858 158142c2 bellard
    }
3859 158142c2 bellard
    else {
3860 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3861 158142c2 bellard
    }
3862 158142c2 bellard
3863 158142c2 bellard
}
3864 158142c2 bellard
3865 158142c2 bellard
/*----------------------------------------------------------------------------
3866 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
3867 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3868 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3869 158142c2 bellard
*----------------------------------------------------------------------------*/
3870 158142c2 bellard
3871 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
3872 158142c2 bellard
{
3873 158142c2 bellard
    flag aSign, bSign;
3874 158142c2 bellard
3875 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3876 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3877 158142c2 bellard
    if ( aSign == bSign ) {
3878 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3879 158142c2 bellard
    }
3880 158142c2 bellard
    else {
3881 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3882 158142c2 bellard
    }
3883 158142c2 bellard
3884 158142c2 bellard
}
3885 158142c2 bellard
3886 158142c2 bellard
/*----------------------------------------------------------------------------
3887 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
3888 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3889 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3890 158142c2 bellard
*----------------------------------------------------------------------------*/
3891 158142c2 bellard
3892 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
3893 158142c2 bellard
{
3894 158142c2 bellard
    flag aSign, bSign, zSign;
3895 158142c2 bellard
    int32 aExp, bExp, zExp;
3896 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3897 158142c2 bellard
    floatx80 z;
3898 158142c2 bellard
3899 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3900 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3901 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3902 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3903 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3904 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3905 158142c2 bellard
    zSign = aSign ^ bSign;
3906 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3907 158142c2 bellard
        if (    (bits64) ( aSig<<1 )
3908 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3909 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3910 158142c2 bellard
        }
3911 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) goto invalid;
3912 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3913 158142c2 bellard
    }
3914 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3915 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3916 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3917 158142c2 bellard
 invalid:
3918 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3919 158142c2 bellard
            z.low = floatx80_default_nan_low;
3920 158142c2 bellard
            z.high = floatx80_default_nan_high;
3921 158142c2 bellard
            return z;
3922 158142c2 bellard
        }
3923 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3924 158142c2 bellard
    }
3925 158142c2 bellard
    if ( aExp == 0 ) {
3926 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3927 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3928 158142c2 bellard
    }
3929 158142c2 bellard
    if ( bExp == 0 ) {
3930 158142c2 bellard
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3931 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3932 158142c2 bellard
    }
3933 158142c2 bellard
    zExp = aExp + bExp - 0x3FFE;
3934 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3935 158142c2 bellard
    if ( 0 < (sbits64) zSig0 ) {
3936 158142c2 bellard
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3937 158142c2 bellard
        --zExp;
3938 158142c2 bellard
    }
3939 158142c2 bellard
    return
3940 158142c2 bellard
        roundAndPackFloatx80(
3941 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3942 158142c2 bellard
3943 158142c2 bellard
}
3944 158142c2 bellard
3945 158142c2 bellard
/*----------------------------------------------------------------------------
3946 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
3947 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
3948 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3949 158142c2 bellard
*----------------------------------------------------------------------------*/
3950 158142c2 bellard
3951 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3952 158142c2 bellard
{
3953 158142c2 bellard
    flag aSign, bSign, zSign;
3954 158142c2 bellard
    int32 aExp, bExp, zExp;
3955 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3956 158142c2 bellard
    bits64 rem0, rem1, rem2, term0, term1, term2;
3957 158142c2 bellard
    floatx80 z;
3958 158142c2 bellard
3959 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3960 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3961 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3962 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3963 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3964 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3965 158142c2 bellard
    zSign = aSign ^ bSign;
3966 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3967 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3968 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3969 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3970 158142c2 bellard
            goto invalid;
3971 158142c2 bellard
        }
3972 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3973 158142c2 bellard
    }
3974 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3975 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3976 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
3977 158142c2 bellard
    }
3978 158142c2 bellard
    if ( bExp == 0 ) {
3979 158142c2 bellard
        if ( bSig == 0 ) {
3980 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3981 158142c2 bellard
 invalid:
3982 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3983 158142c2 bellard
                z.low = floatx80_default_nan_low;
3984 158142c2 bellard
                z.high = floatx80_default_nan_high;
3985 158142c2 bellard
                return z;
3986 158142c2 bellard
            }
3987 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3988 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3989 158142c2 bellard
        }
3990 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3991 158142c2 bellard
    }
3992 158142c2 bellard
    if ( aExp == 0 ) {
3993 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3994 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3995 158142c2 bellard
    }
3996 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
3997 158142c2 bellard
    rem1 = 0;
3998 158142c2 bellard
    if ( bSig <= aSig ) {
3999 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4000 158142c2 bellard
        ++zExp;
4001 158142c2 bellard
    }
4002 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4003 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4004 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4005 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4006 158142c2 bellard
        --zSig0;
4007 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4008 158142c2 bellard
    }
4009 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4010 158142c2 bellard
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
4011 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4012 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4013 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4014 158142c2 bellard
            --zSig1;
4015 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4016 158142c2 bellard
        }
4017 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4018 158142c2 bellard
    }
4019 158142c2 bellard
    return
4020 158142c2 bellard
        roundAndPackFloatx80(
4021 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4022 158142c2 bellard
4023 158142c2 bellard
}
4024 158142c2 bellard
4025 158142c2 bellard
/*----------------------------------------------------------------------------
4026 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4027 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4028 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4029 158142c2 bellard
*----------------------------------------------------------------------------*/
4030 158142c2 bellard
4031 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4032 158142c2 bellard
{
4033 ed086f3d Blue Swirl
    flag aSign, zSign;
4034 158142c2 bellard
    int32 aExp, bExp, expDiff;
4035 158142c2 bellard
    bits64 aSig0, aSig1, bSig;
4036 158142c2 bellard
    bits64 q, term0, term1, alternateASig0, alternateASig1;
4037 158142c2 bellard
    floatx80 z;
4038 158142c2 bellard
4039 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4040 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4041 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4042 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4043 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4044 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4045 158142c2 bellard
        if (    (bits64) ( aSig0<<1 )
4046 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
4047 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4048 158142c2 bellard
        }
4049 158142c2 bellard
        goto invalid;
4050 158142c2 bellard
    }
4051 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4052 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4053 158142c2 bellard
        return a;
4054 158142c2 bellard
    }
4055 158142c2 bellard
    if ( bExp == 0 ) {
4056 158142c2 bellard
        if ( bSig == 0 ) {
4057 158142c2 bellard
 invalid:
4058 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4059 158142c2 bellard
            z.low = floatx80_default_nan_low;
4060 158142c2 bellard
            z.high = floatx80_default_nan_high;
4061 158142c2 bellard
            return z;
4062 158142c2 bellard
        }
4063 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4064 158142c2 bellard
    }
4065 158142c2 bellard
    if ( aExp == 0 ) {
4066 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
4067 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4068 158142c2 bellard
    }
4069 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
4070 158142c2 bellard
    zSign = aSign;
4071 158142c2 bellard
    expDiff = aExp - bExp;
4072 158142c2 bellard
    aSig1 = 0;
4073 158142c2 bellard
    if ( expDiff < 0 ) {
4074 158142c2 bellard
        if ( expDiff < -1 ) return a;
4075 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4076 158142c2 bellard
        expDiff = 0;
4077 158142c2 bellard
    }
4078 158142c2 bellard
    q = ( bSig <= aSig0 );
4079 158142c2 bellard
    if ( q ) aSig0 -= bSig;
4080 158142c2 bellard
    expDiff -= 64;
4081 158142c2 bellard
    while ( 0 < expDiff ) {
4082 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4083 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4084 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
4085 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4086 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4087 158142c2 bellard
        expDiff -= 62;
4088 158142c2 bellard
    }
4089 158142c2 bellard
    expDiff += 64;
4090 158142c2 bellard
    if ( 0 < expDiff ) {
4091 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4092 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4093 158142c2 bellard
        q >>= 64 - expDiff;
4094 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4095 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4096 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4097 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4098 158142c2 bellard
            ++q;
4099 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4100 158142c2 bellard
        }
4101 158142c2 bellard
    }
4102 158142c2 bellard
    else {
4103 158142c2 bellard
        term1 = 0;
4104 158142c2 bellard
        term0 = bSig;
4105 158142c2 bellard
    }
4106 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4107 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4108 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4109 158142c2 bellard
              && ( q & 1 ) )
4110 158142c2 bellard
       ) {
4111 158142c2 bellard
        aSig0 = alternateASig0;
4112 158142c2 bellard
        aSig1 = alternateASig1;
4113 158142c2 bellard
        zSign = ! zSign;
4114 158142c2 bellard
    }
4115 158142c2 bellard
    return
4116 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4117 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4118 158142c2 bellard
4119 158142c2 bellard
}
4120 158142c2 bellard
4121 158142c2 bellard
/*----------------------------------------------------------------------------
4122 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4123 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4124 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4125 158142c2 bellard
*----------------------------------------------------------------------------*/
4126 158142c2 bellard
4127 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4128 158142c2 bellard
{
4129 158142c2 bellard
    flag aSign;
4130 158142c2 bellard
    int32 aExp, zExp;
4131 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4132 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4133 158142c2 bellard
    floatx80 z;
4134 158142c2 bellard
4135 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4136 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4137 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4138 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4139 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4140 158142c2 bellard
        if ( ! aSign ) return a;
4141 158142c2 bellard
        goto invalid;
4142 158142c2 bellard
    }
4143 158142c2 bellard
    if ( aSign ) {
4144 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4145 158142c2 bellard
 invalid:
4146 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4147 158142c2 bellard
        z.low = floatx80_default_nan_low;
4148 158142c2 bellard
        z.high = floatx80_default_nan_high;
4149 158142c2 bellard
        return z;
4150 158142c2 bellard
    }
4151 158142c2 bellard
    if ( aExp == 0 ) {
4152 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4153 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4154 158142c2 bellard
    }
4155 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4156 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4157 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4158 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4159 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4160 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4161 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4162 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4163 158142c2 bellard
        --zSig0;
4164 158142c2 bellard
        doubleZSig0 -= 2;
4165 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4166 158142c2 bellard
    }
4167 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4168 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4169 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4170 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4171 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4172 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4173 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4174 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4175 158142c2 bellard
            --zSig1;
4176 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4177 158142c2 bellard
            term3 |= 1;
4178 158142c2 bellard
            term2 |= doubleZSig0;
4179 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4180 158142c2 bellard
        }
4181 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4182 158142c2 bellard
    }
4183 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4184 158142c2 bellard
    zSig0 |= doubleZSig0;
4185 158142c2 bellard
    return
4186 158142c2 bellard
        roundAndPackFloatx80(
4187 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4188 158142c2 bellard
4189 158142c2 bellard
}
4190 158142c2 bellard
4191 158142c2 bellard
/*----------------------------------------------------------------------------
4192 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4193 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
4194 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4195 158142c2 bellard
| Arithmetic.
4196 158142c2 bellard
*----------------------------------------------------------------------------*/
4197 158142c2 bellard
4198 750afe93 bellard
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4199 158142c2 bellard
{
4200 158142c2 bellard
4201 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4202 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4203 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4204 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4205 158142c2 bellard
       ) {
4206 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4207 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4208 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4209 158142c2 bellard
        }
4210 158142c2 bellard
        return 0;
4211 158142c2 bellard
    }
4212 158142c2 bellard
    return
4213 158142c2 bellard
           ( a.low == b.low )
4214 158142c2 bellard
        && (    ( a.high == b.high )
4215 158142c2 bellard
             || (    ( a.low == 0 )
4216 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4217 158142c2 bellard
           );
4218 158142c2 bellard
4219 158142c2 bellard
}
4220 158142c2 bellard
4221 158142c2 bellard
/*----------------------------------------------------------------------------
4222 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4223 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4224 158142c2 bellard
| comparison is performed according to the IEC/IEEE Standard for Binary
4225 158142c2 bellard
| Floating-Point Arithmetic.
4226 158142c2 bellard
*----------------------------------------------------------------------------*/
4227 158142c2 bellard
4228 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4229 158142c2 bellard
{
4230 158142c2 bellard
    flag aSign, bSign;
4231 158142c2 bellard
4232 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4233 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4234 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4235 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4236 158142c2 bellard
       ) {
4237 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4238 158142c2 bellard
        return 0;
4239 158142c2 bellard
    }
4240 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4241 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4242 158142c2 bellard
    if ( aSign != bSign ) {
4243 158142c2 bellard
        return
4244 158142c2 bellard
               aSign
4245 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4246 158142c2 bellard
                 == 0 );
4247 158142c2 bellard
    }
4248 158142c2 bellard
    return
4249 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4250 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4251 158142c2 bellard
4252 158142c2 bellard
}
4253 158142c2 bellard
4254 158142c2 bellard
/*----------------------------------------------------------------------------
4255 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4256 158142c2 bellard
| less than the corresponding value `b', and 0 otherwise.  The comparison
4257 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4258 158142c2 bellard
| Arithmetic.
4259 158142c2 bellard
*----------------------------------------------------------------------------*/
4260 158142c2 bellard
4261 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4262 158142c2 bellard
{
4263 158142c2 bellard
    flag aSign, bSign;
4264 158142c2 bellard
4265 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4266 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4267 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4268 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4269 158142c2 bellard
       ) {
4270 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4271 158142c2 bellard
        return 0;
4272 158142c2 bellard
    }
4273 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4274 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4275 158142c2 bellard
    if ( aSign != bSign ) {
4276 158142c2 bellard
        return
4277 158142c2 bellard
               aSign
4278 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4279 158142c2 bellard
                 != 0 );
4280 158142c2 bellard
    }
4281 158142c2 bellard
    return
4282 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4283 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4284 158142c2 bellard
4285 158142c2 bellard
}
4286 158142c2 bellard
4287 158142c2 bellard
/*----------------------------------------------------------------------------
4288 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is equal
4289 158142c2 bellard
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4290 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4291 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4292 158142c2 bellard
*----------------------------------------------------------------------------*/
4293 158142c2 bellard
4294 750afe93 bellard
int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
4295 158142c2 bellard
{
4296 158142c2 bellard
4297 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4298 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4299 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4300 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4301 158142c2 bellard
       ) {
4302 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4303 158142c2 bellard
        return 0;
4304 158142c2 bellard
    }
4305 158142c2 bellard
    return
4306 158142c2 bellard
           ( a.low == b.low )
4307 158142c2 bellard
        && (    ( a.high == b.high )
4308 158142c2 bellard
             || (    ( a.low == 0 )
4309 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4310 158142c2 bellard
           );
4311 158142c2 bellard
4312 158142c2 bellard
}
4313 158142c2 bellard
4314 158142c2 bellard
/*----------------------------------------------------------------------------
4315 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4316 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4317 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4318 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4319 158142c2 bellard
*----------------------------------------------------------------------------*/
4320 158142c2 bellard
4321 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4322 158142c2 bellard
{
4323 158142c2 bellard
    flag aSign, bSign;
4324 158142c2 bellard
4325 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4326 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4327 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4328 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4329 158142c2 bellard
       ) {
4330 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4331 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4332 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4333 158142c2 bellard
        }
4334 158142c2 bellard
        return 0;
4335 158142c2 bellard
    }
4336 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4337 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4338 158142c2 bellard
    if ( aSign != bSign ) {
4339 158142c2 bellard
        return
4340 158142c2 bellard
               aSign
4341 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4342 158142c2 bellard
                 == 0 );
4343 158142c2 bellard
    }
4344 158142c2 bellard
    return
4345 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4346 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4347 158142c2 bellard
4348 158142c2 bellard
}
4349 158142c2 bellard
4350 158142c2 bellard
/*----------------------------------------------------------------------------
4351 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4352 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4353 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4354 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4355 158142c2 bellard
*----------------------------------------------------------------------------*/
4356 158142c2 bellard
4357 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4358 158142c2 bellard
{
4359 158142c2 bellard
    flag aSign, bSign;
4360 158142c2 bellard
4361 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4362 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4363 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4364 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4365 158142c2 bellard
       ) {
4366 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4367 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4368 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4369 158142c2 bellard
        }
4370 158142c2 bellard
        return 0;
4371 158142c2 bellard
    }
4372 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4373 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4374 158142c2 bellard
    if ( aSign != bSign ) {
4375 158142c2 bellard
        return
4376 158142c2 bellard
               aSign
4377 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4378 158142c2 bellard
                 != 0 );
4379 158142c2 bellard
    }
4380 158142c2 bellard
    return
4381 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4382 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4383 158142c2 bellard
4384 158142c2 bellard
}
4385 158142c2 bellard
4386 158142c2 bellard
#endif
4387 158142c2 bellard
4388 158142c2 bellard
#ifdef FLOAT128
4389 158142c2 bellard
4390 158142c2 bellard
/*----------------------------------------------------------------------------
4391 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4392 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4393 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4394 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4395 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4396 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4397 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4398 158142c2 bellard
*----------------------------------------------------------------------------*/
4399 158142c2 bellard
4400 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4401 158142c2 bellard
{
4402 158142c2 bellard
    flag aSign;
4403 158142c2 bellard
    int32 aExp, shiftCount;
4404 158142c2 bellard
    bits64 aSig0, aSig1;
4405 158142c2 bellard
4406 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4407 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4408 158142c2 bellard
    aExp = extractFloat128Exp( a );
4409 158142c2 bellard
    aSign = extractFloat128Sign( a );
4410 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4411 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4412 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4413 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4414 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4415 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4416 158142c2 bellard
4417 158142c2 bellard
}
4418 158142c2 bellard
4419 158142c2 bellard
/*----------------------------------------------------------------------------
4420 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4421 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4422 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4423 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4424 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4425 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4426 158142c2 bellard
| returned.
4427 158142c2 bellard
*----------------------------------------------------------------------------*/
4428 158142c2 bellard
4429 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4430 158142c2 bellard
{
4431 158142c2 bellard
    flag aSign;
4432 158142c2 bellard
    int32 aExp, shiftCount;
4433 158142c2 bellard
    bits64 aSig0, aSig1, savedASig;
4434 158142c2 bellard
    int32 z;
4435 158142c2 bellard
4436 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4437 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4438 158142c2 bellard
    aExp = extractFloat128Exp( a );
4439 158142c2 bellard
    aSign = extractFloat128Sign( a );
4440 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4441 158142c2 bellard
    if ( 0x401E < aExp ) {
4442 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4443 158142c2 bellard
        goto invalid;
4444 158142c2 bellard
    }
4445 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4446 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4447 158142c2 bellard
        return 0;
4448 158142c2 bellard
    }
4449 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4450 158142c2 bellard
    shiftCount = 0x402F - aExp;
4451 158142c2 bellard
    savedASig = aSig0;
4452 158142c2 bellard
    aSig0 >>= shiftCount;
4453 158142c2 bellard
    z = aSig0;
4454 158142c2 bellard
    if ( aSign ) z = - z;
4455 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4456 158142c2 bellard
 invalid:
4457 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4458 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4459 158142c2 bellard
    }
4460 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4461 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4462 158142c2 bellard
    }
4463 158142c2 bellard
    return z;
4464 158142c2 bellard
4465 158142c2 bellard
}
4466 158142c2 bellard
4467 158142c2 bellard
/*----------------------------------------------------------------------------
4468 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4469 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4470 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4471 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4472 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4473 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4474 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4475 158142c2 bellard
*----------------------------------------------------------------------------*/
4476 158142c2 bellard
4477 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4478 158142c2 bellard
{
4479 158142c2 bellard
    flag aSign;
4480 158142c2 bellard
    int32 aExp, shiftCount;
4481 158142c2 bellard
    bits64 aSig0, aSig1;
4482 158142c2 bellard
4483 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4484 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4485 158142c2 bellard
    aExp = extractFloat128Exp( a );
4486 158142c2 bellard
    aSign = extractFloat128Sign( a );
4487 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4488 158142c2 bellard
    shiftCount = 0x402F - aExp;
4489 158142c2 bellard
    if ( shiftCount <= 0 ) {
4490 158142c2 bellard
        if ( 0x403E < aExp ) {
4491 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4492 158142c2 bellard
            if (    ! aSign
4493 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4494 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4495 158142c2 bellard
                    )
4496 158142c2 bellard
               ) {
4497 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4498 158142c2 bellard
            }
4499 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4500 158142c2 bellard
        }
4501 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4502 158142c2 bellard
    }
4503 158142c2 bellard
    else {
4504 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4505 158142c2 bellard
    }
4506 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4507 158142c2 bellard
4508 158142c2 bellard
}
4509 158142c2 bellard
4510 158142c2 bellard
/*----------------------------------------------------------------------------
4511 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4512 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4513 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4514 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4515 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4516 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4517 158142c2 bellard
| returned.
4518 158142c2 bellard
*----------------------------------------------------------------------------*/
4519 158142c2 bellard
4520 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4521 158142c2 bellard
{
4522 158142c2 bellard
    flag aSign;
4523 158142c2 bellard
    int32 aExp, shiftCount;
4524 158142c2 bellard
    bits64 aSig0, aSig1;
4525 158142c2 bellard
    int64 z;
4526 158142c2 bellard
4527 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4528 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4529 158142c2 bellard
    aExp = extractFloat128Exp( a );
4530 158142c2 bellard
    aSign = extractFloat128Sign( a );
4531 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4532 158142c2 bellard
    shiftCount = aExp - 0x402F;
4533 158142c2 bellard
    if ( 0 < shiftCount ) {
4534 158142c2 bellard
        if ( 0x403E <= aExp ) {
4535 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4536 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4537 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4538 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4539 158142c2 bellard
            }
4540 158142c2 bellard
            else {
4541 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4542 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4543 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4544 158142c2 bellard
                }
4545 158142c2 bellard
            }
4546 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4547 158142c2 bellard
        }
4548 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4549 158142c2 bellard
        if ( (bits64) ( aSig1<<shiftCount ) ) {
4550 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4551 158142c2 bellard
        }
4552 158142c2 bellard
    }
4553 158142c2 bellard
    else {
4554 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4555 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4556 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4557 158142c2 bellard
            }
4558 158142c2 bellard
            return 0;
4559 158142c2 bellard
        }
4560 158142c2 bellard
        z = aSig0>>( - shiftCount );
4561 158142c2 bellard
        if (    aSig1
4562 158142c2 bellard
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4563 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4564 158142c2 bellard
        }
4565 158142c2 bellard
    }
4566 158142c2 bellard
    if ( aSign ) z = - z;
4567 158142c2 bellard
    return z;
4568 158142c2 bellard
4569 158142c2 bellard
}
4570 158142c2 bellard
4571 158142c2 bellard
/*----------------------------------------------------------------------------
4572 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4573 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4574 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4575 158142c2 bellard
| Arithmetic.
4576 158142c2 bellard
*----------------------------------------------------------------------------*/
4577 158142c2 bellard
4578 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4579 158142c2 bellard
{
4580 158142c2 bellard
    flag aSign;
4581 158142c2 bellard
    int32 aExp;
4582 158142c2 bellard
    bits64 aSig0, aSig1;
4583 158142c2 bellard
    bits32 zSig;
4584 158142c2 bellard
4585 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4586 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4587 158142c2 bellard
    aExp = extractFloat128Exp( a );
4588 158142c2 bellard
    aSign = extractFloat128Sign( a );
4589 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4590 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4591 158142c2 bellard
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4592 158142c2 bellard
        }
4593 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
4594 158142c2 bellard
    }
4595 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4596 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
4597 158142c2 bellard
    zSig = aSig0;
4598 158142c2 bellard
    if ( aExp || zSig ) {
4599 158142c2 bellard
        zSig |= 0x40000000;
4600 158142c2 bellard
        aExp -= 0x3F81;
4601 158142c2 bellard
    }
4602 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4603 158142c2 bellard
4604 158142c2 bellard
}
4605 158142c2 bellard
4606 158142c2 bellard
/*----------------------------------------------------------------------------
4607 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4608 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
4609 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4610 158142c2 bellard
| Arithmetic.
4611 158142c2 bellard
*----------------------------------------------------------------------------*/
4612 158142c2 bellard
4613 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
4614 158142c2 bellard
{
4615 158142c2 bellard
    flag aSign;
4616 158142c2 bellard
    int32 aExp;
4617 158142c2 bellard
    bits64 aSig0, aSig1;
4618 158142c2 bellard
4619 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4620 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4621 158142c2 bellard
    aExp = extractFloat128Exp( a );
4622 158142c2 bellard
    aSign = extractFloat128Sign( a );
4623 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4624 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4625 158142c2 bellard
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4626 158142c2 bellard
        }
4627 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
4628 158142c2 bellard
    }
4629 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4630 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4631 158142c2 bellard
    if ( aExp || aSig0 ) {
4632 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4633 158142c2 bellard
        aExp -= 0x3C01;
4634 158142c2 bellard
    }
4635 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4636 158142c2 bellard
4637 158142c2 bellard
}
4638 158142c2 bellard
4639 158142c2 bellard
#ifdef FLOATX80
4640 158142c2 bellard
4641 158142c2 bellard
/*----------------------------------------------------------------------------
4642 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4643 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
4644 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4645 158142c2 bellard
| Floating-Point Arithmetic.
4646 158142c2 bellard
*----------------------------------------------------------------------------*/
4647 158142c2 bellard
4648 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4649 158142c2 bellard
{
4650 158142c2 bellard
    flag aSign;
4651 158142c2 bellard
    int32 aExp;
4652 158142c2 bellard
    bits64 aSig0, aSig1;
4653 158142c2 bellard
4654 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4655 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4656 158142c2 bellard
    aExp = extractFloat128Exp( a );
4657 158142c2 bellard
    aSign = extractFloat128Sign( a );
4658 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4659 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4660 158142c2 bellard
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4661 158142c2 bellard
        }
4662 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4663 158142c2 bellard
    }
4664 158142c2 bellard
    if ( aExp == 0 ) {
4665 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4666 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4667 158142c2 bellard
    }
4668 158142c2 bellard
    else {
4669 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
4670 158142c2 bellard
    }
4671 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4672 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4673 158142c2 bellard
4674 158142c2 bellard
}
4675 158142c2 bellard
4676 158142c2 bellard
#endif
4677 158142c2 bellard
4678 158142c2 bellard
/*----------------------------------------------------------------------------
4679 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
4680 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
4681 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
4682 158142c2 bellard
| Floating-Point Arithmetic.
4683 158142c2 bellard
*----------------------------------------------------------------------------*/
4684 158142c2 bellard
4685 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
4686 158142c2 bellard
{
4687 158142c2 bellard
    flag aSign;
4688 158142c2 bellard
    int32 aExp;
4689 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
4690 158142c2 bellard
    int8 roundingMode;
4691 158142c2 bellard
    float128 z;
4692 158142c2 bellard
4693 158142c2 bellard
    aExp = extractFloat128Exp( a );
4694 158142c2 bellard
    if ( 0x402F <= aExp ) {
4695 158142c2 bellard
        if ( 0x406F <= aExp ) {
4696 158142c2 bellard
            if (    ( aExp == 0x7FFF )
4697 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4698 158142c2 bellard
               ) {
4699 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
4700 158142c2 bellard
            }
4701 158142c2 bellard
            return a;
4702 158142c2 bellard
        }
4703 158142c2 bellard
        lastBitMask = 1;
4704 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4705 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4706 158142c2 bellard
        z = a;
4707 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4708 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4709 158142c2 bellard
            if ( lastBitMask ) {
4710 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4711 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4712 158142c2 bellard
            }
4713 158142c2 bellard
            else {
4714 158142c2 bellard
                if ( (sbits64) z.low < 0 ) {
4715 158142c2 bellard
                    ++z.high;
4716 158142c2 bellard
                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4717 158142c2 bellard
                }
4718 158142c2 bellard
            }
4719 158142c2 bellard
        }
4720 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4721 158142c2 bellard
            if (   extractFloat128Sign( z )
4722 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4723 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4724 158142c2 bellard
            }
4725 158142c2 bellard
        }
4726 158142c2 bellard
        z.low &= ~ roundBitsMask;
4727 158142c2 bellard
    }
4728 158142c2 bellard
    else {
4729 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4730 158142c2 bellard
            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4731 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4732 158142c2 bellard
            aSign = extractFloat128Sign( a );
4733 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
4734 158142c2 bellard
             case float_round_nearest_even:
4735 158142c2 bellard
                if (    ( aExp == 0x3FFE )
4736 158142c2 bellard
                     && (   extractFloat128Frac0( a )
4737 158142c2 bellard
                          | extractFloat128Frac1( a ) )
4738 158142c2 bellard
                   ) {
4739 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4740 158142c2 bellard
                }
4741 158142c2 bellard
                break;
4742 158142c2 bellard
             case float_round_down:
4743 158142c2 bellard
                return
4744 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4745 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
4746 158142c2 bellard
             case float_round_up:
4747 158142c2 bellard
                return
4748 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
4749 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
4750 158142c2 bellard
            }
4751 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
4752 158142c2 bellard
        }
4753 158142c2 bellard
        lastBitMask = 1;
4754 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
4755 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4756 158142c2 bellard
        z.low = 0;
4757 158142c2 bellard
        z.high = a.high;
4758 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4759 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4760 158142c2 bellard
            z.high += lastBitMask>>1;
4761 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4762 158142c2 bellard
                z.high &= ~ lastBitMask;
4763 158142c2 bellard
            }
4764 158142c2 bellard
        }
4765 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4766 158142c2 bellard
            if (   extractFloat128Sign( z )
4767 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4768 158142c2 bellard
                z.high |= ( a.low != 0 );
4769 158142c2 bellard
                z.high += roundBitsMask;
4770 158142c2 bellard
            }
4771 158142c2 bellard
        }
4772 158142c2 bellard
        z.high &= ~ roundBitsMask;
4773 158142c2 bellard
    }
4774 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4775 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4776 158142c2 bellard
    }
4777 158142c2 bellard
    return z;
4778 158142c2 bellard
4779 158142c2 bellard
}
4780 158142c2 bellard
4781 158142c2 bellard
/*----------------------------------------------------------------------------
4782 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
4783 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4784 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
4785 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4786 158142c2 bellard
| Floating-Point Arithmetic.
4787 158142c2 bellard
*----------------------------------------------------------------------------*/
4788 158142c2 bellard
4789 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4790 158142c2 bellard
{
4791 158142c2 bellard
    int32 aExp, bExp, zExp;
4792 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4793 158142c2 bellard
    int32 expDiff;
4794 158142c2 bellard
4795 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4796 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4797 158142c2 bellard
    aExp = extractFloat128Exp( a );
4798 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4799 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4800 158142c2 bellard
    bExp = extractFloat128Exp( b );
4801 158142c2 bellard
    expDiff = aExp - bExp;
4802 158142c2 bellard
    if ( 0 < expDiff ) {
4803 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4804 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4805 158142c2 bellard
            return a;
4806 158142c2 bellard
        }
4807 158142c2 bellard
        if ( bExp == 0 ) {
4808 158142c2 bellard
            --expDiff;
4809 158142c2 bellard
        }
4810 158142c2 bellard
        else {
4811 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
4812 158142c2 bellard
        }
4813 158142c2 bellard
        shift128ExtraRightJamming(
4814 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4815 158142c2 bellard
        zExp = aExp;
4816 158142c2 bellard
    }
4817 158142c2 bellard
    else if ( expDiff < 0 ) {
4818 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4819 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4820 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4821 158142c2 bellard
        }
4822 158142c2 bellard
        if ( aExp == 0 ) {
4823 158142c2 bellard
            ++expDiff;
4824 158142c2 bellard
        }
4825 158142c2 bellard
        else {
4826 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
4827 158142c2 bellard
        }
4828 158142c2 bellard
        shift128ExtraRightJamming(
4829 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4830 158142c2 bellard
        zExp = bExp;
4831 158142c2 bellard
    }
4832 158142c2 bellard
    else {
4833 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4834 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4835 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
4836 158142c2 bellard
            }
4837 158142c2 bellard
            return a;
4838 158142c2 bellard
        }
4839 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4840 fe76d976 pbrook
        if ( aExp == 0 ) {
4841 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
4842 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
4843 fe76d976 pbrook
        }
4844 158142c2 bellard
        zSig2 = 0;
4845 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
4846 158142c2 bellard
        zExp = aExp;
4847 158142c2 bellard
        goto shiftRight1;
4848 158142c2 bellard
    }
4849 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4850 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4851 158142c2 bellard
    --zExp;
4852 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4853 158142c2 bellard
    ++zExp;
4854 158142c2 bellard
 shiftRight1:
4855 158142c2 bellard
    shift128ExtraRightJamming(
4856 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4857 158142c2 bellard
 roundAndPack:
4858 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4859 158142c2 bellard
4860 158142c2 bellard
}
4861 158142c2 bellard
4862 158142c2 bellard
/*----------------------------------------------------------------------------
4863 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
4864 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
4865 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4866 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4867 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4868 158142c2 bellard
*----------------------------------------------------------------------------*/
4869 158142c2 bellard
4870 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4871 158142c2 bellard
{
4872 158142c2 bellard
    int32 aExp, bExp, zExp;
4873 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4874 158142c2 bellard
    int32 expDiff;
4875 158142c2 bellard
    float128 z;
    /* Unpack the exponent and the two fraction words of each operand.  The
       operands' own sign bits are not read here; only `zSign' is used. */
4876 158142c2 bellard
4877 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4878 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4879 158142c2 bellard
    aExp = extractFloat128Exp( a );
4880 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4881 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4882 158142c2 bellard
    bExp = extractFloat128Exp( b );
4883 158142c2 bellard
    expDiff = aExp - bExp;
    /* Both significands are moved up by 14 bits before any alignment shift;
       the exponent handed to normalizeRoundAndPackFloat128 at the end is
       reduced by the same 14 to compensate. */
4884 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4885 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4886 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4887 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
    /* From here on the two exponents are equal. */
4888 158142c2 bellard
    if ( aExp == 0x7FFF ) {
        /* Any nonzero fraction bit means at least one operand is a NaN. */
4889 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4890 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4891 158142c2 bellard
        }
        /* Both fractions are zero with the maximum exponent: the magnitudes
           are both infinite, so the difference is invalid and the default
           NaN is returned. */
4892 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4893 158142c2 bellard
        z.low = float128_default_nan_low;
4894 158142c2 bellard
        z.high = float128_default_nan_high;
4895 158142c2 bellard
        return z;
4896 158142c2 bellard
    }
    /* Both exponent fields are zero: use 1 as the working exponent for both.
       NOTE(review): presumably because zero/subnormal encodings share the
       scale of biased exponent 1 -- confirm against the format definition. */
4897 158142c2 bellard
    if ( aExp == 0 ) {
4898 158142c2 bellard
        aExp = 1;
4899 158142c2 bellard
        bExp = 1;
4900 158142c2 bellard
    }
    /* Compare the 128-bit significands, high word first, to decide which
       operand has the larger magnitude. */
4901 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
4902 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
4903 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
4904 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
    /* Magnitudes are identical: the result is a zero whose sign bit is set
       only when the current rounding mode is float_round_down. */
4905 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4906 158142c2 bellard
 bExpBigger:
4907 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4908 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
        /* b is infinite: the result is an infinity with the opposite of
           `zSign'. */
4909 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4910 158142c2 bellard
    }
    /* expDiff is negative here.  When a's exponent field is zero the shift
       distance is reduced by one; otherwise bit 62 of the high word is set
       in a's significand (0x0001000000000000 moved up by the 14-bit shift
       above -- presumably the implicit leading one). */
4911 158142c2 bellard
    if ( aExp == 0 ) {
4912 158142c2 bellard
        ++expDiff;
4913 158142c2 bellard
    }
4914 158142c2 bellard
    else {
4915 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4916 158142c2 bellard
    }
    /* Align a to b's exponent by shifting it right by |expDiff|. */
4917 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4918 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
    /* Also reached directly when the exponents are equal and b's
       significand is the larger one. */
4919 158142c2 bellard
 bBigger:
4920 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4921 158142c2 bellard
    zExp = bExp;
    /* b's magnitude dominates, so the result sign is inverted. */
4922 158142c2 bellard
    zSign ^= 1;
4923 158142c2 bellard
    goto normalizeRoundAndPack;
4924 158142c2 bellard
 aExpBigger:
4925 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4926 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
        /* a is infinite and b has a smaller exponent: a is returned as is. */
4927 158142c2 bellard
        return a;
4928 158142c2 bellard
    }
    /* Mirror image of the bExpBigger case, with expDiff positive. */
4929 158142c2 bellard
    if ( bExp == 0 ) {
4930 158142c2 bellard
        --expDiff;
4931 158142c2 bellard
    }
4932 158142c2 bellard
    else {
4933 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
4934 158142c2 bellard
    }
4935 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4936 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
    /* Also reached directly when the exponents are equal and a's
       significand is the larger one. */
4937 158142c2 bellard
 aBigger:
4938 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4939 158142c2 bellard
    zExp = aExp;
    /* Common exit: the exponent is decremented once and additionally reduced
       by 14 (matching the initial left shift) before normalization,
       rounding and packing. */
4940 158142c2 bellard
 normalizeRoundAndPack:
4941 158142c2 bellard
    --zExp;
4942 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4943 158142c2 bellard
4944 158142c2 bellard
}
4945 158142c2 bellard
4946 158142c2 bellard
/*----------------------------------------------------------------------------
4947 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
4948 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4949 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4950 158142c2 bellard
*----------------------------------------------------------------------------*/
4951 158142c2 bellard
4952 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
4953 158142c2 bellard
{
4954 158142c2 bellard
    flag aSign, bSign;
4955 158142c2 bellard
4956 158142c2 bellard
    aSign = extractFloat128Sign( a );
4957 158142c2 bellard
    bSign = extractFloat128Sign( b );
4958 158142c2 bellard
    if ( aSign == bSign ) {
4959 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4960 158142c2 bellard
    }
4961 158142c2 bellard
    else {
4962 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4963 158142c2 bellard
    }
4964 158142c2 bellard
4965 158142c2 bellard
}
4966 158142c2 bellard
4967 158142c2 bellard
/*----------------------------------------------------------------------------
4968 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
4969 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4970 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4971 158142c2 bellard
*----------------------------------------------------------------------------*/
4972 158142c2 bellard
4973 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
4974 158142c2 bellard
{
4975 158142c2 bellard
    flag aSign, bSign;
4976 158142c2 bellard
4977 158142c2 bellard
    aSign = extractFloat128Sign( a );
4978 158142c2 bellard
    bSign = extractFloat128Sign( b );
4979 158142c2 bellard
    if ( aSign == bSign ) {
4980 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4981 158142c2 bellard
    }
4982 158142c2 bellard
    else {
4983 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4984 158142c2 bellard
    }
4985 158142c2 bellard
4986 158142c2 bellard
}
4987 158142c2 bellard
4988 158142c2 bellard
/*----------------------------------------------------------------------------
4989 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
4990 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4991 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4992 158142c2 bellard
*----------------------------------------------------------------------------*/
4993 158142c2 bellard
4994 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
4995 158142c2 bellard
{
4996 158142c2 bellard
    flag aSign, bSign, zSign;
4997 158142c2 bellard
    int32 aExp, bExp, zExp;
4998 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4999 158142c2 bellard
    float128 z;
5000 158142c2 bellard
5001 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5002 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5003 158142c2 bellard
    aExp = extractFloat128Exp( a );
5004 158142c2 bellard
    aSign = extractFloat128Sign( a );
5005 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5006 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5007 158142c2 bellard
    bExp = extractFloat128Exp( b );
5008 158142c2 bellard
    bSign = extractFloat128Sign( b );
5009 158142c2 bellard
    zSign = aSign ^ bSign;
5010 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5011 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5012 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5013 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5014 158142c2 bellard
        }
5015 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5016 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5017 158142c2 bellard
    }
5018 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5019 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5020 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5021 158142c2 bellard
 invalid:
5022 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5023 158142c2 bellard
            z.low = float128_default_nan_low;
5024 158142c2 bellard
            z.high = float128_default_nan_high;
5025 158142c2 bellard
            return z;
5026 158142c2 bellard
        }
5027 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5028 158142c2 bellard
    }
5029 158142c2 bellard
    if ( aExp == 0 ) {
5030 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5031 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5032 158142c2 bellard
    }
5033 158142c2 bellard
    if ( bExp == 0 ) {
5034 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5035 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5036 158142c2 bellard
    }
5037 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
5038 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5039 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5040 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5041 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5042 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
5043 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5044 158142c2 bellard
        shift128ExtraRightJamming(
5045 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5046 158142c2 bellard
        ++zExp;
5047 158142c2 bellard
    }
5048 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5049 158142c2 bellard
5050 158142c2 bellard
}
5051 158142c2 bellard
5052 158142c2 bellard
/*----------------------------------------------------------------------------
5053 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5054 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5055 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5056 158142c2 bellard
*----------------------------------------------------------------------------*/
5057 158142c2 bellard
5058 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5059 158142c2 bellard
{
5060 158142c2 bellard
    flag aSign, bSign, zSign;
5061 158142c2 bellard
    int32 aExp, bExp, zExp;
5062 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5063 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5064 158142c2 bellard
    float128 z;
    /* Unpack both operands; the result sign is the XOR of the input signs. */
5065 158142c2 bellard
5066 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5067 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5068 158142c2 bellard
    aExp = extractFloat128Exp( a );
5069 158142c2 bellard
    aSign = extractFloat128Sign( a );
5070 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5071 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5072 158142c2 bellard
    bExp = extractFloat128Exp( b );
5073 158142c2 bellard
    bSign = extractFloat128Sign( b );
5074 158142c2 bellard
    zSign = aSign ^ bSign;
    /* a is a NaN or an infinity. */
5075 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5076 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5077 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5078 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
            /* infinity / infinity */
5079 158142c2 bellard
            goto invalid;
5080 158142c2 bellard
        }
        /* infinity / anything else: signed infinity */
5081 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5082 158142c2 bellard
    }
    /* b is a NaN or an infinity while a is not: NaN propagates, and a
       division by infinity gives a signed zero. */
5083 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5084 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5085 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5086 158142c2 bellard
    }
    /* b has a zero exponent field: either a true zero or a subnormal. */
5087 158142c2 bellard
    if ( bExp == 0 ) {
5088 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* 0 / 0 raises invalid and returns the default NaN. */
5089 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5090 158142c2 bellard
 invalid:
5091 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5092 158142c2 bellard
                z.low = float128_default_nan_low;
5093 158142c2 bellard
                z.high = float128_default_nan_high;
5094 158142c2 bellard
                return z;
5095 158142c2 bellard
            }
            /* Nonzero a divided by zero: divide-by-zero exception and a
               signed infinity. */
5096 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
5097 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5098 158142c2 bellard
        }
5099 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5100 158142c2 bellard
    }
    /* a has a zero exponent field: a zero dividend gives a signed zero,
       a subnormal one is normalized first. */
5101 158142c2 bellard
    if ( aExp == 0 ) {
5102 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5103 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5104 158142c2 bellard
    }
5105 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
    /* OR in bit 48 of the high word (presumably the implicit leading one)
       and move both significands up by 15 bits. */
5106 158142c2 bellard
    shortShift128Left(
5107 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5108 158142c2 bellard
    shortShift128Left(
5109 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* If the divisor significand does not exceed the dividend significand,
       halve the dividend and bump the exponent to compensate. */
5110 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5111 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5112 158142c2 bellard
        ++zExp;
5113 158142c2 bellard
    }
    /* High 64 quotient bits: start from an estimate, form the remainder
       a - b*zSig0, and while that remainder is negative step the quotient
       down and add the divisor back. */
5114 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5115 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5116 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5117 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5118 158142c2 bellard
        --zSig0;
5119 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5120 158142c2 bellard
    }
    /* Low 64 quotient bits, estimated from the remaining remainder. */
5121 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
    /* The exact correction is only carried out when the low 14 bits of the
       estimate are <= 4.  NOTE(review): presumably the estimate's error
       cannot influence rounding otherwise -- confirm. */
5122 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5123 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5124 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5125 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
5126 158142c2 bellard
            --zSig1;
5127 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5128 158142c2 bellard
        }
        /* Record a nonzero final remainder in the lowest quotient bit. */
5129 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5130 158142c2 bellard
    }
    /* Shift the 128-bit quotient right by 15, producing the extra word
       zSig2 that is handed to the rounding routine. */
5131 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5132 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5133 158142c2 bellard
5134 158142c2 bellard
}
5135 158142c2 bellard
5136 158142c2 bellard
/*----------------------------------------------------------------------------
5137 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5138 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5139 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5140 158142c2 bellard
*----------------------------------------------------------------------------*/
5141 158142c2 bellard
5142 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5143 158142c2 bellard
{
5144 ed086f3d Blue Swirl
    flag aSign, zSign;
5145 158142c2 bellard
    int32 aExp, bExp, expDiff;
5146 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5147 158142c2 bellard
    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5148 158142c2 bellard
    sbits64 sigMean0;
5149 158142c2 bellard
    float128 z;
    /* Unpack both operands.  The sign of b is never read in this function. */
5150 158142c2 bellard
5151 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5152 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5153 158142c2 bellard
    aExp = extractFloat128Exp( a );
5154 158142c2 bellard
    aSign = extractFloat128Sign( a );
5155 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5156 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5157 158142c2 bellard
    bExp = extractFloat128Exp( b );
    /* a is a NaN or an infinity: propagate if either operand is a NaN,
       otherwise (a infinite) the operation is invalid. */
5158 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5159 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5160 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5161 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5162 158142c2 bellard
        }
5163 158142c2 bellard
        goto invalid;
5164 158142c2 bellard
    }
    /* b is a NaN (propagate) or an infinity (a is returned unchanged). */
5165 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5166 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5167 158142c2 bellard
        return a;
5168 158142c2 bellard
    }
    /* b is zero (invalid, default NaN) or subnormal (normalize it). */
5169 158142c2 bellard
    if ( bExp == 0 ) {
5170 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5171 158142c2 bellard
 invalid:
5172 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5173 158142c2 bellard
            z.low = float128_default_nan_low;
5174 158142c2 bellard
            z.high = float128_default_nan_high;
5175 158142c2 bellard
            return z;
5176 158142c2 bellard
        }
5177 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5178 158142c2 bellard
    }
    /* a is zero (returned unchanged) or subnormal (normalize it). */
5179 158142c2 bellard
    if ( aExp == 0 ) {
5180 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5181 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5182 158142c2 bellard
    }
5183 158142c2 bellard
    expDiff = aExp - bExp;
    /* a's exponent is at least two below b's: a is returned unchanged. */
5184 158142c2 bellard
    if ( expDiff < -1 ) return a;
    /* OR in bit 48 of the high word and shift both significands up by 15;
       a is shifted one bit less when its exponent is the smaller one. */
5185 158142c2 bellard
    shortShift128Left(
5186 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
5187 158142c2 bellard
        aSig1,
5188 158142c2 bellard
        15 - ( expDiff < 0 ),
5189 158142c2 bellard
        &aSig0,
5190 158142c2 bellard
        &aSig1
5191 158142c2 bellard
    );
5192 158142c2 bellard
    shortShift128Left(
5193 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit: subtract b once if it does not exceed a. */
5194 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
5195 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5196 158142c2 bellard
    expDiff -= 64;
    /* Reduce the remainder 61 exponent steps per iteration.  The quotient
       estimate is lowered by 4 (clamped at 0) before use -- presumably so
       that it can never overshoot; confirm against estimateDiv128To64. */
5197 158142c2 bellard
    while ( 0 < expDiff ) {
5198 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5199 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5200 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5201 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5202 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5203 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5204 158142c2 bellard
        expDiff -= 61;
5205 158142c2 bellard
    }
    /* Final partial step, taken when between 1 and 64 exponent steps remain
       (expDiff is in -63..0 here): the estimate is scaled down by -expDiff
       bits, and both significands are rescaled before the subtraction. */
5206 158142c2 bellard
    if ( -64 < expDiff ) {
5207 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5208 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5209 158142c2 bellard
        q >>= - expDiff;
5210 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5211 158142c2 bellard
        expDiff += 52;
5212 158142c2 bellard
        if ( expDiff < 0 ) {
5213 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5214 158142c2 bellard
        }
5215 158142c2 bellard
        else {
5216 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5217 158142c2 bellard
        }
5218 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5219 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5220 158142c2 bellard
    }
    /* No further quotient bits are needed: only drop both significands by
       12 bits so they have the same scale as in the branch above. */
5221 158142c2 bellard
    else {
5222 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5223 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5224 158142c2 bellard
    }
    /* Keep subtracting b, counting in q, until the remainder goes negative;
       `alternateASig' holds the last non-negative remainder. */
5225 158142c2 bellard
    do {
5226 158142c2 bellard
        alternateASig0 = aSig0;
5227 158142c2 bellard
        alternateASig1 = aSig1;
5228 158142c2 bellard
        ++q;
5229 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5230 158142c2 bellard
    } while ( 0 <= (sbits64) aSig0 );
    /* sigMean is the sum of the negative remainder and the last
       non-negative one.  The non-negative candidate is chosen when that sum
       is negative, or when it is exactly zero and q is odd. */
5231 158142c2 bellard
    add128(
5232 b55266b5 blueswir1
        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5233 158142c2 bellard
    if (    ( sigMean0 < 0 )
5234 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5235 158142c2 bellard
        aSig0 = alternateASig0;
5236 158142c2 bellard
        aSig1 = alternateASig1;
5237 158142c2 bellard
    }
    /* Convert the chosen remainder to sign/magnitude form; a negative
       remainder flips the sign of the result relative to a. */
5238 158142c2 bellard
    zSign = ( (sbits64) aSig0 < 0 );
5239 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5240 158142c2 bellard
    return
5241 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5242 158142c2 bellard
5243 158142c2 bellard
}
5244 158142c2 bellard
5245 158142c2 bellard
/*----------------------------------------------------------------------------
5246 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5247 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5248 158142c2 bellard
| Floating-Point Arithmetic.
5249 158142c2 bellard
*----------------------------------------------------------------------------*/
5250 158142c2 bellard
5251 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5252 158142c2 bellard
{
5253 158142c2 bellard
    flag aSign;
5254 158142c2 bellard
    int32 aExp, zExp;
5255 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5256 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5257 158142c2 bellard
    float128 z;
    /* Unpack the operand. */
5258 158142c2 bellard
5259 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5260 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5261 158142c2 bellard
    aExp = extractFloat128Exp( a );
5262 158142c2 bellard
    aSign = extractFloat128Sign( a );
    /* NaN propagates, +infinity is returned unchanged, -infinity is
       invalid. */
5263 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5264 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5265 158142c2 bellard
        if ( ! aSign ) return a;
5266 158142c2 bellard
        goto invalid;
5267 158142c2 bellard
    }
    /* Negative input: a negative zero is returned unchanged, anything else
       raises invalid and returns the default NaN. */
5268 158142c2 bellard
    if ( aSign ) {
5269 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5270 158142c2 bellard
 invalid:
5271 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5272 158142c2 bellard
        z.low = float128_default_nan_low;
5273 158142c2 bellard
        z.high = float128_default_nan_high;
5274 158142c2 bellard
        return z;
5275 158142c2 bellard
    }
    /* Positive zero gives positive zero; a subnormal is normalized first. */
5276 158142c2 bellard
    if ( aExp == 0 ) {
5277 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5278 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5279 158142c2 bellard
    }
    /* Halve the unbiased exponent (bias 0x3FFF) and re-bias with 0x3FFE. */
5280 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5281 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
    /* Start from a 32-bit root estimate, then refine it to 64 bits with one
       division step.  The significand is shifted up by 13, or by 12 when
       the exponent field is odd. */
5282 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5283 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5284 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5285 158142c2 bellard
    doubleZSig0 = zSig0<<1;
    /* Remainder a - zSig0^2; while it is negative, step zSig0 down by one
       and update the remainder accordingly. */
5286 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5287 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5288 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5289 158142c2 bellard
        --zSig0;
5290 158142c2 bellard
        doubleZSig0 -= 2;
5291 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5292 158142c2 bellard
    }
    /* Low 64 bits of the root, estimated as remainder / (2 * zSig0). */
5293 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    /* The exact correction is only carried out when the low 13 bits of the
       estimate are <= 5.  NOTE(review): presumably the estimate's error
       cannot influence rounding otherwise -- confirm. */
5294 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5295 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5296 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5297 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5298 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5299 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        /* While the remainder is negative, step zSig1 down by one and add
           back the term built from doubleZSig0 and 2*zSig1 + 1. */
5300 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
5301 158142c2 bellard
            --zSig1;
5302 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5303 158142c2 bellard
            term3 |= 1;
5304 158142c2 bellard
            term2 |= doubleZSig0;
5305 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5306 158142c2 bellard
        }
        /* Record a nonzero final remainder in the lowest root bit. */
5307 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5308 158142c2 bellard
    }
    /* Shift the 128-bit root right by 14, producing the extra word zSig2
       for the rounding routine; the result sign is always 0. */
5309 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5310 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5311 158142c2 bellard
5312 158142c2 bellard
}
5313 158142c2 bellard
5314 158142c2 bellard
/*----------------------------------------------------------------------------
5315 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5316 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5317 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5318 158142c2 bellard
*----------------------------------------------------------------------------*/
5319 158142c2 bellard
5320 750afe93 bellard
int float128_eq( float128 a, float128 b STATUS_PARAM )
5321 158142c2 bellard
{
5322 158142c2 bellard
5323 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5324 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5325 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5326 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5327 158142c2 bellard
       ) {
5328 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5329 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5330 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5331 158142c2 bellard
        }
5332 158142c2 bellard
        return 0;
5333 158142c2 bellard
    }
5334 158142c2 bellard
    return
5335 158142c2 bellard
           ( a.low == b.low )
5336 158142c2 bellard
        && (    ( a.high == b.high )
5337 158142c2 bellard
             || (    ( a.low == 0 )
5338 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5339 158142c2 bellard
           );
5340 158142c2 bellard
5341 158142c2 bellard
}
5342 158142c2 bellard
5343 158142c2 bellard
/*----------------------------------------------------------------------------
5344 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5345 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
5346 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5347 158142c2 bellard
| Arithmetic.
5348 158142c2 bellard
*----------------------------------------------------------------------------*/
5349 158142c2 bellard
5350 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5351 158142c2 bellard
{
5352 158142c2 bellard
    flag aSign, bSign;
5353 158142c2 bellard
5354 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5355 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5356 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5357 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5358 158142c2 bellard
       ) {
5359 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5360 158142c2 bellard
        return 0;
5361 158142c2 bellard
    }
5362 158142c2 bellard
    aSign = extractFloat128Sign( a );
5363 158142c2 bellard
    bSign = extractFloat128Sign( b );
5364 158142c2 bellard
    if ( aSign != bSign ) {
5365 158142c2 bellard
        return
5366 158142c2 bellard
               aSign
5367 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5368 158142c2 bellard
                 == 0 );
5369 158142c2 bellard
    }
5370 158142c2 bellard
    return
5371 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5372 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5373 158142c2 bellard
5374 158142c2 bellard
}
5375 158142c2 bellard
5376 158142c2 bellard
/*----------------------------------------------------------------------------
5377 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5378 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5379 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5380 158142c2 bellard
*----------------------------------------------------------------------------*/
5381 158142c2 bellard
5382 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5383 158142c2 bellard
{
5384 158142c2 bellard
    flag aSign, bSign;
5385 158142c2 bellard
5386 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5387 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5388 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5389 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5390 158142c2 bellard
       ) {
5391 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5392 158142c2 bellard
        return 0;
5393 158142c2 bellard
    }
5394 158142c2 bellard
    aSign = extractFloat128Sign( a );
5395 158142c2 bellard
    bSign = extractFloat128Sign( b );
5396 158142c2 bellard
    if ( aSign != bSign ) {
5397 158142c2 bellard
        return
5398 158142c2 bellard
               aSign
5399 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5400 158142c2 bellard
                 != 0 );
5401 158142c2 bellard
    }
5402 158142c2 bellard
    return
5403 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5404 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5405 158142c2 bellard
5406 158142c2 bellard
}
5407 158142c2 bellard
5408 158142c2 bellard
/*----------------------------------------------------------------------------
5409 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5410 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5411 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5412 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5413 158142c2 bellard
*----------------------------------------------------------------------------*/
5414 158142c2 bellard
5415 750afe93 bellard
int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5416 158142c2 bellard
{
5417 158142c2 bellard
5418 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5419 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5420 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5421 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5422 158142c2 bellard
       ) {
5423 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5424 158142c2 bellard
        return 0;
5425 158142c2 bellard
    }
5426 158142c2 bellard
    return
5427 158142c2 bellard
           ( a.low == b.low )
5428 158142c2 bellard
        && (    ( a.high == b.high )
5429 158142c2 bellard
             || (    ( a.low == 0 )
5430 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5431 158142c2 bellard
           );
5432 158142c2 bellard
5433 158142c2 bellard
}
5434 158142c2 bellard
5435 158142c2 bellard
/*----------------------------------------------------------------------------
5436 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5437 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5438 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5439 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5440 158142c2 bellard
*----------------------------------------------------------------------------*/
5441 158142c2 bellard
5442 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5443 158142c2 bellard
{
5444 158142c2 bellard
    flag aSign, bSign;
5445 158142c2 bellard
5446 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5447 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5448 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5449 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5450 158142c2 bellard
       ) {
5451 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5452 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5453 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5454 158142c2 bellard
        }
5455 158142c2 bellard
        return 0;
5456 158142c2 bellard
    }
5457 158142c2 bellard
    aSign = extractFloat128Sign( a );
5458 158142c2 bellard
    bSign = extractFloat128Sign( b );
5459 158142c2 bellard
    if ( aSign != bSign ) {
5460 158142c2 bellard
        return
5461 158142c2 bellard
               aSign
5462 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5463 158142c2 bellard
                 == 0 );
5464 158142c2 bellard
    }
5465 158142c2 bellard
    return
5466 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5467 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5468 158142c2 bellard
5469 158142c2 bellard
}
5470 158142c2 bellard
5471 158142c2 bellard
/*----------------------------------------------------------------------------
5472 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5473 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5474 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5475 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5476 158142c2 bellard
*----------------------------------------------------------------------------*/
5477 158142c2 bellard
5478 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5479 158142c2 bellard
{
5480 158142c2 bellard
    flag aSign, bSign;
5481 158142c2 bellard
5482 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5483 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5484 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5485 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5486 158142c2 bellard
       ) {
5487 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5488 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5489 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5490 158142c2 bellard
        }
5491 158142c2 bellard
        return 0;
5492 158142c2 bellard
    }
5493 158142c2 bellard
    aSign = extractFloat128Sign( a );
5494 158142c2 bellard
    bSign = extractFloat128Sign( b );
5495 158142c2 bellard
    if ( aSign != bSign ) {
5496 158142c2 bellard
        return
5497 158142c2 bellard
               aSign
5498 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5499 158142c2 bellard
                 != 0 );
5500 158142c2 bellard
    }
5501 158142c2 bellard
    return
5502 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5503 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5504 158142c2 bellard
5505 158142c2 bellard
}
5506 158142c2 bellard
5507 158142c2 bellard
#endif
5508 158142c2 bellard
5509 1d6bda35 bellard
/* misc functions */
5510 1d6bda35 bellard
/* Converts the unsigned integer `a' to single precision by widening it to a
   signed 64-bit value and handing that to int64_to_float32(). */
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float32( widened STATUS_VAR );
}
5514 1d6bda35 bellard
5515 1d6bda35 bellard
/* Converts the unsigned integer `a' to double precision by widening it to a
   signed 64-bit value and handing that to int64_to_float64(). */
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float64( widened STATUS_VAR );
}
5519 1d6bda35 bellard
5520 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of float32_to_int64().
   A 64-bit result outside [0, 0xffffffff] raises the invalid exception and
   is replaced by the nearest bound (0 or 0xffffffff). */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64( a STATUS_VAR );

    if ( 0 <= wide && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
5537 1d6bda35 bellard
5538 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of
   float32_to_int64_round_to_zero().  A 64-bit result outside
   [0, 0xffffffff] raises the invalid exception and is replaced by the
   nearest bound (0 or 0xffffffff). */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero( a STATUS_VAR );

    if ( 0 <= wide && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
5555 1d6bda35 bellard
5556 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of float64_to_int64().
   A 64-bit result outside [0, 0xffffffff] raises the invalid exception and
   is replaced by the nearest bound (0 or 0xffffffff). */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64( a STATUS_VAR );

    if ( 0 <= wide && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
5573 1d6bda35 bellard
5574 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer by way of
   float64_to_int64_round_to_zero().  A 64-bit result outside
   [0, 0xffffffff] raises the invalid exception and is replaced by the
   nearest bound (0 or 0xffffffff). */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero( a STATUS_VAR );

    if ( 0 <= wide && wide <= 0xffffffff ) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return ( wide < 0 ) ? 0 : 0xffffffff;
}
5591 1d6bda35 bellard
5592 f090c9d4 pbrook
/* FIXME: This looks broken.
   Intended to convert `a' to an unsigned 64-bit integer.  As written, the
   function:
     1. stores float64_val() of INT64_MIN converted to float64 in the int64_t
        variable `v';
     2. adds float64_val(a) to `v' with the C `+=' operator on that integer
        variable, not with a softfloat addition;
     3. wraps the sum with make_float64() and converts it using
        float64_to_int64();
     4. returns `v - INT64_MIN', a signed subtraction that overflows the
        int64_t range whenever `v' is non-negative.
   NOTE(review): assuming float64_val()/make_float64() expose the raw 64-bit
   encoding (TODO confirm), step 2 sums bit patterns rather than values, so
   the result is not a meaningful conversion.  Presumably the intent was to
   bias the operand by -2^63, convert, and remove the bias afterwards --
   confirm before relying on this function.  */
5593 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
5594 75d62a58 j_mayer
{
5595 75d62a58 j_mayer
    int64_t v;
5596 75d62a58 j_mayer
5597 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5598 f090c9d4 pbrook
    v += float64_val(a);
5599 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
5600 75d62a58 j_mayer
5601 75d62a58 j_mayer
    return v - INT64_MIN;
5602 75d62a58 j_mayer
}
5603 75d62a58 j_mayer
5604 75d62a58 j_mayer
/* Intended to convert `a' to an unsigned 64-bit integer using
   float64_to_int64_round_to_zero() for the final conversion.
   NOTE(review): this has the same shape as float64_to_uint64() and looks
   broken in the same way.  float64_val(a) is added to `v' with the C `+='
   operator on an int64_t variable, not with a softfloat addition, and the
   sum is then wrapped with make_float64().  Assuming float64_val() and
   make_float64() expose the raw 64-bit encoding (TODO confirm), that sums
   bit patterns rather than values.  The closing `v - INT64_MIN' is a signed
   subtraction that overflows the int64_t range whenever `v' is
   non-negative.  */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
5605 75d62a58 j_mayer
{
5606 75d62a58 j_mayer
    int64_t v;
5607 75d62a58 j_mayer
5608 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5609 f090c9d4 pbrook
    v += float64_val(a);
5610 f090c9d4 pbrook
    v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
5611 75d62a58 j_mayer
5612 75d62a58 j_mayer
    return v - INT64_MIN;
5613 75d62a58 j_mayer
}
5614 75d62a58 j_mayer
5615 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
5616 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
5617 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
5618 1d6bda35 bellard
{                                                                            \
5619 1d6bda35 bellard
    flag aSign, bSign;                                                       \
5620 f090c9d4 pbrook
    bits ## s av, bv;                                                        \
5621 1d6bda35 bellard
                                                                             \
5622 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
5623 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
5624 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
5625 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
5626 1d6bda35 bellard
        if (!is_quiet ||                                                     \
5627 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
5628 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
5629 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
5630 1d6bda35 bellard
        }                                                                    \
5631 1d6bda35 bellard
        return float_relation_unordered;                                     \
5632 1d6bda35 bellard
    }                                                                        \
5633 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
5634 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
5635 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
5636 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
5637 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
5638 f090c9d4 pbrook
        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
5639 1d6bda35 bellard
            /* zero case */                                                  \
5640 1d6bda35 bellard
            return float_relation_equal;                                     \
5641 1d6bda35 bellard
        } else {                                                             \
5642 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
5643 1d6bda35 bellard
        }                                                                    \
5644 1d6bda35 bellard
    } else {                                                                 \
5645 f090c9d4 pbrook
        if (av == bv) {                                                      \
5646 1d6bda35 bellard
            return float_relation_equal;                                     \
5647 1d6bda35 bellard
        } else {                                                             \
5648 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
5649 1d6bda35 bellard
        }                                                                    \
5650 1d6bda35 bellard
    }                                                                        \
5651 1d6bda35 bellard
}                                                                            \
5652 1d6bda35 bellard
                                                                             \
5653 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
5654 1d6bda35 bellard
{                                                                            \
5655 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
5656 1d6bda35 bellard
}                                                                            \
5657 1d6bda35 bellard
                                                                             \
5658 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
5659 1d6bda35 bellard
{                                                                            \
5660 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
5661 1d6bda35 bellard
}
5662 1d6bda35 bellard
5663 1d6bda35 bellard
COMPARE(32, 0xff)
5664 1d6bda35 bellard
COMPARE(64, 0x7ff)
5665 9ee6e8bb pbrook
5666 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
5667 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
5668 1f587329 blueswir1
{
5669 1f587329 blueswir1
    flag aSign, bSign;
5670 1f587329 blueswir1
5671 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
5672 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
5673 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
5674 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
5675 1f587329 blueswir1
        if (!is_quiet ||
5676 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
5677 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
5678 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
5679 1f587329 blueswir1
        }
5680 1f587329 blueswir1
        return float_relation_unordered;
5681 1f587329 blueswir1
    }
5682 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
5683 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
5684 1f587329 blueswir1
    if ( aSign != bSign ) {
5685 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
5686 1f587329 blueswir1
            /* zero case */
5687 1f587329 blueswir1
            return float_relation_equal;
5688 1f587329 blueswir1
        } else {
5689 1f587329 blueswir1
            return 1 - (2 * aSign);
5690 1f587329 blueswir1
        }
5691 1f587329 blueswir1
    } else {
5692 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
5693 1f587329 blueswir1
            return float_relation_equal;
5694 1f587329 blueswir1
        } else {
5695 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
5696 1f587329 blueswir1
        }
5697 1f587329 blueswir1
    }
5698 1f587329 blueswir1
}
5699 1f587329 blueswir1
5700 1f587329 blueswir1
/* Three-way comparison of `a' and `b' through float128_compare_internal()
   with is_quiet = 0, so that any NaN operand raises the invalid exception. */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    int relation;

    relation = float128_compare_internal( a, b, 0 STATUS_VAR );
    return relation;
}
5704 1f587329 blueswir1
5705 1f587329 blueswir1
/* Three-way comparison of `a' and `b' through float128_compare_internal()
   with is_quiet = 1, so that only signaling NaN operands raise the invalid
   exception. */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    int relation;

    relation = float128_compare_internal( a, b, 1 STATUS_VAR );
    return relation;
}
5709 1f587329 blueswir1
5710 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
5711 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
5712 9ee6e8bb pbrook
{
5713 9ee6e8bb pbrook
    flag aSign;
5714 9ee6e8bb pbrook
    int16 aExp;
5715 9ee6e8bb pbrook
    bits32 aSig;
5716 9ee6e8bb pbrook
5717 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
5718 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
5719 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
5720 9ee6e8bb pbrook
5721 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
5722 9ee6e8bb pbrook
        return a;
5723 9ee6e8bb pbrook
    }
5724 69397542 pbrook
    if ( aExp != 0 )
5725 69397542 pbrook
        aSig |= 0x00800000;
5726 69397542 pbrook
    else if ( aSig == 0 )
5727 69397542 pbrook
        return a;
5728 69397542 pbrook
5729 69397542 pbrook
    aExp += n - 1;
5730 69397542 pbrook
    aSig <<= 7;
5731 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
5732 9ee6e8bb pbrook
}
5733 9ee6e8bb pbrook
5734 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (double precision).
   Infinities, NaNs and zeros are returned unchanged.  Any other operand has
   its exponent adjusted by N and is then renormalized, rounded and packed by
   normalizeRoundAndPackFloat64().  */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    }
    else if ( aSig == 0 ) {
        return a;
    }
    else {
        /* Denormal: there is no implicit bit and the fraction is scaled as
           if the exponent field were 1, not 0.  Without this adjustment the
           result is too small by a factor of two. */
        aExp++;
    }

    /* Clamp N so the exponent arithmetic below cannot overflow.  Any
       magnitude beyond 0x1000 already forces overflow or complete underflow
       of a double-precision value, so the result is unaffected. */
    if ( n > 0x1000 ) {
        n = 0x1000;
    }
    else if ( n < -0x1000 ) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
5756 9ee6e8bb pbrook
5757 9ee6e8bb pbrook
#ifdef FLOATX80
5758 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (extended double precision).
   Infinities, NaNs and zeros are returned unchanged.  Any other operand has
   its exponent adjusted by N and is then renormalized, rounded and packed by
   normalizeRoundAndPackFloatx80() at the current rounding precision.  */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;    /* wide enough for the exponent plus the clamped N */
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    /* The exponent field is 15 bits wide, so infinities and NaNs carry
       0x7FFF.  The previous test against 0x7FF let them through to the
       scaling code and returned ordinary numbers with exponent 0x7FF
       unscaled. */
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            return a;
        }
        /* Denormal: the significand is scaled as if the exponent field
           were 1, not 0. */
        aExp++;
    }

    /* Clamp N so the exponent arithmetic below cannot overflow.  Any
       magnitude beyond 0x10000 already forces overflow or complete
       underflow of an extended-precision value. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    }
    else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
5778 9ee6e8bb pbrook
#endif
5779 9ee6e8bb pbrook
5780 9ee6e8bb pbrook
#ifdef FLOAT128
5781 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N (quadruple precision).
   Infinities, NaNs and zeros are returned unchanged.  Any other operand has
   its exponent adjusted by N and is then renormalized, rounded and packed by
   normalizeRoundAndPackFloat128().  */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    bits64 aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    }
    else if ( aSig0 == 0 && aSig1 == 0 ) {
        return a;
    }
    else {
        /* Denormal: there is no implicit bit and the fraction is scaled as
           if the exponent field were 1, not 0.  Without this adjustment the
           result is too small by a factor of two. */
        aExp++;
    }

    /* Clamp N so the exponent arithmetic below cannot overflow.  Any
       magnitude beyond 0x10000 already forces overflow or complete
       underflow of a quadruple-precision value. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    }
    else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}
5804 9ee6e8bb pbrook
#endif