Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ 543fc7b2

History | View | Annotate | Download (202.2 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 158142c2 bellard
Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 fe76d976 pbrook
/* FIXME: Flush-To-Zero only affects results.  Denormal inputs should also
34 fe76d976 pbrook
   be flushed to zero.  */
35 158142c2 bellard
#include "softfloat.h"
36 158142c2 bellard
37 158142c2 bellard
/*----------------------------------------------------------------------------
38 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
39 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
40 158142c2 bellard
| desired.)
41 158142c2 bellard
*----------------------------------------------------------------------------*/
42 158142c2 bellard
#include "softfloat-macros.h"
43 158142c2 bellard
44 158142c2 bellard
/*----------------------------------------------------------------------------
45 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
46 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
47 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
48 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
49 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
50 158142c2 bellard
| specific.
51 158142c2 bellard
*----------------------------------------------------------------------------*/
52 158142c2 bellard
#include "softfloat-specialize.h"
53 158142c2 bellard
54 158142c2 bellard
/* Stores `val' as the rounding mode held in the floating-point status. */
void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
58 158142c2 bellard
59 1d6bda35 bellard
/* Overwrites the accumulated exception flags in the floating-point status
   with `val'. */
void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
63 1d6bda35 bellard
64 158142c2 bellard
#ifdef FLOATX80
65 158142c2 bellard
/* Stores `val' as the extended double-precision rounding precision held in
   the floating-point status. */
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
69 158142c2 bellard
#endif
70 158142c2 bellard
71 158142c2 bellard
/*----------------------------------------------------------------------------
72 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
73 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
74 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
75 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
76 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
77 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
78 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
79 158142c2 bellard
| positive or negative integer is returned.
80 158142c2 bellard
*----------------------------------------------------------------------------*/
81 158142c2 bellard
82 158142c2 bellard
static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
83 158142c2 bellard
{
84 158142c2 bellard
    int8 roundingMode;
85 158142c2 bellard
    flag roundNearestEven;
86 158142c2 bellard
    int8 roundIncrement, roundBits;
87 158142c2 bellard
    int32 z;
88 158142c2 bellard
89 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
90 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
91 158142c2 bellard
    roundIncrement = 0x40;
92 158142c2 bellard
    if ( ! roundNearestEven ) {
93 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
94 158142c2 bellard
            roundIncrement = 0;
95 158142c2 bellard
        }
96 158142c2 bellard
        else {
97 158142c2 bellard
            roundIncrement = 0x7F;
98 158142c2 bellard
            if ( zSign ) {
99 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
100 158142c2 bellard
            }
101 158142c2 bellard
            else {
102 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
103 158142c2 bellard
            }
104 158142c2 bellard
        }
105 158142c2 bellard
    }
106 158142c2 bellard
    roundBits = absZ & 0x7F;
107 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
108 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
109 158142c2 bellard
    z = absZ;
110 158142c2 bellard
    if ( zSign ) z = - z;
111 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
112 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
113 158142c2 bellard
        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
114 158142c2 bellard
    }
115 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
116 158142c2 bellard
    return z;
117 158142c2 bellard
118 158142c2 bellard
}
119 158142c2 bellard
120 158142c2 bellard
/*----------------------------------------------------------------------------
121 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
122 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
123 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
124 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
125 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
126 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
127 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
128 158142c2 bellard
| exception is raised and the largest positive or negative integer is
129 158142c2 bellard
| returned.
130 158142c2 bellard
*----------------------------------------------------------------------------*/
131 158142c2 bellard
132 158142c2 bellard
static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
133 158142c2 bellard
{
134 158142c2 bellard
    int8 roundingMode;
135 158142c2 bellard
    flag roundNearestEven, increment;
136 158142c2 bellard
    int64 z;
137 158142c2 bellard
138 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
139 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
140 158142c2 bellard
    increment = ( (sbits64) absZ1 < 0 );
141 158142c2 bellard
    if ( ! roundNearestEven ) {
142 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
143 158142c2 bellard
            increment = 0;
144 158142c2 bellard
        }
145 158142c2 bellard
        else {
146 158142c2 bellard
            if ( zSign ) {
147 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
148 158142c2 bellard
            }
149 158142c2 bellard
            else {
150 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
151 158142c2 bellard
            }
152 158142c2 bellard
        }
153 158142c2 bellard
    }
154 158142c2 bellard
    if ( increment ) {
155 158142c2 bellard
        ++absZ0;
156 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
157 158142c2 bellard
        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158 158142c2 bellard
    }
159 158142c2 bellard
    z = absZ0;
160 158142c2 bellard
    if ( zSign ) z = - z;
161 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
162 158142c2 bellard
 overflow:
163 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
164 158142c2 bellard
        return
165 158142c2 bellard
              zSign ? (sbits64) LIT64( 0x8000000000000000 )
166 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
167 158142c2 bellard
    }
168 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
169 158142c2 bellard
    return z;
170 158142c2 bellard
171 158142c2 bellard
}
172 158142c2 bellard
173 158142c2 bellard
/*----------------------------------------------------------------------------
174 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
175 158142c2 bellard
*----------------------------------------------------------------------------*/
176 158142c2 bellard
177 158142c2 bellard
INLINE bits32 extractFloat32Frac( float32 a )
178 158142c2 bellard
{
179 158142c2 bellard
180 f090c9d4 pbrook
    return float32_val(a) & 0x007FFFFF;
181 158142c2 bellard
182 158142c2 bellard
}
183 158142c2 bellard
184 158142c2 bellard
/*----------------------------------------------------------------------------
185 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
186 158142c2 bellard
*----------------------------------------------------------------------------*/
187 158142c2 bellard
188 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
189 158142c2 bellard
{
190 158142c2 bellard
191 f090c9d4 pbrook
    return ( float32_val(a)>>23 ) & 0xFF;
192 158142c2 bellard
193 158142c2 bellard
}
194 158142c2 bellard
195 158142c2 bellard
/*----------------------------------------------------------------------------
196 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
197 158142c2 bellard
*----------------------------------------------------------------------------*/
198 158142c2 bellard
199 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
200 158142c2 bellard
{
201 158142c2 bellard
202 f090c9d4 pbrook
    return float32_val(a)>>31;
203 158142c2 bellard
204 158142c2 bellard
}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
208 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
209 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
210 158142c2 bellard
| `zSigPtr', respectively.
211 158142c2 bellard
*----------------------------------------------------------------------------*/
212 158142c2 bellard
213 158142c2 bellard
static void
214 158142c2 bellard
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
215 158142c2 bellard
{
216 158142c2 bellard
    int8 shiftCount;
217 158142c2 bellard
218 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
219 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
220 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
221 158142c2 bellard
222 158142c2 bellard
}
223 158142c2 bellard
224 158142c2 bellard
/*----------------------------------------------------------------------------
225 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
226 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
227 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
228 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
229 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
230 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
231 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
232 158142c2 bellard
| significand.
233 158142c2 bellard
*----------------------------------------------------------------------------*/
234 158142c2 bellard
235 158142c2 bellard
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
236 158142c2 bellard
{
237 158142c2 bellard
238 f090c9d4 pbrook
    return make_float32(
239 f090c9d4 pbrook
          ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
240 158142c2 bellard
241 158142c2 bellard
}
242 158142c2 bellard
243 158142c2 bellard
/*----------------------------------------------------------------------------
244 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
245 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
246 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
247 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
248 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
249 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
250 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
251 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
252 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
253 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
254 158142c2 bellard
| precision floating-point number.
255 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
256 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
257 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
258 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
259 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
260 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
261 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
262 158142c2 bellard
| Binary Floating-Point Arithmetic.
263 158142c2 bellard
*----------------------------------------------------------------------------*/
264 158142c2 bellard
265 158142c2 bellard
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
266 158142c2 bellard
{
267 158142c2 bellard
    int8 roundingMode;
268 158142c2 bellard
    flag roundNearestEven;
269 158142c2 bellard
    int8 roundIncrement, roundBits;
270 158142c2 bellard
    flag isTiny;
271 158142c2 bellard
272 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
273 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
274 158142c2 bellard
    roundIncrement = 0x40;
275 158142c2 bellard
    if ( ! roundNearestEven ) {
276 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
277 158142c2 bellard
            roundIncrement = 0;
278 158142c2 bellard
        }
279 158142c2 bellard
        else {
280 158142c2 bellard
            roundIncrement = 0x7F;
281 158142c2 bellard
            if ( zSign ) {
282 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
283 158142c2 bellard
            }
284 158142c2 bellard
            else {
285 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
286 158142c2 bellard
            }
287 158142c2 bellard
        }
288 158142c2 bellard
    }
289 158142c2 bellard
    roundBits = zSig & 0x7F;
290 158142c2 bellard
    if ( 0xFD <= (bits16) zExp ) {
291 158142c2 bellard
        if (    ( 0xFD < zExp )
292 158142c2 bellard
             || (    ( zExp == 0xFD )
293 158142c2 bellard
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
294 158142c2 bellard
           ) {
295 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
296 f090c9d4 pbrook
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
297 158142c2 bellard
        }
298 158142c2 bellard
        if ( zExp < 0 ) {
299 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
300 158142c2 bellard
            isTiny =
301 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
302 158142c2 bellard
                || ( zExp < -1 )
303 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
304 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
305 158142c2 bellard
            zExp = 0;
306 158142c2 bellard
            roundBits = zSig & 0x7F;
307 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
308 158142c2 bellard
        }
309 158142c2 bellard
    }
310 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
311 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
312 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
313 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
314 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
315 158142c2 bellard
316 158142c2 bellard
}
317 158142c2 bellard
318 158142c2 bellard
/*----------------------------------------------------------------------------
319 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
320 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
321 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
322 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
323 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
324 158142c2 bellard
| floating-point exponent.
325 158142c2 bellard
*----------------------------------------------------------------------------*/
326 158142c2 bellard
327 158142c2 bellard
static float32
328 158142c2 bellard
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
329 158142c2 bellard
{
330 158142c2 bellard
    int8 shiftCount;
331 158142c2 bellard
332 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
333 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
334 158142c2 bellard
335 158142c2 bellard
}
336 158142c2 bellard
337 158142c2 bellard
/*----------------------------------------------------------------------------
338 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
339 158142c2 bellard
*----------------------------------------------------------------------------*/
340 158142c2 bellard
341 158142c2 bellard
INLINE bits64 extractFloat64Frac( float64 a )
342 158142c2 bellard
{
343 158142c2 bellard
344 f090c9d4 pbrook
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
345 158142c2 bellard
346 158142c2 bellard
}
347 158142c2 bellard
348 158142c2 bellard
/*----------------------------------------------------------------------------
349 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
350 158142c2 bellard
*----------------------------------------------------------------------------*/
351 158142c2 bellard
352 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
353 158142c2 bellard
{
354 158142c2 bellard
355 f090c9d4 pbrook
    return ( float64_val(a)>>52 ) & 0x7FF;
356 158142c2 bellard
357 158142c2 bellard
}
358 158142c2 bellard
359 158142c2 bellard
/*----------------------------------------------------------------------------
360 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
361 158142c2 bellard
*----------------------------------------------------------------------------*/
362 158142c2 bellard
363 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
364 158142c2 bellard
{
365 158142c2 bellard
366 f090c9d4 pbrook
    return float64_val(a)>>63;
367 158142c2 bellard
368 158142c2 bellard
}
369 158142c2 bellard
370 158142c2 bellard
/*----------------------------------------------------------------------------
371 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
372 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
373 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
374 158142c2 bellard
| `zSigPtr', respectively.
375 158142c2 bellard
*----------------------------------------------------------------------------*/
376 158142c2 bellard
377 158142c2 bellard
static void
378 158142c2 bellard
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
379 158142c2 bellard
{
380 158142c2 bellard
    int8 shiftCount;
381 158142c2 bellard
382 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
383 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
384 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
385 158142c2 bellard
386 158142c2 bellard
}
387 158142c2 bellard
388 158142c2 bellard
/*----------------------------------------------------------------------------
389 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
390 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
391 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
392 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
393 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
394 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
395 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
396 158142c2 bellard
| significand.
397 158142c2 bellard
*----------------------------------------------------------------------------*/
398 158142c2 bellard
399 158142c2 bellard
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
400 158142c2 bellard
{
401 158142c2 bellard
402 f090c9d4 pbrook
    return make_float64(
403 f090c9d4 pbrook
        ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
404 158142c2 bellard
405 158142c2 bellard
}
406 158142c2 bellard
407 158142c2 bellard
/*----------------------------------------------------------------------------
408 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
409 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
410 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
411 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
412 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
413 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
414 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
415 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
416 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
417 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
418 158142c2 bellard
| precision floating-point number.
419 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
420 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
421 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
422 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
423 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
424 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
425 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
426 158142c2 bellard
| Binary Floating-Point Arithmetic.
427 158142c2 bellard
*----------------------------------------------------------------------------*/
428 158142c2 bellard
429 158142c2 bellard
static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
430 158142c2 bellard
{
431 158142c2 bellard
    int8 roundingMode;
432 158142c2 bellard
    flag roundNearestEven;
433 158142c2 bellard
    int16 roundIncrement, roundBits;
434 158142c2 bellard
    flag isTiny;
435 158142c2 bellard
436 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
437 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
438 158142c2 bellard
    roundIncrement = 0x200;
439 158142c2 bellard
    if ( ! roundNearestEven ) {
440 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
441 158142c2 bellard
            roundIncrement = 0;
442 158142c2 bellard
        }
443 158142c2 bellard
        else {
444 158142c2 bellard
            roundIncrement = 0x3FF;
445 158142c2 bellard
            if ( zSign ) {
446 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
447 158142c2 bellard
            }
448 158142c2 bellard
            else {
449 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
450 158142c2 bellard
            }
451 158142c2 bellard
        }
452 158142c2 bellard
    }
453 158142c2 bellard
    roundBits = zSig & 0x3FF;
454 158142c2 bellard
    if ( 0x7FD <= (bits16) zExp ) {
455 158142c2 bellard
        if (    ( 0x7FD < zExp )
456 158142c2 bellard
             || (    ( zExp == 0x7FD )
457 158142c2 bellard
                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
458 158142c2 bellard
           ) {
459 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
460 f090c9d4 pbrook
            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
461 158142c2 bellard
        }
462 158142c2 bellard
        if ( zExp < 0 ) {
463 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
464 158142c2 bellard
            isTiny =
465 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
466 158142c2 bellard
                || ( zExp < -1 )
467 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
468 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
469 158142c2 bellard
            zExp = 0;
470 158142c2 bellard
            roundBits = zSig & 0x3FF;
471 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
472 158142c2 bellard
        }
473 158142c2 bellard
    }
474 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
475 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
476 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
477 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
478 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
479 158142c2 bellard
480 158142c2 bellard
}
481 158142c2 bellard
482 158142c2 bellard
/*----------------------------------------------------------------------------
483 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
484 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
485 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
486 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
487 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
488 158142c2 bellard
| floating-point exponent.
489 158142c2 bellard
*----------------------------------------------------------------------------*/
490 158142c2 bellard
491 158142c2 bellard
static float64
492 158142c2 bellard
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
493 158142c2 bellard
{
494 158142c2 bellard
    int8 shiftCount;
495 158142c2 bellard
496 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
497 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
498 158142c2 bellard
499 158142c2 bellard
}
500 158142c2 bellard
501 158142c2 bellard
#ifdef FLOATX80
502 158142c2 bellard
503 158142c2 bellard
/*----------------------------------------------------------------------------
504 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
505 158142c2 bellard
| value `a'.
506 158142c2 bellard
*----------------------------------------------------------------------------*/
507 158142c2 bellard
508 158142c2 bellard
INLINE bits64 extractFloatx80Frac( floatx80 a )
509 158142c2 bellard
{
510 158142c2 bellard
511 158142c2 bellard
    return a.low;
512 158142c2 bellard
513 158142c2 bellard
}
514 158142c2 bellard
515 158142c2 bellard
/*----------------------------------------------------------------------------
516 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
517 158142c2 bellard
| value `a'.
518 158142c2 bellard
*----------------------------------------------------------------------------*/
519 158142c2 bellard
520 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
521 158142c2 bellard
{
522 158142c2 bellard
523 158142c2 bellard
    return a.high & 0x7FFF;
524 158142c2 bellard
525 158142c2 bellard
}
526 158142c2 bellard
527 158142c2 bellard
/*----------------------------------------------------------------------------
528 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
529 158142c2 bellard
| `a'.
530 158142c2 bellard
*----------------------------------------------------------------------------*/
531 158142c2 bellard
532 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
533 158142c2 bellard
{
534 158142c2 bellard
535 158142c2 bellard
    return a.high>>15;
536 158142c2 bellard
537 158142c2 bellard
}
538 158142c2 bellard
539 158142c2 bellard
/*----------------------------------------------------------------------------
540 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
541 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
542 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
543 158142c2 bellard
| `zSigPtr', respectively.
544 158142c2 bellard
*----------------------------------------------------------------------------*/
545 158142c2 bellard
546 158142c2 bellard
static void
547 158142c2 bellard
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
548 158142c2 bellard
{
549 158142c2 bellard
    int8 shiftCount;
550 158142c2 bellard
551 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
552 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
553 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
554 158142c2 bellard
555 158142c2 bellard
}
556 158142c2 bellard
557 158142c2 bellard
/*----------------------------------------------------------------------------
558 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
559 158142c2 bellard
| extended double-precision floating-point value, returning the result.
560 158142c2 bellard
*----------------------------------------------------------------------------*/
561 158142c2 bellard
562 158142c2 bellard
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
563 158142c2 bellard
{
564 158142c2 bellard
    floatx80 z;
565 158142c2 bellard
566 158142c2 bellard
    z.low = zSig;
567 158142c2 bellard
    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
568 158142c2 bellard
    return z;
569 158142c2 bellard
570 158142c2 bellard
}
571 158142c2 bellard
572 158142c2 bellard
/*----------------------------------------------------------------------------
573 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
574 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
575 158142c2 bellard
| and returns the proper extended double-precision floating-point value
576 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
577 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
578 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
579 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
580 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
581 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
582 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
583 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
584 158142c2 bellard
| double-precision floating-point number.
585 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
586 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
587 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
588 158142c2 bellard
| format.
589 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
590 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
591 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
592 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
593 158142c2 bellard
| Floating-Point Arithmetic.
594 158142c2 bellard
*----------------------------------------------------------------------------*/
595 158142c2 bellard
596 158142c2 bellard
static floatx80
597 158142c2 bellard
 roundAndPackFloatx80(
598 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
599 158142c2 bellard
 STATUS_PARAM)
600 158142c2 bellard
{
    /* Rounds the abstract value (zSign, zExp, zSig0:zSig1) according to
     * STATUS(float_rounding_mode) and `roundingPrecision', raises the
     * inexact/underflow/overflow flags as needed, and packs the result.
     * Two code paths exist: a reduced-precision path (roundingPrecision 64
     * or 32) that rounds within zSig0 using a bit mask, and the
     * `precision80' path that rounds on zSig1.  Both share the `overflow'
     * label further down. */
601 158142c2 bellard
    int8 roundingMode;
602 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
603 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
604 158142c2 bellard
605 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
606 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
607 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced precision: roundMask covers the low bits of zSig0 that will be
     * discarded (11 bits for 64, 40 bits for 32) and roundIncrement is the
     * top bit of that mask, i.e. half a unit in the last kept place. */
608 158142c2 bellard
    if ( roundingPrecision == 64 ) {
609 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
610 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
611 158142c2 bellard
    }
612 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
613 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
614 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
615 158142c2 bellard
    }
616 158142c2 bellard
    else {
        /* Any other precision value is treated as full 80-bit precision. */
617 158142c2 bellard
        goto precision80;
618 158142c2 bellard
    }
    /* Fold any nonzero bits of zSig1 into bit 0 of zSig0 as a sticky bit. */
619 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
    /* Directed rounding: either add nothing (truncate) or add the whole mask
     * so that any nonzero discarded bit carries into the kept bits. */
620 158142c2 bellard
    if ( ! roundNearestEven ) {
621 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
622 158142c2 bellard
            roundIncrement = 0;
623 158142c2 bellard
        }
624 158142c2 bellard
        else {
625 158142c2 bellard
            roundIncrement = roundMask;
626 158142c2 bellard
            if ( zSign ) {
627 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
628 158142c2 bellard
            }
629 158142c2 bellard
            else {
630 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
631 158142c2 bellard
            }
632 158142c2 bellard
        }
633 158142c2 bellard
    }
634 158142c2 bellard
    roundBits = zSig0 & roundMask;
    /* Unsigned comparison: taken when zExp <= 0 or zExp >= 0x7FFE
     * (assumes bits32 is an unsigned 32-bit type -- TODO confirm). */
635 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
        /* Overflow if the exponent is already too large, or if it is the
         * largest finite exponent and adding the increment wraps zSig0. */
636 158142c2 bellard
        if (    ( 0x7FFE < zExp )
637 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
638 158142c2 bellard
           ) {
639 158142c2 bellard
            goto overflow;
640 158142c2 bellard
        }
641 158142c2 bellard
        if ( zExp <= 0 ) {
            /* With flush_to_zero set, a signed zero is returned right away;
             * no exception flag is raised on this line. */
642 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
            /* Tiny when tininess is detected before rounding, when the
             * exponent is below zero, or when adding the increment does not
             * wrap zSig0. */
643 158142c2 bellard
            isTiny =
644 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
645 158142c2 bellard
                || ( zExp < 0 )
646 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
            /* Denormalize by 1 - zExp bit positions; the helper's name
             * suggests shifted-out bits are jammed into the LSB -- confirm
             * against its definition. */
647 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
648 158142c2 bellard
            zExp = 0;
649 158142c2 bellard
            roundBits = zSig0 & roundMask;
650 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
651 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
652 158142c2 bellard
            zSig0 += roundIncrement;
            /* Rounding set bit 63 of the significand, so the exponent field
             * becomes 1 instead of 0. */
653 158142c2 bellard
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
            /* roundIncrement now denotes the lowest kept bit.  On an exact
             * tie in nearest-even mode the mask is widened by that bit so it
             * is cleared together with the discarded bits. */
654 158142c2 bellard
            roundIncrement = roundMask + 1;
655 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
656 158142c2 bellard
                roundMask |= roundIncrement;
657 158142c2 bellard
            }
658 158142c2 bellard
            zSig0 &= ~ roundMask;
659 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
660 158142c2 bellard
        }
661 158142c2 bellard
    }
    /* Normal-range result at reduced precision. */
662 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
663 158142c2 bellard
    zSig0 += roundIncrement;
    /* Unsigned wrap-around means the addition carried out of bit 63:
     * bump the exponent and restart the significand at bit 63 alone. */
664 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
665 158142c2 bellard
        ++zExp;
666 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
667 158142c2 bellard
    }
    /* Same tie-to-even mask widening as in the subnormal branch above. */
668 158142c2 bellard
    roundIncrement = roundMask + 1;
669 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
670 158142c2 bellard
        roundMask |= roundIncrement;
671 158142c2 bellard
    }
672 158142c2 bellard
    zSig0 &= ~ roundMask;
673 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
674 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
    /* Full-precision path: zSig0 is kept whole and zSig1 holds the bits below
     * it.  In nearest-even mode the top bit of zSig1 decides the increment;
     * in the directed modes any nonzero zSig1 does, depending on the sign. */
675 158142c2 bellard
 precision80:
676 158142c2 bellard
    increment = ( (sbits64) zSig1 < 0 );
677 158142c2 bellard
    if ( ! roundNearestEven ) {
678 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
679 158142c2 bellard
            increment = 0;
680 158142c2 bellard
        }
681 158142c2 bellard
        else {
682 158142c2 bellard
            if ( zSign ) {
683 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
684 158142c2 bellard
            }
685 158142c2 bellard
            else {
686 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
687 158142c2 bellard
            }
688 158142c2 bellard
        }
689 158142c2 bellard
    }
    /* Same unsigned range test as above: zExp <= 0 or zExp >= 0x7FFE. */
690 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
691 158142c2 bellard
        if (    ( 0x7FFE < zExp )
692 158142c2 bellard
             || (    ( zExp == 0x7FFE )
693 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
694 158142c2 bellard
                  && increment
695 158142c2 bellard
                )
696 158142c2 bellard
           ) {
            /* Zero mask so that ~roundMask below is an all-ones significand.
             * When `overflow' is entered by goto from the reduced-precision
             * path, roundMask still holds that precision's mask instead. */
697 158142c2 bellard
            roundMask = 0;
698 158142c2 bellard
 overflow:
699 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* Modes that round this sign toward zero return exponent 0x7FFE
             * with significand ~roundMask; all others return exponent 0x7FFF
             * with only bit 63 of the significand set. */
700 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
701 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
702 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
703 158142c2 bellard
               ) {
704 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
705 158142c2 bellard
            }
706 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
707 158142c2 bellard
        }
        /* NOTE(review): unlike the reduced-precision path, this branch does
         * not consult STATUS(flush_to_zero) -- confirm this is intended. */
708 158142c2 bellard
        if ( zExp <= 0 ) {
709 158142c2 bellard
            isTiny =
710 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
711 158142c2 bellard
                || ( zExp < 0 )
712 158142c2 bellard
                || ! increment
713 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
            /* Denormalize the 128-bit pair by 1 - zExp; zSig1 receives the
             * bits that fall below zSig0 (helper semantics assumed from its
             * name -- confirm against its definition). */
714 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
715 158142c2 bellard
            zExp = 0;
716 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
717 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
            /* Recompute the increment from the shifted zSig1.  In
             * round-to-zero mode neither comparison below matches, so the
             * increment ends up 0. */
718 158142c2 bellard
            if ( roundNearestEven ) {
719 158142c2 bellard
                increment = ( (sbits64) zSig1 < 0 );
720 158142c2 bellard
            }
721 158142c2 bellard
            else {
722 158142c2 bellard
                if ( zSign ) {
723 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
724 158142c2 bellard
                }
725 158142c2 bellard
                else {
726 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
727 158142c2 bellard
                }
728 158142c2 bellard
            }
729 158142c2 bellard
            if ( increment ) {
730 158142c2 bellard
                ++zSig0;
                /* Exact tie (only the top bit of zSig1 set) in nearest-even
                 * mode: clear bit 0 so the result is even. */
731 158142c2 bellard
                zSig0 &=
732 158142c2 bellard
                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
                /* The increment set bit 63, so the exponent field becomes 1. */
733 158142c2 bellard
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
734 158142c2 bellard
            }
735 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
736 158142c2 bellard
        }
737 158142c2 bellard
    }
    /* Normal-range result at full precision. */
738 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
739 158142c2 bellard
    if ( increment ) {
740 158142c2 bellard
        ++zSig0;
        /* zSig0 wrapped to zero: carry into the exponent and restart the
         * significand at bit 63 alone. */
741 158142c2 bellard
        if ( zSig0 == 0 ) {
742 158142c2 bellard
            ++zExp;
743 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
744 158142c2 bellard
        }
745 158142c2 bellard
        else {
            /* Tie-to-even: clear bit 0 when zSig1 was exactly one half. */
746 158142c2 bellard
            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
747 158142c2 bellard
        }
748 158142c2 bellard
    }
749 158142c2 bellard
    else {
        /* A zero significand is packed with a zero exponent field. */
750 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
751 158142c2 bellard
    }
752 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
753 158142c2 bellard
754 158142c2 bellard
}
755 158142c2 bellard
756 158142c2 bellard
/*----------------------------------------------------------------------------
757 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
758 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
759 158142c2 bellard
| and returns the proper extended double-precision floating-point value
760 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
761 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
762 158142c2 bellard
| normalized.
763 158142c2 bellard
*----------------------------------------------------------------------------*/
764 158142c2 bellard
765 158142c2 bellard
static floatx80
766 158142c2 bellard
 normalizeRoundAndPackFloatx80(
767 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
768 158142c2 bellard
 STATUS_PARAM)
769 158142c2 bellard
{
770 158142c2 bellard
    int8 shiftCount;
771 158142c2 bellard
772 158142c2 bellard
    if ( zSig0 == 0 ) {
773 158142c2 bellard
        zSig0 = zSig1;
774 158142c2 bellard
        zSig1 = 0;
775 158142c2 bellard
        zExp -= 64;
776 158142c2 bellard
    }
777 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
778 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
779 158142c2 bellard
    zExp -= shiftCount;
780 158142c2 bellard
    return
781 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
782 158142c2 bellard
783 158142c2 bellard
}
784 158142c2 bellard
785 158142c2 bellard
#endif
786 158142c2 bellard
787 158142c2 bellard
#ifdef FLOAT128
788 158142c2 bellard
789 158142c2 bellard
/*----------------------------------------------------------------------------
790 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
791 158142c2 bellard
| floating-point value `a'.
792 158142c2 bellard
*----------------------------------------------------------------------------*/
793 158142c2 bellard
794 158142c2 bellard
INLINE bits64 extractFloat128Frac1( float128 a )
795 158142c2 bellard
{
796 158142c2 bellard
797 158142c2 bellard
    return a.low;
798 158142c2 bellard
799 158142c2 bellard
}
800 158142c2 bellard
801 158142c2 bellard
/*----------------------------------------------------------------------------
802 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
803 158142c2 bellard
| floating-point value `a'.
804 158142c2 bellard
*----------------------------------------------------------------------------*/
805 158142c2 bellard
806 158142c2 bellard
INLINE bits64 extractFloat128Frac0( float128 a )
807 158142c2 bellard
{
808 158142c2 bellard
809 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
810 158142c2 bellard
811 158142c2 bellard
}
812 158142c2 bellard
813 158142c2 bellard
/*----------------------------------------------------------------------------
814 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
815 158142c2 bellard
| `a'.
816 158142c2 bellard
*----------------------------------------------------------------------------*/
817 158142c2 bellard
818 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
819 158142c2 bellard
{
820 158142c2 bellard
821 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
822 158142c2 bellard
823 158142c2 bellard
}
824 158142c2 bellard
825 158142c2 bellard
/*----------------------------------------------------------------------------
826 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
827 158142c2 bellard
*----------------------------------------------------------------------------*/
828 158142c2 bellard
829 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
830 158142c2 bellard
{
831 158142c2 bellard
832 158142c2 bellard
    return a.high>>63;
833 158142c2 bellard
834 158142c2 bellard
}
835 158142c2 bellard
836 158142c2 bellard
/*----------------------------------------------------------------------------
837 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
838 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
839 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
840 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
841 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
842 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
843 158142c2 bellard
| location pointed to by `zSig1Ptr'.
844 158142c2 bellard
*----------------------------------------------------------------------------*/
845 158142c2 bellard
846 158142c2 bellard
static void
847 158142c2 bellard
 normalizeFloat128Subnormal(
848 158142c2 bellard
     bits64 aSig0,
849 158142c2 bellard
     bits64 aSig1,
850 158142c2 bellard
     int32 *zExpPtr,
851 158142c2 bellard
     bits64 *zSig0Ptr,
852 158142c2 bellard
     bits64 *zSig1Ptr
853 158142c2 bellard
 )
854 158142c2 bellard
{
855 158142c2 bellard
    int8 shiftCount;
856 158142c2 bellard
857 158142c2 bellard
    if ( aSig0 == 0 ) {
858 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
859 158142c2 bellard
        if ( shiftCount < 0 ) {
860 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
861 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
862 158142c2 bellard
        }
863 158142c2 bellard
        else {
864 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
865 158142c2 bellard
            *zSig1Ptr = 0;
866 158142c2 bellard
        }
867 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
868 158142c2 bellard
    }
869 158142c2 bellard
    else {
870 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
871 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
872 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
873 158142c2 bellard
    }
874 158142c2 bellard
875 158142c2 bellard
}
876 158142c2 bellard
877 158142c2 bellard
/*----------------------------------------------------------------------------
878 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
879 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
880 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
881 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
882 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
883 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
884 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
885 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
886 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
887 158142c2 bellard
| significand.
888 158142c2 bellard
*----------------------------------------------------------------------------*/
889 158142c2 bellard
890 158142c2 bellard
INLINE float128
891 158142c2 bellard
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
892 158142c2 bellard
{
893 158142c2 bellard
    float128 z;
894 158142c2 bellard
895 158142c2 bellard
    z.low = zSig1;
896 158142c2 bellard
    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
897 158142c2 bellard
    return z;
898 158142c2 bellard
899 158142c2 bellard
}
900 158142c2 bellard
901 158142c2 bellard
/*----------------------------------------------------------------------------
902 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
903 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
904 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
905 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
906 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
907 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
908 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
909 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
910 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
911 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
912 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
913 158142c2 bellard
| precision floating-point number.
914 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
915 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
916 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
917 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
918 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
919 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
920 158142c2 bellard
*----------------------------------------------------------------------------*/
921 158142c2 bellard
922 158142c2 bellard
static float128
923 158142c2 bellard
 roundAndPackFloat128(
924 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
925 158142c2 bellard
{
    /* Rounds the abstract value (zSign, zExp, zSig0:zSig1:zSig2) according to
     * STATUS(float_rounding_mode), raises inexact/underflow/overflow flags
     * as needed, and packs the result.  zSig0:zSig1 is the significand that
     * is kept; zSig2 holds the bits below it and drives the rounding. */
926 158142c2 bellard
    int8 roundingMode;
927 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
928 158142c2 bellard
929 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
930 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Nearest-even default: increment when the top bit of zSig2 is set.
     * Directed modes below override this based on sign and nonzero zSig2. */
931 158142c2 bellard
    increment = ( (sbits64) zSig2 < 0 );
932 158142c2 bellard
    if ( ! roundNearestEven ) {
933 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
934 158142c2 bellard
            increment = 0;
935 158142c2 bellard
        }
936 158142c2 bellard
        else {
937 158142c2 bellard
            if ( zSign ) {
938 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
939 158142c2 bellard
            }
940 158142c2 bellard
            else {
941 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
942 158142c2 bellard
            }
943 158142c2 bellard
        }
944 158142c2 bellard
    }
    /* Unsigned comparison: taken when zExp >= 0x7FFD or zExp < 0
     * (assumes bits32 is an unsigned 32-bit type -- TODO confirm). */
945 158142c2 bellard
    if ( 0x7FFD <= (bits32) zExp ) {
        /* Overflow if the exponent is already too large, or if it equals
         * 0x7FFD while the significand is all ones and will be incremented. */
946 158142c2 bellard
        if (    ( 0x7FFD < zExp )
947 158142c2 bellard
             || (    ( zExp == 0x7FFD )
948 158142c2 bellard
                  && eq128(
949 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
950 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
951 158142c2 bellard
                         zSig0,
952 158142c2 bellard
                         zSig1
953 158142c2 bellard
                     )
954 158142c2 bellard
                  && increment
955 158142c2 bellard
                )
956 158142c2 bellard
           ) {
957 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* Modes that round this sign toward zero return exponent 0x7FFE
             * with an all-ones fraction; all others return exponent 0x7FFF
             * with a zero fraction. */
958 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
959 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
960 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
961 158142c2 bellard
               ) {
962 158142c2 bellard
                return
963 158142c2 bellard
                    packFloat128(
964 158142c2 bellard
                        zSign,
965 158142c2 bellard
                        0x7FFE,
966 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
967 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
968 158142c2 bellard
                    );
969 158142c2 bellard
            }
970 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
971 158142c2 bellard
        }
972 158142c2 bellard
        if ( zExp < 0 ) {
            /* With flush_to_zero set, a signed zero is returned right away;
             * no exception flag is raised on this line. */
973 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
            /* Tiny when tininess is detected before rounding, when the
             * exponent is below -1, when no increment is pending, or when
             * the significand is below all ones. */
974 158142c2 bellard
            isTiny =
975 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
976 158142c2 bellard
                || ( zExp < -1 )
977 158142c2 bellard
                || ! increment
978 158142c2 bellard
                || lt128(
979 158142c2 bellard
                       zSig0,
980 158142c2 bellard
                       zSig1,
981 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
982 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
983 158142c2 bellard
                   );
            /* Denormalize the 192-bit triple by -zExp bit positions (helper
             * semantics assumed from its name -- confirm against its
             * definition). */
984 158142c2 bellard
            shift128ExtraRightJamming(
985 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
986 158142c2 bellard
            zExp = 0;
987 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
            /* Recompute the increment from the shifted zSig2.  In
             * round-to-zero mode neither comparison below matches, so the
             * increment ends up 0. */
988 158142c2 bellard
            if ( roundNearestEven ) {
989 158142c2 bellard
                increment = ( (sbits64) zSig2 < 0 );
990 158142c2 bellard
            }
991 158142c2 bellard
            else {
992 158142c2 bellard
                if ( zSign ) {
993 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
994 158142c2 bellard
                }
995 158142c2 bellard
                else {
996 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
997 158142c2 bellard
                }
998 158142c2 bellard
            }
999 158142c2 bellard
        }
1000 158142c2 bellard
    }
    /* Common tail for normal and denormalized results. */
1001 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1002 158142c2 bellard
    if ( increment ) {
        /* Add 1 to the 128-bit significand zSig0:zSig1. */
1003 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Tie-to-even: zSig2 + zSig2 wraps to 0 when zSig2 has only its top
         * bit set; in nearest-even mode bit 0 of zSig1 is then cleared. */
1004 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1005 158142c2 bellard
    }
1006 158142c2 bellard
    else {
        /* A zero significand is packed with a zero exponent field. */
1007 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1008 158142c2 bellard
    }
1009 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1010 158142c2 bellard
1011 158142c2 bellard
}
1012 158142c2 bellard
1013 158142c2 bellard
/*----------------------------------------------------------------------------
1014 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1015 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1016 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1017 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1018 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1019 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1020 158142c2 bellard
| point exponent.
1021 158142c2 bellard
*----------------------------------------------------------------------------*/
1022 158142c2 bellard
1023 158142c2 bellard
static float128
1024 158142c2 bellard
 normalizeRoundAndPackFloat128(
1025 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1026 158142c2 bellard
{
1027 158142c2 bellard
    int8 shiftCount;
1028 158142c2 bellard
    bits64 zSig2;
1029 158142c2 bellard
1030 158142c2 bellard
    if ( zSig0 == 0 ) {
1031 158142c2 bellard
        zSig0 = zSig1;
1032 158142c2 bellard
        zSig1 = 0;
1033 158142c2 bellard
        zExp -= 64;
1034 158142c2 bellard
    }
1035 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1036 158142c2 bellard
    if ( 0 <= shiftCount ) {
1037 158142c2 bellard
        zSig2 = 0;
1038 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1039 158142c2 bellard
    }
1040 158142c2 bellard
    else {
1041 158142c2 bellard
        shift128ExtraRightJamming(
1042 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1043 158142c2 bellard
    }
1044 158142c2 bellard
    zExp -= shiftCount;
1045 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1046 158142c2 bellard
1047 158142c2 bellard
}
1048 158142c2 bellard
1049 158142c2 bellard
#endif
1050 158142c2 bellard
1051 158142c2 bellard
/*----------------------------------------------------------------------------
1052 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1053 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1054 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1055 158142c2 bellard
*----------------------------------------------------------------------------*/
1056 158142c2 bellard
1057 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
1058 158142c2 bellard
{
1059 158142c2 bellard
    flag zSign;
1060 158142c2 bellard
1061 f090c9d4 pbrook
    if ( a == 0 ) return float32_zero;
1062 158142c2 bellard
    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1063 158142c2 bellard
    zSign = ( a < 0 );
1064 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1065 158142c2 bellard
1066 158142c2 bellard
}
1067 158142c2 bellard
1068 158142c2 bellard
/*----------------------------------------------------------------------------
1069 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1070 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1071 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1072 158142c2 bellard
*----------------------------------------------------------------------------*/
1073 158142c2 bellard
1074 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
1075 158142c2 bellard
{
1076 158142c2 bellard
    flag zSign;
1077 158142c2 bellard
    uint32 absA;
1078 158142c2 bellard
    int8 shiftCount;
1079 158142c2 bellard
    bits64 zSig;
1080 158142c2 bellard
1081 f090c9d4 pbrook
    if ( a == 0 ) return float64_zero;
1082 158142c2 bellard
    zSign = ( a < 0 );
1083 158142c2 bellard
    absA = zSign ? - a : a;
1084 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 21;
1085 158142c2 bellard
    zSig = absA;
1086 158142c2 bellard
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1087 158142c2 bellard
1088 158142c2 bellard
}
1089 158142c2 bellard
1090 158142c2 bellard
#ifdef FLOATX80
1091 158142c2 bellard
1092 158142c2 bellard
/*----------------------------------------------------------------------------
1093 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1094 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1095 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1096 158142c2 bellard
| Arithmetic.
1097 158142c2 bellard
*----------------------------------------------------------------------------*/
1098 158142c2 bellard
1099 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1100 158142c2 bellard
{
1101 158142c2 bellard
    flag zSign;
1102 158142c2 bellard
    uint32 absA;
1103 158142c2 bellard
    int8 shiftCount;
1104 158142c2 bellard
    bits64 zSig;
1105 158142c2 bellard
1106 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1107 158142c2 bellard
    zSign = ( a < 0 );
1108 158142c2 bellard
    absA = zSign ? - a : a;
1109 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 32;
1110 158142c2 bellard
    zSig = absA;
1111 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1112 158142c2 bellard
1113 158142c2 bellard
}
1114 158142c2 bellard
1115 158142c2 bellard
#endif
1116 158142c2 bellard
1117 158142c2 bellard
#ifdef FLOAT128
1118 158142c2 bellard
1119 158142c2 bellard
/*----------------------------------------------------------------------------
1120 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1121 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1122 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1123 158142c2 bellard
*----------------------------------------------------------------------------*/
1124 158142c2 bellard
1125 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
1126 158142c2 bellard
{
1127 158142c2 bellard
    flag zSign;
1128 158142c2 bellard
    uint32 absA;
1129 158142c2 bellard
    int8 shiftCount;
1130 158142c2 bellard
    bits64 zSig0;
1131 158142c2 bellard
1132 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1133 158142c2 bellard
    zSign = ( a < 0 );
1134 158142c2 bellard
    absA = zSign ? - a : a;
1135 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 17;
1136 158142c2 bellard
    zSig0 = absA;
1137 158142c2 bellard
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1138 158142c2 bellard
1139 158142c2 bellard
}
1140 158142c2 bellard
1141 158142c2 bellard
#endif
1142 158142c2 bellard
1143 158142c2 bellard
/*----------------------------------------------------------------------------
1144 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1145 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1146 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1147 158142c2 bellard
*----------------------------------------------------------------------------*/
1148 158142c2 bellard
1149 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
1150 158142c2 bellard
{
1151 158142c2 bellard
    flag zSign;
1152 158142c2 bellard
    uint64 absA;
1153 158142c2 bellard
    int8 shiftCount;
1154 158142c2 bellard
1155 f090c9d4 pbrook
    if ( a == 0 ) return float32_zero;
1156 158142c2 bellard
    zSign = ( a < 0 );
1157 158142c2 bellard
    absA = zSign ? - a : a;
1158 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) - 40;
1159 158142c2 bellard
    if ( 0 <= shiftCount ) {
1160 158142c2 bellard
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1161 158142c2 bellard
    }
1162 158142c2 bellard
    else {
1163 158142c2 bellard
        shiftCount += 7;
1164 158142c2 bellard
        if ( shiftCount < 0 ) {
1165 158142c2 bellard
            shift64RightJamming( absA, - shiftCount, &absA );
1166 158142c2 bellard
        }
1167 158142c2 bellard
        else {
1168 158142c2 bellard
            absA <<= shiftCount;
1169 158142c2 bellard
        }
1170 158142c2 bellard
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1171 158142c2 bellard
    }
1172 158142c2 bellard
1173 158142c2 bellard
}
1174 158142c2 bellard
1175 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  Zero yields `float32_zero'.  The
| input is unsigned, so the result is always packed with a sign of 0.
| Values with at least 40 leading zero bits are shifted and packed directly;
| larger ones are aligned (with jamming when shifting right) and handed to
| `roundAndPackFloat32'.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* Sign must be 0 here: the previous `1 > 0' evaluated to 1 and
           produced a negative result for every non-zero input. */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    /* Re-bias the shift by 7 for the rounding path. */
    shiftCount += 7;
    if ( shiftCount < 0 ) {
        shift64RightJamming( a, - shiftCount, &a );
    }
    else {
        a <<= shiftCount;
    }
    return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
}
1195 75d62a58 j_mayer
1196 158142c2 bellard
/*----------------------------------------------------------------------------
1197 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1198 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1199 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1200 158142c2 bellard
*----------------------------------------------------------------------------*/
1201 158142c2 bellard
1202 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
1203 158142c2 bellard
{
1204 158142c2 bellard
    flag zSign;
1205 158142c2 bellard
1206 f090c9d4 pbrook
    if ( a == 0 ) return float64_zero;
1207 158142c2 bellard
    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1208 158142c2 bellard
        return packFloat64( 1, 0x43E, 0 );
1209 158142c2 bellard
    }
1210 158142c2 bellard
    zSign = ( a < 0 );
1211 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1212 158142c2 bellard
1213 158142c2 bellard
}
1214 158142c2 bellard
1215 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  Zero yields `float64_zero'.  The
| result is always packed with a sign of 0.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int16 zExp = 0x43C;

    if ( a == 0 ) return float64_zero;
    /* A value with bit 63 set has no leading zero, which would make the
       normalization step compute a negative shift count (NOTE(review):
       based on the usual definition of `normalizeRoundAndPackFloat64',
       which is outside this view).  Drop one bit first, jamming it into
       the sticky position, and compensate in the exponent. */
    if ( a & LIT64( 0x8000000000000000 ) ) {
        shift64RightJamming( a, 1, &a );
        zExp += 1;
    }
    return normalizeRoundAndPackFloat64( 0, zExp, a STATUS_VAR );

}
1221 75d62a58 j_mayer
1222 158142c2 bellard
#ifdef FLOATX80
1223 158142c2 bellard
1224 158142c2 bellard
/*----------------------------------------------------------------------------
1225 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1226 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1227 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1228 158142c2 bellard
| Arithmetic.
1229 158142c2 bellard
*----------------------------------------------------------------------------*/
1230 158142c2 bellard
1231 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1232 158142c2 bellard
{
1233 158142c2 bellard
    flag zSign;
1234 158142c2 bellard
    uint64 absA;
1235 158142c2 bellard
    int8 shiftCount;
1236 158142c2 bellard
1237 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1238 158142c2 bellard
    zSign = ( a < 0 );
1239 158142c2 bellard
    absA = zSign ? - a : a;
1240 158142c2 bellard
    shiftCount = countLeadingZeros64( absA );
1241 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1242 158142c2 bellard
1243 158142c2 bellard
}
1244 158142c2 bellard
1245 158142c2 bellard
#endif
1246 158142c2 bellard
1247 158142c2 bellard
#ifdef FLOAT128
1248 158142c2 bellard
1249 158142c2 bellard
/*----------------------------------------------------------------------------
1250 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1251 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1252 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1253 158142c2 bellard
*----------------------------------------------------------------------------*/
1254 158142c2 bellard
1255 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
1256 158142c2 bellard
{
1257 158142c2 bellard
    flag zSign;
1258 158142c2 bellard
    uint64 absA;
1259 158142c2 bellard
    int8 shiftCount;
1260 158142c2 bellard
    int32 zExp;
1261 158142c2 bellard
    bits64 zSig0, zSig1;
1262 158142c2 bellard
1263 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1264 158142c2 bellard
    zSign = ( a < 0 );
1265 158142c2 bellard
    absA = zSign ? - a : a;
1266 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) + 49;
1267 158142c2 bellard
    zExp = 0x406E - shiftCount;
1268 158142c2 bellard
    if ( 64 <= shiftCount ) {
1269 158142c2 bellard
        zSig1 = 0;
1270 158142c2 bellard
        zSig0 = absA;
1271 158142c2 bellard
        shiftCount -= 64;
1272 158142c2 bellard
    }
1273 158142c2 bellard
    else {
1274 158142c2 bellard
        zSig1 = absA;
1275 158142c2 bellard
        zSig0 = 0;
1276 158142c2 bellard
    }
1277 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1278 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1279 158142c2 bellard
1280 158142c2 bellard
}
1281 158142c2 bellard
1282 158142c2 bellard
#endif
1283 158142c2 bellard
1284 158142c2 bellard
/*----------------------------------------------------------------------------
1285 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1286 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1287 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1288 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1289 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1290 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1291 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1292 158142c2 bellard
*----------------------------------------------------------------------------*/
1293 158142c2 bellard
1294 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
1295 158142c2 bellard
{
1296 158142c2 bellard
    flag aSign;
1297 158142c2 bellard
    int16 aExp, shiftCount;
1298 158142c2 bellard
    bits32 aSig;
1299 158142c2 bellard
    bits64 aSig64;
1300 158142c2 bellard
1301 158142c2 bellard
    aSig = extractFloat32Frac( a );
1302 158142c2 bellard
    aExp = extractFloat32Exp( a );
1303 158142c2 bellard
    aSign = extractFloat32Sign( a );
1304 158142c2 bellard
    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1305 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1306 158142c2 bellard
    shiftCount = 0xAF - aExp;
1307 158142c2 bellard
    aSig64 = aSig;
1308 158142c2 bellard
    aSig64 <<= 32;
1309 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1310 158142c2 bellard
    return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1311 158142c2 bellard
1312 158142c2 bellard
}
1313 158142c2 bellard
1314 158142c2 bellard
/*----------------------------------------------------------------------------
1315 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1316 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1317 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1318 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1319 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1320 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1321 158142c2 bellard
| returned.
1322 158142c2 bellard
*----------------------------------------------------------------------------*/
1323 158142c2 bellard
1324 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1325 158142c2 bellard
{
1326 158142c2 bellard
    flag aSign;
1327 158142c2 bellard
    int16 aExp, shiftCount;
1328 158142c2 bellard
    bits32 aSig;
1329 158142c2 bellard
    int32 z;
1330 158142c2 bellard
1331 158142c2 bellard
    aSig = extractFloat32Frac( a );
1332 158142c2 bellard
    aExp = extractFloat32Exp( a );
1333 158142c2 bellard
    aSign = extractFloat32Sign( a );
1334 158142c2 bellard
    shiftCount = aExp - 0x9E;
1335 158142c2 bellard
    if ( 0 <= shiftCount ) {
1336 f090c9d4 pbrook
        if ( float32_val(a) != 0xCF000000 ) {
1337 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1338 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1339 158142c2 bellard
        }
1340 158142c2 bellard
        return (sbits32) 0x80000000;
1341 158142c2 bellard
    }
1342 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1343 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1344 158142c2 bellard
        return 0;
1345 158142c2 bellard
    }
1346 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
1347 158142c2 bellard
    z = aSig>>( - shiftCount );
1348 158142c2 bellard
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1349 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1350 158142c2 bellard
    }
1351 158142c2 bellard
    if ( aSign ) z = - z;
1352 158142c2 bellard
    return z;
1353 158142c2 bellard
1354 158142c2 bellard
}
1355 158142c2 bellard
1356 158142c2 bellard
/*----------------------------------------------------------------------------
1357 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1358 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1359 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1360 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1361 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1362 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1363 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1364 158142c2 bellard
*----------------------------------------------------------------------------*/
1365 158142c2 bellard
1366 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
1367 158142c2 bellard
{
1368 158142c2 bellard
    flag aSign;
1369 158142c2 bellard
    int16 aExp, shiftCount;
1370 158142c2 bellard
    bits32 aSig;
1371 158142c2 bellard
    bits64 aSig64, aSigExtra;
1372 158142c2 bellard
1373 158142c2 bellard
    aSig = extractFloat32Frac( a );
1374 158142c2 bellard
    aExp = extractFloat32Exp( a );
1375 158142c2 bellard
    aSign = extractFloat32Sign( a );
1376 158142c2 bellard
    shiftCount = 0xBE - aExp;
1377 158142c2 bellard
    if ( shiftCount < 0 ) {
1378 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1379 158142c2 bellard
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1380 158142c2 bellard
            return LIT64( 0x7FFFFFFFFFFFFFFF );
1381 158142c2 bellard
        }
1382 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
1383 158142c2 bellard
    }
1384 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1385 158142c2 bellard
    aSig64 = aSig;
1386 158142c2 bellard
    aSig64 <<= 40;
1387 158142c2 bellard
    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1388 158142c2 bellard
    return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1389 158142c2 bellard
1390 158142c2 bellard
}
1391 158142c2 bellard
1392 158142c2 bellard
/*----------------------------------------------------------------------------
1393 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1394 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1395 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1396 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1397 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1398 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1399 158142c2 bellard
| returned.
1400 158142c2 bellard
*----------------------------------------------------------------------------*/
1401 158142c2 bellard
1402 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1403 158142c2 bellard
{
1404 158142c2 bellard
    flag aSign;
1405 158142c2 bellard
    int16 aExp, shiftCount;
1406 158142c2 bellard
    bits32 aSig;
1407 158142c2 bellard
    bits64 aSig64;
1408 158142c2 bellard
    int64 z;
1409 158142c2 bellard
1410 158142c2 bellard
    aSig = extractFloat32Frac( a );
1411 158142c2 bellard
    aExp = extractFloat32Exp( a );
1412 158142c2 bellard
    aSign = extractFloat32Sign( a );
1413 158142c2 bellard
    shiftCount = aExp - 0xBE;
1414 158142c2 bellard
    if ( 0 <= shiftCount ) {
1415 f090c9d4 pbrook
        if ( float32_val(a) != 0xDF000000 ) {
1416 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1417 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1418 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
1419 158142c2 bellard
            }
1420 158142c2 bellard
        }
1421 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
1422 158142c2 bellard
    }
1423 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1424 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1425 158142c2 bellard
        return 0;
1426 158142c2 bellard
    }
1427 158142c2 bellard
    aSig64 = aSig | 0x00800000;
1428 158142c2 bellard
    aSig64 <<= 40;
1429 158142c2 bellard
    z = aSig64>>( - shiftCount );
1430 158142c2 bellard
    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1431 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1432 158142c2 bellard
    }
1433 158142c2 bellard
    if ( aSign ) z = - z;
1434 158142c2 bellard
    return z;
1435 158142c2 bellard
1436 158142c2 bellard
}
1437 158142c2 bellard
1438 158142c2 bellard
/*----------------------------------------------------------------------------
1439 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1440 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1441 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1442 158142c2 bellard
| Arithmetic.
1443 158142c2 bellard
*----------------------------------------------------------------------------*/
1444 158142c2 bellard
1445 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
1446 158142c2 bellard
{
1447 158142c2 bellard
    flag aSign;
1448 158142c2 bellard
    int16 aExp;
1449 158142c2 bellard
    bits32 aSig;
1450 158142c2 bellard
1451 158142c2 bellard
    aSig = extractFloat32Frac( a );
1452 158142c2 bellard
    aExp = extractFloat32Exp( a );
1453 158142c2 bellard
    aSign = extractFloat32Sign( a );
1454 158142c2 bellard
    if ( aExp == 0xFF ) {
1455 158142c2 bellard
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
1456 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
1457 158142c2 bellard
    }
1458 158142c2 bellard
    if ( aExp == 0 ) {
1459 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1460 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1461 158142c2 bellard
        --aExp;
1462 158142c2 bellard
    }
1463 158142c2 bellard
    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1464 158142c2 bellard
1465 158142c2 bellard
}
1466 158142c2 bellard
1467 158142c2 bellard
#ifdef FLOATX80
1468 158142c2 bellard
1469 158142c2 bellard
/*----------------------------------------------------------------------------
1470 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1471 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1472 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1473 158142c2 bellard
| Arithmetic.
1474 158142c2 bellard
*----------------------------------------------------------------------------*/
1475 158142c2 bellard
1476 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1477 158142c2 bellard
{
1478 158142c2 bellard
    flag aSign;
1479 158142c2 bellard
    int16 aExp;
1480 158142c2 bellard
    bits32 aSig;
1481 158142c2 bellard
1482 158142c2 bellard
    aSig = extractFloat32Frac( a );
1483 158142c2 bellard
    aExp = extractFloat32Exp( a );
1484 158142c2 bellard
    aSign = extractFloat32Sign( a );
1485 158142c2 bellard
    if ( aExp == 0xFF ) {
1486 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
1487 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1488 158142c2 bellard
    }
1489 158142c2 bellard
    if ( aExp == 0 ) {
1490 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1491 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1492 158142c2 bellard
    }
1493 158142c2 bellard
    aSig |= 0x00800000;
1494 158142c2 bellard
    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1495 158142c2 bellard
1496 158142c2 bellard
}
1497 158142c2 bellard
1498 158142c2 bellard
#endif
1499 158142c2 bellard
1500 158142c2 bellard
#ifdef FLOAT128
1501 158142c2 bellard
1502 158142c2 bellard
/*----------------------------------------------------------------------------
1503 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1504 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1505 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1506 158142c2 bellard
| Arithmetic.
1507 158142c2 bellard
*----------------------------------------------------------------------------*/
1508 158142c2 bellard
1509 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    bits32 frac = extractFloat32Frac( a );
    int16 biasedExp = extractFloat32Exp( a );
    flag negative = extractFloat32Sign( a );

    if ( biasedExp == 0xFF ) {
        /* NaNs go through the common-NaN helpers; infinity keeps its sign. */
        if ( frac != 0 ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat128( negative, 0x7FFF, 0, 0 );
    }
    if ( biasedExp == 0 ) {
        if ( frac == 0 ) return packFloat128( negative, 0, 0, 0 );
        /* Subnormal input: normalize, then step the exponent down by one. */
        normalizeFloat32Subnormal( frac, &biasedExp, &frac );
        biasedExp -= 1;
    }
    return packFloat128(
               negative, biasedExp + 0x3F80, ( (bits64) frac )<<25, 0 );

}
1530 158142c2 bellard
1531 158142c2 bellard
#endif
1532 158142c2 bellard
1533 158142c2 bellard
/*----------------------------------------------------------------------------
1534 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1535 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1536 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1537 158142c2 bellard
| Floating-Point Arithmetic.
1538 158142c2 bellard
*----------------------------------------------------------------------------*/
1539 158142c2 bellard
1540 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
1541 158142c2 bellard
{
1542 158142c2 bellard
    flag aSign;
1543 158142c2 bellard
    int16 aExp;
1544 158142c2 bellard
    bits32 lastBitMask, roundBitsMask;
1545 158142c2 bellard
    int8 roundingMode;
1546 f090c9d4 pbrook
    bits32 z;
1547 158142c2 bellard
1548 158142c2 bellard
    aExp = extractFloat32Exp( a );
1549 158142c2 bellard
    if ( 0x96 <= aExp ) {
1550 158142c2 bellard
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1551 158142c2 bellard
            return propagateFloat32NaN( a, a STATUS_VAR );
1552 158142c2 bellard
        }
1553 158142c2 bellard
        return a;
1554 158142c2 bellard
    }
1555 158142c2 bellard
    if ( aExp <= 0x7E ) {
1556 f090c9d4 pbrook
        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
1557 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1558 158142c2 bellard
        aSign = extractFloat32Sign( a );
1559 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
1560 158142c2 bellard
         case float_round_nearest_even:
1561 158142c2 bellard
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1562 158142c2 bellard
                return packFloat32( aSign, 0x7F, 0 );
1563 158142c2 bellard
            }
1564 158142c2 bellard
            break;
1565 158142c2 bellard
         case float_round_down:
1566 f090c9d4 pbrook
            return make_float32(aSign ? 0xBF800000 : 0);
1567 158142c2 bellard
         case float_round_up:
1568 f090c9d4 pbrook
            return make_float32(aSign ? 0x80000000 : 0x3F800000);
1569 158142c2 bellard
        }
1570 158142c2 bellard
        return packFloat32( aSign, 0, 0 );
1571 158142c2 bellard
    }
1572 158142c2 bellard
    lastBitMask = 1;
1573 158142c2 bellard
    lastBitMask <<= 0x96 - aExp;
1574 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
1575 f090c9d4 pbrook
    z = float32_val(a);
1576 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
1577 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
1578 158142c2 bellard
        z += lastBitMask>>1;
1579 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1580 158142c2 bellard
    }
1581 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
1582 f090c9d4 pbrook
        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1583 158142c2 bellard
            z += roundBitsMask;
1584 158142c2 bellard
        }
1585 158142c2 bellard
    }
1586 158142c2 bellard
    z &= ~ roundBitsMask;
1587 f090c9d4 pbrook
    if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1588 f090c9d4 pbrook
    return make_float32(z);
1589 158142c2 bellard
1590 158142c2 bellard
}
1591 158142c2 bellard
1592 158142c2 bellard
/*----------------------------------------------------------------------------
1593 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1594 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1595 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1596 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1597 158142c2 bellard
| Floating-Point Arithmetic.
1598 158142c2 bellard
*----------------------------------------------------------------------------*/
1599 158142c2 bellard
1600 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1601 158142c2 bellard
{
1602 158142c2 bellard
    int16 aExp, bExp, zExp;
1603 158142c2 bellard
    bits32 aSig, bSig, zSig;
1604 158142c2 bellard
    int16 expDiff;
1605 158142c2 bellard
1606 158142c2 bellard
    aSig = extractFloat32Frac( a );
1607 158142c2 bellard
    aExp = extractFloat32Exp( a );
1608 158142c2 bellard
    bSig = extractFloat32Frac( b );
1609 158142c2 bellard
    bExp = extractFloat32Exp( b );
1610 158142c2 bellard
    expDiff = aExp - bExp;
1611 158142c2 bellard
    aSig <<= 6;
1612 158142c2 bellard
    bSig <<= 6;
1613 158142c2 bellard
    if ( 0 < expDiff ) {
1614 158142c2 bellard
        if ( aExp == 0xFF ) {
1615 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1616 158142c2 bellard
            return a;
1617 158142c2 bellard
        }
1618 158142c2 bellard
        if ( bExp == 0 ) {
1619 158142c2 bellard
            --expDiff;
1620 158142c2 bellard
        }
1621 158142c2 bellard
        else {
1622 158142c2 bellard
            bSig |= 0x20000000;
1623 158142c2 bellard
        }
1624 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1625 158142c2 bellard
        zExp = aExp;
1626 158142c2 bellard
    }
1627 158142c2 bellard
    else if ( expDiff < 0 ) {
1628 158142c2 bellard
        if ( bExp == 0xFF ) {
1629 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1630 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1631 158142c2 bellard
        }
1632 158142c2 bellard
        if ( aExp == 0 ) {
1633 158142c2 bellard
            ++expDiff;
1634 158142c2 bellard
        }
1635 158142c2 bellard
        else {
1636 158142c2 bellard
            aSig |= 0x20000000;
1637 158142c2 bellard
        }
1638 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1639 158142c2 bellard
        zExp = bExp;
1640 158142c2 bellard
    }
1641 158142c2 bellard
    else {
1642 158142c2 bellard
        if ( aExp == 0xFF ) {
1643 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1644 158142c2 bellard
            return a;
1645 158142c2 bellard
        }
1646 fe76d976 pbrook
        if ( aExp == 0 ) {
1647 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
1648 fe76d976 pbrook
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1649 fe76d976 pbrook
        }
1650 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1651 158142c2 bellard
        zExp = aExp;
1652 158142c2 bellard
        goto roundAndPack;
1653 158142c2 bellard
    }
1654 158142c2 bellard
    aSig |= 0x20000000;
1655 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1656 158142c2 bellard
    --zExp;
1657 158142c2 bellard
    if ( (sbits32) zSig < 0 ) {
1658 158142c2 bellard
        zSig = aSig + bSig;
1659 158142c2 bellard
        ++zExp;
1660 158142c2 bellard
    }
1661 158142c2 bellard
 roundAndPack:
1662 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1663 158142c2 bellard
1664 158142c2 bellard
}
1665 158142c2 bellard
1666 158142c2 bellard
/*----------------------------------------------------------------------------
1667 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1668 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1669 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1670 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1671 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1672 158142c2 bellard
*----------------------------------------------------------------------------*/
1673 158142c2 bellard
1674 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1675 158142c2 bellard
{
1676 158142c2 bellard
    int16 aExp, bExp, zExp;
1677 158142c2 bellard
    bits32 aSig, bSig, zSig;
1678 158142c2 bellard
    int16 expDiff;
1679 158142c2 bellard
1680 158142c2 bellard
    aSig = extractFloat32Frac( a );
1681 158142c2 bellard
    aExp = extractFloat32Exp( a );
1682 158142c2 bellard
    bSig = extractFloat32Frac( b );
1683 158142c2 bellard
    bExp = extractFloat32Exp( b );
1684 158142c2 bellard
    expDiff = aExp - bExp;
1685 158142c2 bellard
    aSig <<= 7;
1686 158142c2 bellard
    bSig <<= 7;
1687 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1688 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1689 158142c2 bellard
    if ( aExp == 0xFF ) {
1690 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1691 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1692 158142c2 bellard
        return float32_default_nan;
1693 158142c2 bellard
    }
1694 158142c2 bellard
    if ( aExp == 0 ) {
1695 158142c2 bellard
        aExp = 1;
1696 158142c2 bellard
        bExp = 1;
1697 158142c2 bellard
    }
1698 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1699 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1700 158142c2 bellard
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1701 158142c2 bellard
 bExpBigger:
1702 158142c2 bellard
    if ( bExp == 0xFF ) {
1703 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1704 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1705 158142c2 bellard
    }
1706 158142c2 bellard
    if ( aExp == 0 ) {
1707 158142c2 bellard
        ++expDiff;
1708 158142c2 bellard
    }
1709 158142c2 bellard
    else {
1710 158142c2 bellard
        aSig |= 0x40000000;
1711 158142c2 bellard
    }
1712 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1713 158142c2 bellard
    bSig |= 0x40000000;
1714 158142c2 bellard
 bBigger:
1715 158142c2 bellard
    zSig = bSig - aSig;
1716 158142c2 bellard
    zExp = bExp;
1717 158142c2 bellard
    zSign ^= 1;
1718 158142c2 bellard
    goto normalizeRoundAndPack;
1719 158142c2 bellard
 aExpBigger:
1720 158142c2 bellard
    if ( aExp == 0xFF ) {
1721 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1722 158142c2 bellard
        return a;
1723 158142c2 bellard
    }
1724 158142c2 bellard
    if ( bExp == 0 ) {
1725 158142c2 bellard
        --expDiff;
1726 158142c2 bellard
    }
1727 158142c2 bellard
    else {
1728 158142c2 bellard
        bSig |= 0x40000000;
1729 158142c2 bellard
    }
1730 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1731 158142c2 bellard
    aSig |= 0x40000000;
1732 158142c2 bellard
 aBigger:
1733 158142c2 bellard
    zSig = aSig - bSig;
1734 158142c2 bellard
    zExp = aExp;
1735 158142c2 bellard
 normalizeRoundAndPack:
1736 158142c2 bellard
    --zExp;
1737 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1738 158142c2 bellard
1739 158142c2 bellard
}
1740 158142c2 bellard
1741 158142c2 bellard
/*----------------------------------------------------------------------------
1742 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1743 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1744 158142c2 bellard
| Binary Floating-Point Arithmetic.
1745 158142c2 bellard
*----------------------------------------------------------------------------*/
1746 158142c2 bellard
1747 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1748 158142c2 bellard
{
1749 158142c2 bellard
    flag aSign, bSign;
1750 158142c2 bellard
1751 158142c2 bellard
    aSign = extractFloat32Sign( a );
1752 158142c2 bellard
    bSign = extractFloat32Sign( b );
1753 158142c2 bellard
    if ( aSign == bSign ) {
1754 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1755 158142c2 bellard
    }
1756 158142c2 bellard
    else {
1757 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1758 158142c2 bellard
    }
1759 158142c2 bellard
1760 158142c2 bellard
}
1761 158142c2 bellard
1762 158142c2 bellard
/*----------------------------------------------------------------------------
1763 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1764 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1765 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1766 158142c2 bellard
*----------------------------------------------------------------------------*/
1767 158142c2 bellard
1768 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1769 158142c2 bellard
{
1770 158142c2 bellard
    flag aSign, bSign;
1771 158142c2 bellard
1772 158142c2 bellard
    aSign = extractFloat32Sign( a );
1773 158142c2 bellard
    bSign = extractFloat32Sign( b );
1774 158142c2 bellard
    if ( aSign == bSign ) {
1775 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1776 158142c2 bellard
    }
1777 158142c2 bellard
    else {
1778 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1779 158142c2 bellard
    }
1780 158142c2 bellard
1781 158142c2 bellard
}
1782 158142c2 bellard
1783 158142c2 bellard
/*----------------------------------------------------------------------------
1784 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1785 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1786 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1787 158142c2 bellard
*----------------------------------------------------------------------------*/
1788 158142c2 bellard
1789 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1790 158142c2 bellard
{
1791 158142c2 bellard
    flag aSign, bSign, zSign;
1792 158142c2 bellard
    int16 aExp, bExp, zExp;
1793 158142c2 bellard
    bits32 aSig, bSig;
1794 158142c2 bellard
    bits64 zSig64;
1795 158142c2 bellard
    bits32 zSig;
1796 158142c2 bellard
1797 158142c2 bellard
    aSig = extractFloat32Frac( a );
1798 158142c2 bellard
    aExp = extractFloat32Exp( a );
1799 158142c2 bellard
    aSign = extractFloat32Sign( a );
1800 158142c2 bellard
    bSig = extractFloat32Frac( b );
1801 158142c2 bellard
    bExp = extractFloat32Exp( b );
1802 158142c2 bellard
    bSign = extractFloat32Sign( b );
1803 158142c2 bellard
    zSign = aSign ^ bSign;
1804 158142c2 bellard
    if ( aExp == 0xFF ) {
1805 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1806 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1807 158142c2 bellard
        }
1808 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1809 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1810 158142c2 bellard
            return float32_default_nan;
1811 158142c2 bellard
        }
1812 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1813 158142c2 bellard
    }
1814 158142c2 bellard
    if ( bExp == 0xFF ) {
1815 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1816 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1817 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1818 158142c2 bellard
            return float32_default_nan;
1819 158142c2 bellard
        }
1820 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1821 158142c2 bellard
    }
1822 158142c2 bellard
    if ( aExp == 0 ) {
1823 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1824 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1825 158142c2 bellard
    }
1826 158142c2 bellard
    if ( bExp == 0 ) {
1827 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1828 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1829 158142c2 bellard
    }
1830 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1831 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1832 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1833 158142c2 bellard
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1834 158142c2 bellard
    zSig = zSig64;
1835 158142c2 bellard
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1836 158142c2 bellard
        zSig <<= 1;
1837 158142c2 bellard
        --zExp;
1838 158142c2 bellard
    }
1839 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1840 158142c2 bellard
1841 158142c2 bellard
}
1842 158142c2 bellard
1843 158142c2 bellard
/*----------------------------------------------------------------------------
1844 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1845 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1846 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1847 158142c2 bellard
*----------------------------------------------------------------------------*/
1848 158142c2 bellard
1849 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1850 158142c2 bellard
{
1851 158142c2 bellard
    flag aSign, bSign, zSign;
1852 158142c2 bellard
    int16 aExp, bExp, zExp;
1853 158142c2 bellard
    bits32 aSig, bSig, zSig;
1854 158142c2 bellard
1855 158142c2 bellard
    aSig = extractFloat32Frac( a );
1856 158142c2 bellard
    aExp = extractFloat32Exp( a );
1857 158142c2 bellard
    aSign = extractFloat32Sign( a );
1858 158142c2 bellard
    bSig = extractFloat32Frac( b );
1859 158142c2 bellard
    bExp = extractFloat32Exp( b );
1860 158142c2 bellard
    bSign = extractFloat32Sign( b );
1861 158142c2 bellard
    zSign = aSign ^ bSign;
1862 158142c2 bellard
    if ( aExp == 0xFF ) {
1863 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1864 158142c2 bellard
        if ( bExp == 0xFF ) {
1865 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1866 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1867 158142c2 bellard
            return float32_default_nan;
1868 158142c2 bellard
        }
1869 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1870 158142c2 bellard
    }
1871 158142c2 bellard
    if ( bExp == 0xFF ) {
1872 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1873 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
1874 158142c2 bellard
    }
1875 158142c2 bellard
    if ( bExp == 0 ) {
1876 158142c2 bellard
        if ( bSig == 0 ) {
1877 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
1878 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
1879 158142c2 bellard
                return float32_default_nan;
1880 158142c2 bellard
            }
1881 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
1882 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1883 158142c2 bellard
        }
1884 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1885 158142c2 bellard
    }
1886 158142c2 bellard
    if ( aExp == 0 ) {
1887 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1888 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1889 158142c2 bellard
    }
1890 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
1891 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1892 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1893 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
1894 158142c2 bellard
        aSig >>= 1;
1895 158142c2 bellard
        ++zExp;
1896 158142c2 bellard
    }
1897 158142c2 bellard
    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1898 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
1899 158142c2 bellard
        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1900 158142c2 bellard
    }
1901 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1902 158142c2 bellard
1903 158142c2 bellard
}
1904 158142c2 bellard
1905 158142c2 bellard
/*----------------------------------------------------------------------------
1906 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
1907 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
1908 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1909 158142c2 bellard
*----------------------------------------------------------------------------*/
1910 158142c2 bellard
1911 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
1912 158142c2 bellard
{
1913 158142c2 bellard
    flag aSign, bSign, zSign;
1914 158142c2 bellard
    int16 aExp, bExp, expDiff;
1915 158142c2 bellard
    bits32 aSig, bSig;
1916 158142c2 bellard
    bits32 q;
1917 158142c2 bellard
    bits64 aSig64, bSig64, q64;
1918 158142c2 bellard
    bits32 alternateASig;
1919 158142c2 bellard
    sbits32 sigMean;
1920 158142c2 bellard
1921 158142c2 bellard
    aSig = extractFloat32Frac( a );
1922 158142c2 bellard
    aExp = extractFloat32Exp( a );
1923 158142c2 bellard
    aSign = extractFloat32Sign( a );
1924 158142c2 bellard
    bSig = extractFloat32Frac( b );
1925 158142c2 bellard
    bExp = extractFloat32Exp( b );
1926 158142c2 bellard
    bSign = extractFloat32Sign( b );
1927 158142c2 bellard
    if ( aExp == 0xFF ) {
1928 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1929 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1930 158142c2 bellard
        }
1931 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1932 158142c2 bellard
        return float32_default_nan;
1933 158142c2 bellard
    }
1934 158142c2 bellard
    if ( bExp == 0xFF ) {
1935 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1936 158142c2 bellard
        return a;
1937 158142c2 bellard
    }
1938 158142c2 bellard
    if ( bExp == 0 ) {
1939 158142c2 bellard
        if ( bSig == 0 ) {
1940 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1941 158142c2 bellard
            return float32_default_nan;
1942 158142c2 bellard
        }
1943 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1944 158142c2 bellard
    }
1945 158142c2 bellard
    if ( aExp == 0 ) {
1946 158142c2 bellard
        if ( aSig == 0 ) return a;
1947 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1948 158142c2 bellard
    }
1949 158142c2 bellard
    expDiff = aExp - bExp;
1950 158142c2 bellard
    aSig |= 0x00800000;
1951 158142c2 bellard
    bSig |= 0x00800000;
1952 158142c2 bellard
    if ( expDiff < 32 ) {
1953 158142c2 bellard
        aSig <<= 8;
1954 158142c2 bellard
        bSig <<= 8;
1955 158142c2 bellard
        if ( expDiff < 0 ) {
1956 158142c2 bellard
            if ( expDiff < -1 ) return a;
1957 158142c2 bellard
            aSig >>= 1;
1958 158142c2 bellard
        }
1959 158142c2 bellard
        q = ( bSig <= aSig );
1960 158142c2 bellard
        if ( q ) aSig -= bSig;
1961 158142c2 bellard
        if ( 0 < expDiff ) {
1962 158142c2 bellard
            q = ( ( (bits64) aSig )<<32 ) / bSig;
1963 158142c2 bellard
            q >>= 32 - expDiff;
1964 158142c2 bellard
            bSig >>= 2;
1965 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1966 158142c2 bellard
        }
1967 158142c2 bellard
        else {
1968 158142c2 bellard
            aSig >>= 2;
1969 158142c2 bellard
            bSig >>= 2;
1970 158142c2 bellard
        }
1971 158142c2 bellard
    }
1972 158142c2 bellard
    else {
1973 158142c2 bellard
        if ( bSig <= aSig ) aSig -= bSig;
1974 158142c2 bellard
        aSig64 = ( (bits64) aSig )<<40;
1975 158142c2 bellard
        bSig64 = ( (bits64) bSig )<<40;
1976 158142c2 bellard
        expDiff -= 64;
1977 158142c2 bellard
        while ( 0 < expDiff ) {
1978 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1979 158142c2 bellard
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1980 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
1981 158142c2 bellard
            expDiff -= 62;
1982 158142c2 bellard
        }
1983 158142c2 bellard
        expDiff += 64;
1984 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1985 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1986 158142c2 bellard
        q = q64>>( 64 - expDiff );
1987 158142c2 bellard
        bSig <<= 6;
1988 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1989 158142c2 bellard
    }
1990 158142c2 bellard
    do {
1991 158142c2 bellard
        alternateASig = aSig;
1992 158142c2 bellard
        ++q;
1993 158142c2 bellard
        aSig -= bSig;
1994 158142c2 bellard
    } while ( 0 <= (sbits32) aSig );
1995 158142c2 bellard
    sigMean = aSig + alternateASig;
1996 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1997 158142c2 bellard
        aSig = alternateASig;
1998 158142c2 bellard
    }
1999 158142c2 bellard
    zSign = ( (sbits32) aSig < 0 );
2000 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2001 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2002 158142c2 bellard
2003 158142c2 bellard
}
2004 158142c2 bellard
2005 158142c2 bellard
/*----------------------------------------------------------------------------
2006 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2007 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2008 158142c2 bellard
| Floating-Point Arithmetic.
2009 158142c2 bellard
*----------------------------------------------------------------------------*/
2010 158142c2 bellard
2011 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2012 158142c2 bellard
{
2013 158142c2 bellard
    flag aSign;
2014 158142c2 bellard
    int16 aExp, zExp;
2015 158142c2 bellard
    bits32 aSig, zSig;
2016 158142c2 bellard
    bits64 rem, term;
2017 158142c2 bellard
2018 158142c2 bellard
    aSig = extractFloat32Frac( a );
2019 158142c2 bellard
    aExp = extractFloat32Exp( a );
2020 158142c2 bellard
    aSign = extractFloat32Sign( a );
2021 158142c2 bellard
    if ( aExp == 0xFF ) {
2022 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2023 158142c2 bellard
        if ( ! aSign ) return a;
2024 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2025 158142c2 bellard
        return float32_default_nan;
2026 158142c2 bellard
    }
2027 158142c2 bellard
    if ( aSign ) {
2028 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2029 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2030 158142c2 bellard
        return float32_default_nan;
2031 158142c2 bellard
    }
2032 158142c2 bellard
    if ( aExp == 0 ) {
2033 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2034 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2035 158142c2 bellard
    }
2036 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2037 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2038 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2039 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2040 158142c2 bellard
        if ( zSig < 2 ) {
2041 158142c2 bellard
            zSig = 0x7FFFFFFF;
2042 158142c2 bellard
            goto roundAndPack;
2043 158142c2 bellard
        }
2044 158142c2 bellard
        aSig >>= aExp & 1;
2045 158142c2 bellard
        term = ( (bits64) zSig ) * zSig;
2046 158142c2 bellard
        rem = ( ( (bits64) aSig )<<32 ) - term;
2047 158142c2 bellard
        while ( (sbits64) rem < 0 ) {
2048 158142c2 bellard
            --zSig;
2049 158142c2 bellard
            rem += ( ( (bits64) zSig )<<1 ) | 1;
2050 158142c2 bellard
        }
2051 158142c2 bellard
        zSig |= ( rem != 0 );
2052 158142c2 bellard
    }
2053 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2054 158142c2 bellard
 roundAndPack:
2055 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2056 158142c2 bellard
2057 158142c2 bellard
}
2058 158142c2 bellard
2059 158142c2 bellard
/*----------------------------------------------------------------------------
2060 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2061 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2062 374dfc33 aurel32
| Floating-Point Arithmetic.
2063 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2064 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2065 374dfc33 aurel32
{
2066 374dfc33 aurel32
    flag aSign, zSign;
2067 374dfc33 aurel32
    int16 aExp;
2068 374dfc33 aurel32
    bits32 aSig, zSig, i;
2069 374dfc33 aurel32
2070 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2071 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2072 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2073 374dfc33 aurel32
2074 374dfc33 aurel32
    if ( aExp == 0 ) {
2075 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2076 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2077 374dfc33 aurel32
    }
2078 374dfc33 aurel32
    if ( aSign ) {
2079 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2080 374dfc33 aurel32
        return float32_default_nan;
2081 374dfc33 aurel32
    }
2082 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2083 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2084 374dfc33 aurel32
        return a;
2085 374dfc33 aurel32
    }
2086 374dfc33 aurel32
2087 374dfc33 aurel32
    aExp -= 0x7F;
2088 374dfc33 aurel32
    aSig |= 0x00800000;
2089 374dfc33 aurel32
    zSign = aExp < 0;
2090 374dfc33 aurel32
    zSig = aExp << 23;
2091 374dfc33 aurel32
2092 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2093 374dfc33 aurel32
        aSig = ( (bits64)aSig * aSig ) >> 23;
2094 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2095 374dfc33 aurel32
            aSig >>= 1;
2096 374dfc33 aurel32
            zSig |= i;
2097 374dfc33 aurel32
        }
2098 374dfc33 aurel32
    }
2099 374dfc33 aurel32
2100 374dfc33 aurel32
    if ( zSign )
2101 374dfc33 aurel32
        zSig = -zSig;
2102 374dfc33 aurel32
2103 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2104 374dfc33 aurel32
}
2105 374dfc33 aurel32
2106 374dfc33 aurel32
/*----------------------------------------------------------------------------
2107 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2108 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2109 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2110 158142c2 bellard
*----------------------------------------------------------------------------*/
2111 158142c2 bellard
2112 750afe93 bellard
int float32_eq( float32 a, float32 b STATUS_PARAM )
2113 158142c2 bellard
{
2114 158142c2 bellard
2115 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2116 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2117 158142c2 bellard
       ) {
2118 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2119 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2120 158142c2 bellard
        }
2121 158142c2 bellard
        return 0;
2122 158142c2 bellard
    }
2123 f090c9d4 pbrook
    return ( float32_val(a) == float32_val(b) ) ||
2124 f090c9d4 pbrook
            ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2125 158142c2 bellard
2126 158142c2 bellard
}
2127 158142c2 bellard
2128 158142c2 bellard
/*----------------------------------------------------------------------------
2129 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2130 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
2131 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2132 158142c2 bellard
| Arithmetic.
2133 158142c2 bellard
*----------------------------------------------------------------------------*/
2134 158142c2 bellard
2135 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2136 158142c2 bellard
{
2137 158142c2 bellard
    flag aSign, bSign;
2138 f090c9d4 pbrook
    bits32 av, bv;
2139 158142c2 bellard
2140 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2141 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2142 158142c2 bellard
       ) {
2143 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2144 158142c2 bellard
        return 0;
2145 158142c2 bellard
    }
2146 158142c2 bellard
    aSign = extractFloat32Sign( a );
2147 158142c2 bellard
    bSign = extractFloat32Sign( b );
2148 f090c9d4 pbrook
    av = float32_val(a);
2149 f090c9d4 pbrook
    bv = float32_val(b);
2150 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2151 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2152 158142c2 bellard
2153 158142c2 bellard
}
2154 158142c2 bellard
2155 158142c2 bellard
/*----------------------------------------------------------------------------
2156 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2157 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2158 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2159 158142c2 bellard
*----------------------------------------------------------------------------*/
2160 158142c2 bellard
2161 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2162 158142c2 bellard
{
2163 158142c2 bellard
    flag aSign, bSign;
2164 f090c9d4 pbrook
    bits32 av, bv;
2165 158142c2 bellard
2166 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2167 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2168 158142c2 bellard
       ) {
2169 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2170 158142c2 bellard
        return 0;
2171 158142c2 bellard
    }
2172 158142c2 bellard
    aSign = extractFloat32Sign( a );
2173 158142c2 bellard
    bSign = extractFloat32Sign( b );
2174 f090c9d4 pbrook
    av = float32_val(a);
2175 f090c9d4 pbrook
    bv = float32_val(b);
2176 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2177 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2178 158142c2 bellard
2179 158142c2 bellard
}
2180 158142c2 bellard
2181 158142c2 bellard
/*----------------------------------------------------------------------------
2182 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2183 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2184 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2185 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2186 158142c2 bellard
*----------------------------------------------------------------------------*/
2187 158142c2 bellard
2188 750afe93 bellard
int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
{
    bits32 rawA, rawB;

    /* Any NaN operand raises invalid and compares unequal. */
    if ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    rawA = float32_val(a);
    rawB = float32_val(b);
    if ( rawA == rawB ) return 1;
    /* Different encodings are still equal when both are zeros: with the
       sign bit shifted out nothing else may be set. */
    return (bits32) ( ( rawA | rawB )<<1 ) == 0;

}
2203 158142c2 bellard
2204 158142c2 bellard
/*----------------------------------------------------------------------------
2205 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2206 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2207 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2208 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2209 158142c2 bellard
*----------------------------------------------------------------------------*/
2210 158142c2 bellard
2211 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;
    bits32 rawA, rawB;
    int unordered;

    unordered =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( unordered ) {
        /* Only a signaling NaN raises invalid; quiet NaNs just compare
           false. */
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    signA = extractFloat32Sign( a );
    signB = extractFloat32Sign( b );
    rawA = float32_val(a);
    rawB = float32_val(b);
    if ( signA != signB ) {
        /* Opposite signs: true when `a' is the negative one, or when the
           operands are +0 and -0. */
        return signA || ( (bits32) ( ( rawA | rawB )<<1 ) == 0 );
    }
    if ( rawA == rawB ) return 1;
    /* Same sign: the unsigned ordering of the encodings is flipped by the
       sign of `a'. */
    return ( signA ^ ( rawA < rawB ) ) != 0;

}
2232 158142c2 bellard
2233 158142c2 bellard
/*----------------------------------------------------------------------------
2234 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2235 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2236 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2237 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2238 158142c2 bellard
*----------------------------------------------------------------------------*/
2239 158142c2 bellard
2240 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;
    bits32 rawA, rawB;
    int unordered;

    unordered =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( unordered ) {
        /* Only a signaling NaN raises invalid; quiet NaNs just compare
           false. */
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    signA = extractFloat32Sign( a );
    signB = extractFloat32Sign( b );
    rawA = float32_val(a);
    rawB = float32_val(b);
    if ( signA != signB ) {
        /* Opposite signs: `a' is smaller only when it is the negative one
           and the operands are not +0 and -0. */
        return signA && ( (bits32) ( ( rawA | rawB )<<1 ) != 0 );
    }
    if ( rawA == rawB ) return 0;
    /* Same sign: the unsigned ordering of the encodings is flipped by the
       sign of `a'. */
    return ( signA ^ ( rawA < rawB ) ) != 0;

}
2261 158142c2 bellard
2262 158142c2 bellard
/*----------------------------------------------------------------------------
2263 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2264 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2265 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2266 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2267 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2268 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2269 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2270 158142c2 bellard
*----------------------------------------------------------------------------*/
2271 158142c2 bellard
2272 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField, rightShift;
    bits64 sig;

    sig = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    /* A NaN is handled as a positive value. */
    if ( ( expField == 0x7FF ) && sig ) {
        negative = 0;
    }
    /* A nonzero exponent field means the implicit leading one is present. */
    if ( expField ) {
        sig |= LIT64( 0x0010000000000000 );
    }
    rightShift = 0x42C - expField;
    if ( 0 < rightShift ) {
        shift64RightJamming( sig, rightShift, &sig );
    }
    /* Rounding and the final packing are left to roundAndPackInt32. */
    return roundAndPackInt32( negative, sig STATUS_VAR );

}
2288 158142c2 bellard
2289 158142c2 bellard
/*----------------------------------------------------------------------------
2290 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2291 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2292 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2293 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2294 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2295 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2296 158142c2 bellard
| returned.
2297 158142c2 bellard
*----------------------------------------------------------------------------*/
2298 158142c2 bellard
2299 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField, rightShift;
    bits64 sig, fullSig;
    int32 result;

    sig = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    if ( 0x41E < expField ) {
        /* Too large for 32 bits, infinite, or NaN.  A NaN is reported as a
           positive overflow. */
        if ( ( expField == 0x7FF ) && sig ) negative = 0;
        goto overflow;
    }
    if ( expField < 0x3FF ) {
        /* Magnitude below one truncates to zero; anything but an exact
           zero is inexact. */
        if ( expField || sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    sig |= LIT64( 0x0010000000000000 );
    rightShift = 0x433 - expField;
    fullSig = sig;
    sig >>= rightShift;
    result = sig;
    if ( negative ) result = - result;
    /* A result whose sign disagrees with the input did not fit. */
    if ( ( result < 0 ) ^ negative ) goto overflow;
    /* Bits lost by the right shift make the conversion inexact. */
    if ( ( sig<<rightShift ) != fullSig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;

 overflow:
    float_raise( float_flag_invalid STATUS_VAR);
    return negative ? (sbits32) 0x80000000 : 0x7FFFFFFF;

}
2334 158142c2 bellard
2335 158142c2 bellard
/*----------------------------------------------------------------------------
2336 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2337 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2338 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2339 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2340 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2341 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2342 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2343 158142c2 bellard
*----------------------------------------------------------------------------*/
2344 158142c2 bellard
2345 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField, rightShift;
    bits64 sig, sigExtra;

    sig = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    /* A nonzero exponent field means the implicit leading one is present. */
    if ( expField ) sig |= LIT64( 0x0010000000000000 );
    rightShift = 0x433 - expField;
    if ( 0 < rightShift ) {
        /* Fractional bits exist: shift them out into sigExtra so that
           roundAndPackInt64 can round on them. */
        shift64ExtraRightJamming( sig, 0, rightShift, &sig, &sigExtra );
    }
    else if ( expField <= 0x43E ) {
        /* Already an integer that may fit: scale up, nothing to round. */
        sigExtra = 0;
        sig <<= - rightShift;
    }
    else {
        /* Out of range, infinite, or NaN. */
        float_raise( float_flag_invalid STATUS_VAR);
        if (    negative
             && ! (    ( expField == 0x7FF )
                    && ( sig != LIT64( 0x0010000000000000 ) ) )
           ) {
            /* Negative and not a NaN: most negative integer. */
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    return roundAndPackInt64( negative, sig, sigExtra STATUS_VAR );

}
2376 158142c2 bellard
2377 158142c2 bellard
/*----------------------------------------------------------------------------
2378 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2379 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2380 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2381 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2382 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2383 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2384 158142c2 bellard
| returned.
2385 158142c2 bellard
*----------------------------------------------------------------------------*/
2386 158142c2 bellard
2387 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField, leftShift;
    bits64 sig;
    int64 result;

    sig = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    if ( expField ) sig |= LIT64( 0x0010000000000000 );
    leftShift = expField - 0x433;
    if ( leftShift < 0 ) {
        /* The value has fractional bits to discard. */
        if ( expField < 0x3FE ) {
            /* Magnitude below one half: result is zero, inexact unless
               the input was itself zero. */
            if ( expField | sig ) {
                STATUS(float_exception_flags) |= float_flag_inexact;
            }
            return 0;
        }
        result = sig>>( - leftShift );
        /* Any bit dropped by the shift above makes the result inexact. */
        if ( (bits64) ( sig<<( leftShift & 63 ) ) ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
    }
    else if ( expField < 0x43E ) {
        /* Exact integer that fits: scale the significand up. */
        result = sig<<leftShift;
    }
    else {
        /* Magnitude of at least 2^63, infinity, or NaN.  The encoding
           0xC3E0000000000000 is the only such input returned without
           raising invalid. */
        if ( float64_val(a) == LIT64( 0xC3E0000000000000 ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if (    ! negative
             || (    ( expField == 0x7FF )
                  && ( sig != LIT64( 0x0010000000000000 ) ) )
           ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (sbits64) LIT64( 0x8000000000000000 );
    }
    if ( negative ) result = - result;
    return result;

}
2428 158142c2 bellard
2429 158142c2 bellard
/*----------------------------------------------------------------------------
2430 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2431 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2432 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2433 158142c2 bellard
| Arithmetic.
2434 158142c2 bellard
*----------------------------------------------------------------------------*/
2435 158142c2 bellard
2436 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField;
    bits64 wideSig;
    bits32 narrowSig;

    wideSig = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    if ( expField == 0x7FF ) {
        /* Infinity keeps its sign; a NaN goes through the common-NaN
           conversion helpers. */
        if ( wideSig == 0 ) {
            return packFloat32( negative, 0xFF, 0 );
        }
        return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
    }
    /* Narrow the 52-bit fraction to the width roundAndPackFloat32 takes. */
    shift64RightJamming( wideSig, 22, &wideSig );
    narrowSig = wideSig;
    if ( expField || narrowSig ) {
        /* Nonzero value: insert the leading one and rebias the exponent. */
        narrowSig |= 0x40000000;
        expField -= 0x381;
    }
    return roundAndPackFloat32( negative, expField, narrowSig STATUS_VAR );

}
2459 158142c2 bellard
2460 60011498 Paul Brook
2461 60011498 Paul Brook
/*----------------------------------------------------------------------------
2462 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2463 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2464 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2465 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2466 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2467 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2468 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2469 60011498 Paul Brook
| significand.
2470 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2471 60011498 Paul Brook
static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
{
    /* Sign goes to bit 15 and the exponent starts at bit 10.  The fields
       are added, not OR-ed, so a carry out of `zSig' bumps the exponent.  */
    bits32 signField = ((bits32)zSign) << 15;
    bits32 expField = ((bits32)zExp) << 10;

    return signField + expField + zSig;
}
2475 60011498 Paul Brook
2476 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2477 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2478 60011498 Paul Brook
  
2479 60011498 Paul Brook
/*----------------------------------------------------------------------------
| Returns the result of converting the half-precision value `a' (given as its
| raw 16-bit encoding) to the single-precision floating-point format.  When
| `ieee' is nonzero, an exponent field of 0x1f encodes infinity or NaN;
| otherwise it is treated like any other exponent.
*----------------------------------------------------------------------------*/

float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    bits32 aSig;

    aSign = a >> 15;
    aExp = (a >> 10) & 0x1f;
    aSig = a & 0x3ff;

    if (aExp == 0x1f && ieee) {
        if (aSig) {
            /* Make sure correct exceptions are raised.  The helper examines
               a single-precision encoding, so it is given the NaN widened
               to float32 (same sign, fraction moved up by 13 bits).  The
               raw 16-bit pattern has a zero float32 exponent field and so
               could never be recognised as a NaN at all.  */
            float32ToCommonNaN(packFloat32(aSign, 0xff, aSig << 13) STATUS_VAR);
            /* Set the top fraction bit in the returned NaN.  */
            aSig |= 0x200;
        }
        return packFloat32(aSign, 0xff, aSig << 13);
    }
    if (aExp == 0) {
        int8 shiftCount;

        if (aSig == 0) {
            /* Signed zero.  */
            return packFloat32(aSign, 0, 0);
        }

        /* Subnormal input: shift the leading one up to bit 10, where it
           will carry into the exponent when packed, and compensate in the
           exponent.  */
        shiftCount = countLeadingZeros32( aSig ) - 21;
        aSig = aSig << shiftCount;
        aExp = -shiftCount;
    }
    /* Rebias the exponent by 0x70 and widen the fraction by 13 bits.  */
    return packFloat32( aSign, aExp + 0x70, aSig << 13);
}
2510 60011498 Paul Brook
2511 60011498 Paul Brook
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to a half-precision encoding, rounded according to the current rounding
| mode.  `ieee' selects the standard IEEE format when nonzero and the
| alternative "ARM" format otherwise; in this function it only changes the
| overflow threshold and the value returned on overflow.
*----------------------------------------------------------------------------*/

bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    bits32 aSig;
    bits32 mask;
    bits32 increment;
    int8 roundingMode;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* Make sure correct exceptions are raised.  */
            float32ToCommonNaN(a STATUS_VAR);
            aSig |= 0x00400000;
        }
        return packFloat16(aSign, 0x1f, aSig >> 13);
    }
    /* A true zero (either sign) converts exactly and raises nothing.  The
       test has to be on the fraction: testing the sign instead sent -0.0
       down the rounding path and returned +0 for positive subnormals
       without setting any flag.  */
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* Decimal point between bits 22 and 23.  */
    aSig |= 0x00800000;
    aExp -= 0x7f;
    /* `mask' covers the significand bits that will be discarded.  */
    if (aExp < -14) {
        /* Result is subnormal or zero.  Below 2^-24 every bit, the leading
           one included, is discarded.  */
        mask = 0x00ffffff;
        if (aExp >= -24) {
            mask >>= 25 + aExp;
        }
    } else {
        mask = 0x00001fff;
    }
    if (aSig & mask) {
        /* NOTE(review): underflow is raised here for every inexact result,
           including results in the normal range, and inexact itself is not
           raised on this path - confirm whether that is intended.  */
        float_raise( float_flag_underflow STATUS_VAR );
        roundingMode = STATUS(float_rounding_mode);
        switch (roundingMode) {
        case float_round_nearest_even:
            increment = (mask + 1) >> 1;
            if ((aSig & mask) == increment) {
                /* Exact tie: round up only if the kept part is odd.  */
                increment = aSig & (increment << 1);
            }
            break;
        case float_round_up:
            increment = aSign ? 0 : mask;
            break;
        case float_round_down:
            increment = aSign ? mask : 0;
            break;
        default: /* round_to_zero */
            increment = 0;
            break;
        }
        aSig += increment;
        if (aSig >= 0x01000000) {
            /* Rounding carried out of the significand; renormalize.  */
            aSig >>= 1;
            aExp++;
        }
    } else if (aExp < -14
          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
        float_raise( float_flag_underflow STATUS_VAR);
    }

    if (ieee) {
        if (aExp > 15) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        }
    } else {
        if (aExp > 16) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }
    if (aExp < -24) {
        /* Still below the smallest subnormal after rounding.
           NOTE(review): directed rounding away from zero also ends up here
           for inputs below 2^-25 and so yields zero - confirm whether the
           smallest subnormal is expected instead.  */
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        /* Subnormal result: shift the significand right so that the
           exponent can be pinned at the minimum.  */
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
2597 60011498 Paul Brook
2598 158142c2 bellard
#ifdef FLOATX80
2599 158142c2 bellard
2600 158142c2 bellard
/*----------------------------------------------------------------------------
2601 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2602 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2603 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2604 158142c2 bellard
| Arithmetic.
2605 158142c2 bellard
*----------------------------------------------------------------------------*/
2606 158142c2 bellard
2607 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField;
    bits64 frac;

    frac = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    if ( expField == 0x7FF ) {
        /* Infinity keeps its sign; a NaN goes through the common-NaN
           conversion helpers. */
        if ( frac == 0 ) {
            return packFloatx80( negative, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
    }
    if ( expField == 0 ) {
        if ( frac == 0 ) {
            /* Signed zero. */
            return packFloatx80( negative, 0, 0 );
        }
        normalizeFloat64Subnormal( frac, &expField, &frac );
    }
    /* Make the leading one explicit, move the significand to the top of
       the 64-bit field and rebias the exponent. */
    frac = ( frac | LIT64( 0x0010000000000000 ) )<<11;
    return packFloatx80( negative, expField + 0x3C00, frac );

}
2629 158142c2 bellard
2630 158142c2 bellard
#endif
2631 158142c2 bellard
2632 158142c2 bellard
#ifdef FLOAT128
2633 158142c2 bellard
2634 158142c2 bellard
/*----------------------------------------------------------------------------
2635 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2636 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2637 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2638 158142c2 bellard
| Arithmetic.
2639 158142c2 bellard
*----------------------------------------------------------------------------*/
2640 158142c2 bellard
2641 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField;
    bits64 frac, sigHigh, sigLow;

    frac = extractFloat64Frac( a );
    expField = extractFloat64Exp( a );
    negative = extractFloat64Sign( a );
    if ( expField == 0x7FF ) {
        /* Infinity keeps its sign; a NaN goes through the common-NaN
           conversion helpers. */
        if ( frac == 0 ) {
            return packFloat128( negative, 0x7FFF, 0, 0 );
        }
        return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
    }
    if ( expField == 0 ) {
        if ( frac == 0 ) {
            /* Signed zero. */
            return packFloat128( negative, 0, 0, 0 );
        }
        normalizeFloat64Subnormal( frac, &expField, &frac );
        --expField;
    }
    /* Spread the fraction across the two 64-bit halves of the result and
       rebias the exponent. */
    shift128Right( frac, 0, 4, &sigHigh, &sigLow );
    return packFloat128( negative, expField + 0x3C00, sigHigh, sigLow );

}
2663 158142c2 bellard
2664 158142c2 bellard
#endif
2665 158142c2 bellard
2666 158142c2 bellard
/*----------------------------------------------------------------------------
2667 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2668 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2669 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2670 158142c2 bellard
| Floating-Point Arithmetic.
2671 158142c2 bellard
*----------------------------------------------------------------------------*/
2672 158142c2 bellard
2673 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
{
    flag negative;
    int16 expField;
    bits64 unitBit, fracMask;
    int8 roundingMode;
    bits64 bits;

    expField = extractFloat64Exp( a );
    if ( 0x433 <= expField ) {
        /* No fractional bits at this magnitude: only a NaN needs work. */
        if ( ( expField == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( expField < 0x3FF ) {
        /* Magnitude below one: the result is a signed zero or +/-1. */
        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
        STATUS(float_exception_flags) |= float_flag_inexact;
        negative = extractFloat64Sign( a );
        roundingMode = STATUS(float_rounding_mode);
        if ( roundingMode == float_round_nearest_even ) {
            /* Strictly more than one half rounds away from zero. */
            if ( ( expField == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( negative, 0x3FF, 0 );
            }
        }
        else if ( roundingMode == float_round_down ) {
            return make_float64(negative ? LIT64( 0xBFF0000000000000 ) : 0);
        }
        else if ( roundingMode == float_round_up ) {
            return make_float64(
                negative ? LIT64( 0x8000000000000000 )
                         : LIT64( 0x3FF0000000000000 ));
        }
        return packFloat64( negative, 0, 0 );
    }
    /* unitBit is the encoding bit worth one; fracMask covers the bits
       below it. */
    unitBit = 1;
    unitBit <<= 0x433 - expField;
    fracMask = unitBit - 1;
    bits = float64_val(a);
    roundingMode = STATUS(float_rounding_mode);
    if ( roundingMode == float_round_nearest_even ) {
        bits += unitBit>>1;
        /* An exact tie leaves the fraction bits zero; clear the unit bit
           so the result is even. */
        if ( ( bits & fracMask ) == 0 ) bits &= ~ unitBit;
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude when rounding up a positive
           value or rounding down a negative one. */
        if ( extractFloat64Sign( make_float64(bits) )
             ^ ( roundingMode == float_round_up ) ) {
            bits += fracMask;
        }
    }
    bits &= ~ fracMask;
    if ( bits != float64_val(a) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float64(bits);

}
2726 158142c2 bellard
2727 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integer using
| round-to-zero, whatever the current rounding mode is, and returns the
| result as a double-precision floating-point value.  The rounding mode in
| the status is switched for the call to float64_round_to_int and restored
| afterwards.
*----------------------------------------------------------------------------*/

float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode;
    float64 truncated;

    savedMode = STATUS(float_rounding_mode);
    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;
    return truncated;
}
2737 e6e5906b pbrook
2738 158142c2 bellard
/*----------------------------------------------------------------------------
2739 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
2740 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2741 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
2742 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
2743 158142c2 bellard
| Floating-Point Arithmetic.
2744 158142c2 bellard
*----------------------------------------------------------------------------*/
2745 158142c2 bellard
2746 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2747 158142c2 bellard
{
2748 158142c2 bellard
    int16 aExp, bExp, zExp;
2749 158142c2 bellard
    bits64 aSig, bSig, zSig;
2750 158142c2 bellard
    int16 expDiff;
2751 158142c2 bellard
2752 158142c2 bellard
    aSig = extractFloat64Frac( a );
2753 158142c2 bellard
    aExp = extractFloat64Exp( a );
2754 158142c2 bellard
    bSig = extractFloat64Frac( b );
2755 158142c2 bellard
    bExp = extractFloat64Exp( b );
2756 158142c2 bellard
    expDiff = aExp - bExp;
2757 158142c2 bellard
    aSig <<= 9;
2758 158142c2 bellard
    bSig <<= 9;
2759 158142c2 bellard
    if ( 0 < expDiff ) {
2760 158142c2 bellard
        if ( aExp == 0x7FF ) {
2761 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2762 158142c2 bellard
            return a;
2763 158142c2 bellard
        }
2764 158142c2 bellard
        if ( bExp == 0 ) {
2765 158142c2 bellard
            --expDiff;
2766 158142c2 bellard
        }
2767 158142c2 bellard
        else {
2768 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
2769 158142c2 bellard
        }
2770 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
2771 158142c2 bellard
        zExp = aExp;
2772 158142c2 bellard
    }
2773 158142c2 bellard
    else if ( expDiff < 0 ) {
2774 158142c2 bellard
        if ( bExp == 0x7FF ) {
2775 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2776 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2777 158142c2 bellard
        }
2778 158142c2 bellard
        if ( aExp == 0 ) {
2779 158142c2 bellard
            ++expDiff;
2780 158142c2 bellard
        }
2781 158142c2 bellard
        else {
2782 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
2783 158142c2 bellard
        }
2784 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
2785 158142c2 bellard
        zExp = bExp;
2786 158142c2 bellard
    }
2787 158142c2 bellard
    else {
2788 158142c2 bellard
        if ( aExp == 0x7FF ) {
2789 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2790 158142c2 bellard
            return a;
2791 158142c2 bellard
        }
2792 fe76d976 pbrook
        if ( aExp == 0 ) {
2793 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
2794 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2795 fe76d976 pbrook
        }
2796 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2797 158142c2 bellard
        zExp = aExp;
2798 158142c2 bellard
        goto roundAndPack;
2799 158142c2 bellard
    }
2800 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
2801 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
2802 158142c2 bellard
    --zExp;
2803 158142c2 bellard
    if ( (sbits64) zSig < 0 ) {
2804 158142c2 bellard
        zSig = aSig + bSig;
2805 158142c2 bellard
        ++zExp;
2806 158142c2 bellard
    }
2807 158142c2 bellard
 roundAndPack:
2808 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2809 158142c2 bellard
2810 158142c2 bellard
}
2811 158142c2 bellard
2812 158142c2 bellard
/*----------------------------------------------------------------------------
2813 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
2814 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2815 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
2816 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2817 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2818 158142c2 bellard
*----------------------------------------------------------------------------*/
2819 158142c2 bellard
2820 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2821 158142c2 bellard
{
2822 158142c2 bellard
    int16 aExp, bExp, zExp;
2823 158142c2 bellard
    bits64 aSig, bSig, zSig;
2824 158142c2 bellard
    int16 expDiff;
2825 158142c2 bellard
2826 158142c2 bellard
    aSig = extractFloat64Frac( a );
2827 158142c2 bellard
    aExp = extractFloat64Exp( a );
2828 158142c2 bellard
    bSig = extractFloat64Frac( b );
2829 158142c2 bellard
    bExp = extractFloat64Exp( b );
2830 158142c2 bellard
    expDiff = aExp - bExp;
2831 158142c2 bellard
    aSig <<= 10;
2832 158142c2 bellard
    bSig <<= 10;
2833 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
2834 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
2835 158142c2 bellard
    if ( aExp == 0x7FF ) {
2836 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2837 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2838 158142c2 bellard
        return float64_default_nan;
2839 158142c2 bellard
    }
2840 158142c2 bellard
    if ( aExp == 0 ) {
2841 158142c2 bellard
        aExp = 1;
2842 158142c2 bellard
        bExp = 1;
2843 158142c2 bellard
    }
2844 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
2845 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
2846 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2847 158142c2 bellard
 bExpBigger:
2848 158142c2 bellard
    if ( bExp == 0x7FF ) {
2849 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2850 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2851 158142c2 bellard
    }
2852 158142c2 bellard
    if ( aExp == 0 ) {
2853 158142c2 bellard
        ++expDiff;
2854 158142c2 bellard
    }
2855 158142c2 bellard
    else {
2856 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
2857 158142c2 bellard
    }
2858 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
2859 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
2860 158142c2 bellard
 bBigger:
2861 158142c2 bellard
    zSig = bSig - aSig;
2862 158142c2 bellard
    zExp = bExp;
2863 158142c2 bellard
    zSign ^= 1;
2864 158142c2 bellard
    goto normalizeRoundAndPack;
2865 158142c2 bellard
 aExpBigger:
2866 158142c2 bellard
    if ( aExp == 0x7FF ) {
2867 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2868 158142c2 bellard
        return a;
2869 158142c2 bellard
    }
2870 158142c2 bellard
    if ( bExp == 0 ) {
2871 158142c2 bellard
        --expDiff;
2872 158142c2 bellard
    }
2873 158142c2 bellard
    else {
2874 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
2875 158142c2 bellard
    }
2876 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
2877 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
2878 158142c2 bellard
 aBigger:
2879 158142c2 bellard
    zSig = aSig - bSig;
2880 158142c2 bellard
    zExp = aExp;
2881 158142c2 bellard
 normalizeRoundAndPack:
2882 158142c2 bellard
    --zExp;
2883 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2884 158142c2 bellard
2885 158142c2 bellard
}
2886 158142c2 bellard
2887 158142c2 bellard
/*----------------------------------------------------------------------------
2888 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
2889 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
2890 158142c2 bellard
| Binary Floating-Point Arithmetic.
2891 158142c2 bellard
*----------------------------------------------------------------------------*/
2892 158142c2 bellard
2893 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
2894 158142c2 bellard
{
2895 158142c2 bellard
    flag aSign, bSign;
2896 158142c2 bellard
2897 158142c2 bellard
    aSign = extractFloat64Sign( a );
2898 158142c2 bellard
    bSign = extractFloat64Sign( b );
2899 158142c2 bellard
    if ( aSign == bSign ) {
2900 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2901 158142c2 bellard
    }
2902 158142c2 bellard
    else {
2903 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2904 158142c2 bellard
    }
2905 158142c2 bellard
2906 158142c2 bellard
}
2907 158142c2 bellard
2908 158142c2 bellard
/*----------------------------------------------------------------------------
2909 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
2910 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2911 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2912 158142c2 bellard
*----------------------------------------------------------------------------*/
2913 158142c2 bellard
2914 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2915 158142c2 bellard
{
2916 158142c2 bellard
    flag aSign, bSign;
2917 158142c2 bellard
2918 158142c2 bellard
    aSign = extractFloat64Sign( a );
2919 158142c2 bellard
    bSign = extractFloat64Sign( b );
2920 158142c2 bellard
    if ( aSign == bSign ) {
2921 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2922 158142c2 bellard
    }
2923 158142c2 bellard
    else {
2924 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2925 158142c2 bellard
    }
2926 158142c2 bellard
2927 158142c2 bellard
}
2928 158142c2 bellard
2929 158142c2 bellard
/*----------------------------------------------------------------------------
2930 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
2931 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2932 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2933 158142c2 bellard
*----------------------------------------------------------------------------*/
2934 158142c2 bellard
2935 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
2936 158142c2 bellard
{
2937 158142c2 bellard
    flag aSign, bSign, zSign;
2938 158142c2 bellard
    int16 aExp, bExp, zExp;
2939 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
2940 158142c2 bellard
2941 158142c2 bellard
    aSig = extractFloat64Frac( a );
2942 158142c2 bellard
    aExp = extractFloat64Exp( a );
2943 158142c2 bellard
    aSign = extractFloat64Sign( a );
2944 158142c2 bellard
    bSig = extractFloat64Frac( b );
2945 158142c2 bellard
    bExp = extractFloat64Exp( b );
2946 158142c2 bellard
    bSign = extractFloat64Sign( b );
2947 158142c2 bellard
    zSign = aSign ^ bSign;
2948 158142c2 bellard
    if ( aExp == 0x7FF ) {
2949 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2950 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2951 158142c2 bellard
        }
2952 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
2953 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2954 158142c2 bellard
            return float64_default_nan;
2955 158142c2 bellard
        }
2956 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2957 158142c2 bellard
    }
2958 158142c2 bellard
    if ( bExp == 0x7FF ) {
2959 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2960 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
2961 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2962 158142c2 bellard
            return float64_default_nan;
2963 158142c2 bellard
        }
2964 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2965 158142c2 bellard
    }
2966 158142c2 bellard
    if ( aExp == 0 ) {
2967 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2968 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2969 158142c2 bellard
    }
2970 158142c2 bellard
    if ( bExp == 0 ) {
2971 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2972 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2973 158142c2 bellard
    }
2974 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
2975 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2976 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2977 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2978 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
2979 158142c2 bellard
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2980 158142c2 bellard
        zSig0 <<= 1;
2981 158142c2 bellard
        --zExp;
2982 158142c2 bellard
    }
2983 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
2984 158142c2 bellard
2985 158142c2 bellard
}
2986 158142c2 bellard
2987 158142c2 bellard
/*----------------------------------------------------------------------------
2988 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
2989 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
2990 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2991 158142c2 bellard
*----------------------------------------------------------------------------*/
2992 158142c2 bellard
2993 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
2994 158142c2 bellard
{
2995 158142c2 bellard
    flag aSign, bSign, zSign;
2996 158142c2 bellard
    int16 aExp, bExp, zExp;
2997 158142c2 bellard
    bits64 aSig, bSig, zSig;
2998 158142c2 bellard
    bits64 rem0, rem1;
2999 158142c2 bellard
    bits64 term0, term1;
3000 158142c2 bellard
3001 158142c2 bellard
    aSig = extractFloat64Frac( a );
3002 158142c2 bellard
    aExp = extractFloat64Exp( a );
3003 158142c2 bellard
    aSign = extractFloat64Sign( a );
3004 158142c2 bellard
    bSig = extractFloat64Frac( b );
3005 158142c2 bellard
    bExp = extractFloat64Exp( b );
3006 158142c2 bellard
    bSign = extractFloat64Sign( b );
3007 158142c2 bellard
    zSign = aSign ^ bSign;
3008 158142c2 bellard
    if ( aExp == 0x7FF ) {
3009 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3010 158142c2 bellard
        if ( bExp == 0x7FF ) {
3011 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3012 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3013 158142c2 bellard
            return float64_default_nan;
3014 158142c2 bellard
        }
3015 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3016 158142c2 bellard
    }
3017 158142c2 bellard
    if ( bExp == 0x7FF ) {
3018 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3019 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
3020 158142c2 bellard
    }
3021 158142c2 bellard
    if ( bExp == 0 ) {
3022 158142c2 bellard
        if ( bSig == 0 ) {
3023 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3024 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3025 158142c2 bellard
                return float64_default_nan;
3026 158142c2 bellard
            }
3027 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3028 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3029 158142c2 bellard
        }
3030 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3031 158142c2 bellard
    }
3032 158142c2 bellard
    if ( aExp == 0 ) {
3033 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3034 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3035 158142c2 bellard
    }
3036 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
3037 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3038 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3039 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
3040 158142c2 bellard
        aSig >>= 1;
3041 158142c2 bellard
        ++zExp;
3042 158142c2 bellard
    }
3043 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
3044 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
3045 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
3046 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3047 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
3048 158142c2 bellard
            --zSig;
3049 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3050 158142c2 bellard
        }
3051 158142c2 bellard
        zSig |= ( rem1 != 0 );
3052 158142c2 bellard
    }
3053 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3054 158142c2 bellard
3055 158142c2 bellard
}
3056 158142c2 bellard
3057 158142c2 bellard
/*----------------------------------------------------------------------------
3058 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3059 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3060 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3061 158142c2 bellard
*----------------------------------------------------------------------------*/
3062 158142c2 bellard
3063 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3064 158142c2 bellard
{
3065 158142c2 bellard
    flag aSign, bSign, zSign;
3066 158142c2 bellard
    int16 aExp, bExp, expDiff;
3067 158142c2 bellard
    bits64 aSig, bSig;
3068 158142c2 bellard
    bits64 q, alternateASig;
3069 158142c2 bellard
    sbits64 sigMean;
3070 158142c2 bellard
3071 158142c2 bellard
    aSig = extractFloat64Frac( a );
3072 158142c2 bellard
    aExp = extractFloat64Exp( a );
3073 158142c2 bellard
    aSign = extractFloat64Sign( a );
3074 158142c2 bellard
    bSig = extractFloat64Frac( b );
3075 158142c2 bellard
    bExp = extractFloat64Exp( b );
3076 158142c2 bellard
    bSign = extractFloat64Sign( b );
3077 158142c2 bellard
    if ( aExp == 0x7FF ) {
3078 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3079 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3080 158142c2 bellard
        }
3081 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3082 158142c2 bellard
        return float64_default_nan;
3083 158142c2 bellard
    }
3084 158142c2 bellard
    if ( bExp == 0x7FF ) {
3085 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3086 158142c2 bellard
        return a;
3087 158142c2 bellard
    }
3088 158142c2 bellard
    if ( bExp == 0 ) {
3089 158142c2 bellard
        if ( bSig == 0 ) {
3090 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3091 158142c2 bellard
            return float64_default_nan;
3092 158142c2 bellard
        }
3093 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3094 158142c2 bellard
    }
3095 158142c2 bellard
    if ( aExp == 0 ) {
3096 158142c2 bellard
        if ( aSig == 0 ) return a;
3097 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3098 158142c2 bellard
    }
3099 158142c2 bellard
    expDiff = aExp - bExp;
3100 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3101 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3102 158142c2 bellard
    if ( expDiff < 0 ) {
3103 158142c2 bellard
        if ( expDiff < -1 ) return a;
3104 158142c2 bellard
        aSig >>= 1;
3105 158142c2 bellard
    }
3106 158142c2 bellard
    q = ( bSig <= aSig );
3107 158142c2 bellard
    if ( q ) aSig -= bSig;
3108 158142c2 bellard
    expDiff -= 64;
3109 158142c2 bellard
    while ( 0 < expDiff ) {
3110 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3111 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3112 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
3113 158142c2 bellard
        expDiff -= 62;
3114 158142c2 bellard
    }
3115 158142c2 bellard
    expDiff += 64;
3116 158142c2 bellard
    if ( 0 < expDiff ) {
3117 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3118 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3119 158142c2 bellard
        q >>= 64 - expDiff;
3120 158142c2 bellard
        bSig >>= 2;
3121 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3122 158142c2 bellard
    }
3123 158142c2 bellard
    else {
3124 158142c2 bellard
        aSig >>= 2;
3125 158142c2 bellard
        bSig >>= 2;
3126 158142c2 bellard
    }
3127 158142c2 bellard
    do {
3128 158142c2 bellard
        alternateASig = aSig;
3129 158142c2 bellard
        ++q;
3130 158142c2 bellard
        aSig -= bSig;
3131 158142c2 bellard
    } while ( 0 <= (sbits64) aSig );
3132 158142c2 bellard
    sigMean = aSig + alternateASig;
3133 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3134 158142c2 bellard
        aSig = alternateASig;
3135 158142c2 bellard
    }
3136 158142c2 bellard
    zSign = ( (sbits64) aSig < 0 );
3137 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3138 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3139 158142c2 bellard
3140 158142c2 bellard
}
3141 158142c2 bellard
3142 158142c2 bellard
/*----------------------------------------------------------------------------
3143 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3144 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3145 158142c2 bellard
| Floating-Point Arithmetic.
3146 158142c2 bellard
*----------------------------------------------------------------------------*/
3147 158142c2 bellard
3148 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3149 158142c2 bellard
{
3150 158142c2 bellard
    flag aSign;
3151 158142c2 bellard
    int16 aExp, zExp;
3152 158142c2 bellard
    bits64 aSig, zSig, doubleZSig;
3153 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
3154 158142c2 bellard
3155 158142c2 bellard
    aSig = extractFloat64Frac( a );
3156 158142c2 bellard
    aExp = extractFloat64Exp( a );
3157 158142c2 bellard
    aSign = extractFloat64Sign( a );
3158 158142c2 bellard
    if ( aExp == 0x7FF ) {
3159 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3160 158142c2 bellard
        if ( ! aSign ) return a;
3161 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3162 158142c2 bellard
        return float64_default_nan;
3163 158142c2 bellard
    }
3164 158142c2 bellard
    if ( aSign ) {
3165 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3166 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3167 158142c2 bellard
        return float64_default_nan;
3168 158142c2 bellard
    }
3169 158142c2 bellard
    if ( aExp == 0 ) {
3170 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3171 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3172 158142c2 bellard
    }
3173 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3174 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
3175 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
3176 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
3177 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3178 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3179 158142c2 bellard
        doubleZSig = zSig<<1;
3180 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3181 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3182 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
3183 158142c2 bellard
            --zSig;
3184 158142c2 bellard
            doubleZSig -= 2;
3185 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3186 158142c2 bellard
        }
3187 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3188 158142c2 bellard
    }
3189 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3190 158142c2 bellard
3191 158142c2 bellard
}
3192 158142c2 bellard
3193 158142c2 bellard
/*----------------------------------------------------------------------------
3194 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3195 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3196 374dfc33 aurel32
| Floating-Point Arithmetic.
3197 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3198 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
3199 374dfc33 aurel32
{
3200 374dfc33 aurel32
    flag aSign, zSign;
3201 374dfc33 aurel32
    int16 aExp;
3202 374dfc33 aurel32
    bits64 aSig, aSig0, aSig1, zSig, i;
3203 374dfc33 aurel32
3204 374dfc33 aurel32
    aSig = extractFloat64Frac( a );
3205 374dfc33 aurel32
    aExp = extractFloat64Exp( a );
3206 374dfc33 aurel32
    aSign = extractFloat64Sign( a );
3207 374dfc33 aurel32
3208 374dfc33 aurel32
    if ( aExp == 0 ) {
3209 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3210 374dfc33 aurel32
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3211 374dfc33 aurel32
    }
3212 374dfc33 aurel32
    if ( aSign ) {
3213 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
3214 374dfc33 aurel32
        return float64_default_nan;
3215 374dfc33 aurel32
    }
3216 374dfc33 aurel32
    if ( aExp == 0x7FF ) {
3217 374dfc33 aurel32
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3218 374dfc33 aurel32
        return a;
3219 374dfc33 aurel32
    }
3220 374dfc33 aurel32
3221 374dfc33 aurel32
    aExp -= 0x3FF;
3222 374dfc33 aurel32
    aSig |= LIT64( 0x0010000000000000 );
3223 374dfc33 aurel32
    zSign = aExp < 0;
3224 374dfc33 aurel32
    zSig = (bits64)aExp << 52;
3225 374dfc33 aurel32
    for (i = 1LL << 51; i > 0; i >>= 1) {
3226 374dfc33 aurel32
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
3227 374dfc33 aurel32
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3228 374dfc33 aurel32
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
3229 374dfc33 aurel32
            aSig >>= 1;
3230 374dfc33 aurel32
            zSig |= i;
3231 374dfc33 aurel32
        }
3232 374dfc33 aurel32
    }
3233 374dfc33 aurel32
3234 374dfc33 aurel32
    if ( zSign )
3235 374dfc33 aurel32
        zSig = -zSig;
3236 374dfc33 aurel32
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3237 374dfc33 aurel32
}
3238 374dfc33 aurel32
3239 374dfc33 aurel32
/*----------------------------------------------------------------------------
3240 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3241 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The comparison is performed
3242 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3243 158142c2 bellard
*----------------------------------------------------------------------------*/
3244 158142c2 bellard
3245 750afe93 bellard
int float64_eq( float64 a, float64 b STATUS_PARAM )
3246 158142c2 bellard
{
3247 f090c9d4 pbrook
    bits64 av, bv;
3248 158142c2 bellard
3249 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3250 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3251 158142c2 bellard
       ) {
3252 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3253 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3254 158142c2 bellard
        }
3255 158142c2 bellard
        return 0;
3256 158142c2 bellard
    }
3257 f090c9d4 pbrook
    av = float64_val(a);
3258 a1b91bb4 pbrook
    bv = float64_val(b);
3259 f090c9d4 pbrook
    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3260 158142c2 bellard
3261 158142c2 bellard
}
3262 158142c2 bellard
3263 158142c2 bellard
/*----------------------------------------------------------------------------
3264 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3265 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3266 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3267 158142c2 bellard
| Arithmetic.
3268 158142c2 bellard
*----------------------------------------------------------------------------*/
3269 158142c2 bellard
3270 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
3271 158142c2 bellard
{
3272 158142c2 bellard
    flag aSign, bSign;
3273 f090c9d4 pbrook
    bits64 av, bv;
3274 158142c2 bellard
3275 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3276 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3277 158142c2 bellard
       ) {
3278 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3279 158142c2 bellard
        return 0;
3280 158142c2 bellard
    }
3281 158142c2 bellard
    aSign = extractFloat64Sign( a );
3282 158142c2 bellard
    bSign = extractFloat64Sign( b );
3283 f090c9d4 pbrook
    av = float64_val(a);
3284 a1b91bb4 pbrook
    bv = float64_val(b);
3285 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3286 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3287 158142c2 bellard
3288 158142c2 bellard
}
3289 158142c2 bellard
3290 158142c2 bellard
/*----------------------------------------------------------------------------
3291 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3292 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
3293 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3294 158142c2 bellard
*----------------------------------------------------------------------------*/
3295 158142c2 bellard
3296 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
3297 158142c2 bellard
{
3298 158142c2 bellard
    flag aSign, bSign;
3299 f090c9d4 pbrook
    bits64 av, bv;
3300 158142c2 bellard
3301 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3302 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3303 158142c2 bellard
       ) {
3304 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3305 158142c2 bellard
        return 0;
3306 158142c2 bellard
    }
3307 158142c2 bellard
    aSign = extractFloat64Sign( a );
3308 158142c2 bellard
    bSign = extractFloat64Sign( b );
3309 f090c9d4 pbrook
    av = float64_val(a);
3310 a1b91bb4 pbrook
    bv = float64_val(b);
3311 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3312 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3313 158142c2 bellard
3314 158142c2 bellard
}
3315 158142c2 bellard
3316 158142c2 bellard
/*----------------------------------------------------------------------------
3317 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3318 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3319 158142c2 bellard
| if either operand is a NaN.  Otherwise, the comparison is performed
3320 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3321 158142c2 bellard
*----------------------------------------------------------------------------*/
3322 158142c2 bellard
3323 750afe93 bellard
int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
3324 158142c2 bellard
{
3325 f090c9d4 pbrook
    bits64 av, bv;
3326 158142c2 bellard
3327 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3328 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3329 158142c2 bellard
       ) {
3330 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3331 158142c2 bellard
        return 0;
3332 158142c2 bellard
    }
3333 f090c9d4 pbrook
    av = float64_val(a);
3334 a1b91bb4 pbrook
    bv = float64_val(b);
3335 f090c9d4 pbrook
    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3336 158142c2 bellard
3337 158142c2 bellard
}
3338 158142c2 bellard
3339 158142c2 bellard
/*----------------------------------------------------------------------------
3340 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3341 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3342 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3343 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3344 158142c2 bellard
*----------------------------------------------------------------------------*/
3345 158142c2 bellard
3346 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3347 158142c2 bellard
{
3348 158142c2 bellard
    flag aSign, bSign;
3349 f090c9d4 pbrook
    bits64 av, bv;
3350 158142c2 bellard
3351 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3352 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3353 158142c2 bellard
       ) {
3354 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3355 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3356 158142c2 bellard
        }
3357 158142c2 bellard
        return 0;
3358 158142c2 bellard
    }
3359 158142c2 bellard
    aSign = extractFloat64Sign( a );
3360 158142c2 bellard
    bSign = extractFloat64Sign( b );
3361 f090c9d4 pbrook
    av = float64_val(a);
3362 a1b91bb4 pbrook
    bv = float64_val(b);
3363 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3364 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
3365 158142c2 bellard
3366 158142c2 bellard
}
3367 158142c2 bellard
3368 158142c2 bellard
/*----------------------------------------------------------------------------
3369 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3370 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3371 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3372 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3373 158142c2 bellard
*----------------------------------------------------------------------------*/
3374 158142c2 bellard
3375 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3376 158142c2 bellard
{
3377 158142c2 bellard
    flag aSign, bSign;
3378 f090c9d4 pbrook
    bits64 av, bv;
3379 158142c2 bellard
3380 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3381 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3382 158142c2 bellard
       ) {
3383 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3384 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3385 158142c2 bellard
        }
3386 158142c2 bellard
        return 0;
3387 158142c2 bellard
    }
3388 158142c2 bellard
    aSign = extractFloat64Sign( a );
3389 158142c2 bellard
    bSign = extractFloat64Sign( b );
3390 f090c9d4 pbrook
    av = float64_val(a);
3391 a1b91bb4 pbrook
    bv = float64_val(b);
3392 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3393 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
3394 158142c2 bellard
3395 158142c2 bellard
}
3396 158142c2 bellard
3397 158142c2 bellard
#ifdef FLOATX80
3398 158142c2 bellard
3399 158142c2 bellard
/*----------------------------------------------------------------------------
3400 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3401 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3402 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3403 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3404 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3405 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3406 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3407 158142c2 bellard
*----------------------------------------------------------------------------*/
3408 158142c2 bellard
3409 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
3410 158142c2 bellard
{
3411 158142c2 bellard
    flag aSign;
3412 158142c2 bellard
    int32 aExp, shiftCount;
3413 158142c2 bellard
    bits64 aSig;
3414 158142c2 bellard
3415 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3416 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3417 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3418 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3419 158142c2 bellard
    shiftCount = 0x4037 - aExp;
3420 158142c2 bellard
    if ( shiftCount <= 0 ) shiftCount = 1;
3421 158142c2 bellard
    shift64RightJamming( aSig, shiftCount, &aSig );
3422 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
3423 158142c2 bellard
3424 158142c2 bellard
}
3425 158142c2 bellard
3426 158142c2 bellard
/*----------------------------------------------------------------------------
3427 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3428 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3429 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3430 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3431 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3432 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3433 158142c2 bellard
| sign as `a' is returned.
3434 158142c2 bellard
*----------------------------------------------------------------------------*/
3435 158142c2 bellard
3436 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
3437 158142c2 bellard
{
3438 158142c2 bellard
    flag aSign;
3439 158142c2 bellard
    int32 aExp, shiftCount;
3440 158142c2 bellard
    bits64 aSig, savedASig;
3441 158142c2 bellard
    int32 z;
3442 158142c2 bellard
3443 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3444 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3445 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3446 158142c2 bellard
    if ( 0x401E < aExp ) {
3447 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3448 158142c2 bellard
        goto invalid;
3449 158142c2 bellard
    }
3450 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3451 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3452 158142c2 bellard
        return 0;
3453 158142c2 bellard
    }
3454 158142c2 bellard
    shiftCount = 0x403E - aExp;
3455 158142c2 bellard
    savedASig = aSig;
3456 158142c2 bellard
    aSig >>= shiftCount;
3457 158142c2 bellard
    z = aSig;
3458 158142c2 bellard
    if ( aSign ) z = - z;
3459 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
3460 158142c2 bellard
 invalid:
3461 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3462 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3463 158142c2 bellard
    }
3464 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
3465 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3466 158142c2 bellard
    }
3467 158142c2 bellard
    return z;
3468 158142c2 bellard
3469 158142c2 bellard
}
3470 158142c2 bellard
3471 158142c2 bellard
/*----------------------------------------------------------------------------
3472 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3473 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3474 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3475 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3476 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3477 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3478 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3479 158142c2 bellard
*----------------------------------------------------------------------------*/
3480 158142c2 bellard
3481 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
3482 158142c2 bellard
{
3483 158142c2 bellard
    flag aSign;
3484 158142c2 bellard
    int32 aExp, shiftCount;
3485 158142c2 bellard
    bits64 aSig, aSigExtra;
3486 158142c2 bellard
3487 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3488 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3489 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3490 158142c2 bellard
    shiftCount = 0x403E - aExp;
3491 158142c2 bellard
    if ( shiftCount <= 0 ) {
3492 158142c2 bellard
        if ( shiftCount ) {
3493 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3494 158142c2 bellard
            if (    ! aSign
3495 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
3496 158142c2 bellard
                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3497 158142c2 bellard
               ) {
3498 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3499 158142c2 bellard
            }
3500 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
3501 158142c2 bellard
        }
3502 158142c2 bellard
        aSigExtra = 0;
3503 158142c2 bellard
    }
3504 158142c2 bellard
    else {
3505 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3506 158142c2 bellard
    }
3507 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3508 158142c2 bellard
3509 158142c2 bellard
}
3510 158142c2 bellard
3511 158142c2 bellard
/*----------------------------------------------------------------------------
3512 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3513 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3514 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3515 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3516 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3517 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3518 158142c2 bellard
| sign as `a' is returned.
3519 158142c2 bellard
*----------------------------------------------------------------------------*/
3520 158142c2 bellard
3521 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
3522 158142c2 bellard
{
3523 158142c2 bellard
    flag aSign;
3524 158142c2 bellard
    int32 aExp, shiftCount;
3525 158142c2 bellard
    bits64 aSig;
3526 158142c2 bellard
    int64 z;
3527 158142c2 bellard
3528 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3529 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3530 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3531 158142c2 bellard
    shiftCount = aExp - 0x403E;
3532 158142c2 bellard
    if ( 0 <= shiftCount ) {
3533 158142c2 bellard
        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3534 158142c2 bellard
        if ( ( a.high != 0xC03E ) || aSig ) {
3535 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3536 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3537 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3538 158142c2 bellard
            }
3539 158142c2 bellard
        }
3540 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
3541 158142c2 bellard
    }
3542 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3543 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3544 158142c2 bellard
        return 0;
3545 158142c2 bellard
    }
3546 158142c2 bellard
    z = aSig>>( - shiftCount );
3547 158142c2 bellard
    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3548 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3549 158142c2 bellard
    }
3550 158142c2 bellard
    if ( aSign ) z = - z;
3551 158142c2 bellard
    return z;
3552 158142c2 bellard
3553 158142c2 bellard
}
3554 158142c2 bellard
3555 158142c2 bellard
/*----------------------------------------------------------------------------
3556 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3557 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3558 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3559 158142c2 bellard
| Floating-Point Arithmetic.
3560 158142c2 bellard
*----------------------------------------------------------------------------*/
3561 158142c2 bellard
3562 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
3563 158142c2 bellard
{
3564 158142c2 bellard
    flag aSign;
3565 158142c2 bellard
    int32 aExp;
3566 158142c2 bellard
    bits64 aSig;
3567 158142c2 bellard
3568 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3569 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3570 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3571 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3572 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) {
3573 158142c2 bellard
            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
3574 158142c2 bellard
        }
3575 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
3576 158142c2 bellard
    }
3577 158142c2 bellard
    shift64RightJamming( aSig, 33, &aSig );
3578 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3F81;
3579 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
3580 158142c2 bellard
3581 158142c2 bellard
}
3582 158142c2 bellard
3583 158142c2 bellard
/*----------------------------------------------------------------------------
3584 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3585 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3586 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3587 158142c2 bellard
| Floating-Point Arithmetic.
3588 158142c2 bellard
*----------------------------------------------------------------------------*/
3589 158142c2 bellard
3590 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
3591 158142c2 bellard
{
3592 158142c2 bellard
    flag aSign;
3593 158142c2 bellard
    int32 aExp;
3594 158142c2 bellard
    bits64 aSig, zSig;
3595 158142c2 bellard
3596 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3597 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3598 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3599 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3600 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) {
3601 158142c2 bellard
            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
3602 158142c2 bellard
        }
3603 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
3604 158142c2 bellard
    }
3605 158142c2 bellard
    shift64RightJamming( aSig, 1, &zSig );
3606 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3C01;
3607 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
3608 158142c2 bellard
3609 158142c2 bellard
}
3610 158142c2 bellard
3611 158142c2 bellard
#ifdef FLOAT128
3612 158142c2 bellard
3613 158142c2 bellard
/*----------------------------------------------------------------------------
3614 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3615 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3616 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3617 158142c2 bellard
| Floating-Point Arithmetic.
3618 158142c2 bellard
*----------------------------------------------------------------------------*/
3619 158142c2 bellard
3620 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
3621 158142c2 bellard
{
3622 158142c2 bellard
    flag aSign;
3623 158142c2 bellard
    int16 aExp;
3624 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
3625 158142c2 bellard
3626 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3627 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3628 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3629 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3630 158142c2 bellard
        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
3631 158142c2 bellard
    }
3632 158142c2 bellard
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3633 158142c2 bellard
    return packFloat128( aSign, aExp, zSig0, zSig1 );
3634 158142c2 bellard
3635 158142c2 bellard
}
3636 158142c2 bellard
3637 158142c2 bellard
#endif
3638 158142c2 bellard
3639 158142c2 bellard
/*----------------------------------------------------------------------------
3640 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
3641 158142c2 bellard
| and returns the result as an extended quadruple-precision floating-point
3642 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
3643 158142c2 bellard
| Binary Floating-Point Arithmetic.
3644 158142c2 bellard
*----------------------------------------------------------------------------*/
3645 158142c2 bellard
3646 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
3647 158142c2 bellard
{
3648 158142c2 bellard
    flag aSign;
3649 158142c2 bellard
    int32 aExp;
3650 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
3651 158142c2 bellard
    int8 roundingMode;
3652 158142c2 bellard
    floatx80 z;
3653 158142c2 bellard
3654 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3655 158142c2 bellard
    if ( 0x403E <= aExp ) {
3656 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3657 158142c2 bellard
            return propagateFloatx80NaN( a, a STATUS_VAR );
3658 158142c2 bellard
        }
3659 158142c2 bellard
        return a;
3660 158142c2 bellard
    }
3661 158142c2 bellard
    if ( aExp < 0x3FFF ) {
3662 158142c2 bellard
        if (    ( aExp == 0 )
3663 158142c2 bellard
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3664 158142c2 bellard
            return a;
3665 158142c2 bellard
        }
3666 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3667 158142c2 bellard
        aSign = extractFloatx80Sign( a );
3668 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3669 158142c2 bellard
         case float_round_nearest_even:
3670 158142c2 bellard
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3671 158142c2 bellard
               ) {
3672 158142c2 bellard
                return
3673 158142c2 bellard
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3674 158142c2 bellard
            }
3675 158142c2 bellard
            break;
3676 158142c2 bellard
         case float_round_down:
3677 158142c2 bellard
            return
3678 158142c2 bellard
                  aSign ?
3679 158142c2 bellard
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3680 158142c2 bellard
                : packFloatx80( 0, 0, 0 );
3681 158142c2 bellard
         case float_round_up:
3682 158142c2 bellard
            return
3683 158142c2 bellard
                  aSign ? packFloatx80( 1, 0, 0 )
3684 158142c2 bellard
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3685 158142c2 bellard
        }
3686 158142c2 bellard
        return packFloatx80( aSign, 0, 0 );
3687 158142c2 bellard
    }
3688 158142c2 bellard
    lastBitMask = 1;
3689 158142c2 bellard
    lastBitMask <<= 0x403E - aExp;
3690 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3691 158142c2 bellard
    z = a;
3692 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3693 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3694 158142c2 bellard
        z.low += lastBitMask>>1;
3695 158142c2 bellard
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3696 158142c2 bellard
    }
3697 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3698 158142c2 bellard
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3699 158142c2 bellard
            z.low += roundBitsMask;
3700 158142c2 bellard
        }
3701 158142c2 bellard
    }
3702 158142c2 bellard
    z.low &= ~ roundBitsMask;
3703 158142c2 bellard
    if ( z.low == 0 ) {
3704 158142c2 bellard
        ++z.high;
3705 158142c2 bellard
        z.low = LIT64( 0x8000000000000000 );
3706 158142c2 bellard
    }
3707 158142c2 bellard
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
3708 158142c2 bellard
    return z;
3709 158142c2 bellard
3710 158142c2 bellard
}
3711 158142c2 bellard
3712 158142c2 bellard
/*----------------------------------------------------------------------------
3713 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
3714 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3715 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
3716 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3717 158142c2 bellard
| Floating-Point Arithmetic.
3718 158142c2 bellard
*----------------------------------------------------------------------------*/
3719 158142c2 bellard
3720 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
3721 158142c2 bellard
{
3722 158142c2 bellard
    int32 aExp, bExp, zExp;
3723 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3724 158142c2 bellard
    int32 expDiff;
3725 158142c2 bellard
3726 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3727 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3728 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3729 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3730 158142c2 bellard
    expDiff = aExp - bExp;
3731 158142c2 bellard
    if ( 0 < expDiff ) {
3732 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3733 158142c2 bellard
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3734 158142c2 bellard
            return a;
3735 158142c2 bellard
        }
3736 158142c2 bellard
        if ( bExp == 0 ) --expDiff;
3737 158142c2 bellard
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3738 158142c2 bellard
        zExp = aExp;
3739 158142c2 bellard
    }
3740 158142c2 bellard
    else if ( expDiff < 0 ) {
3741 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3742 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3743 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3744 158142c2 bellard
        }
3745 158142c2 bellard
        if ( aExp == 0 ) ++expDiff;
3746 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3747 158142c2 bellard
        zExp = bExp;
3748 158142c2 bellard
    }
3749 158142c2 bellard
    else {
3750 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3751 158142c2 bellard
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3752 158142c2 bellard
                return propagateFloatx80NaN( a, b STATUS_VAR );
3753 158142c2 bellard
            }
3754 158142c2 bellard
            return a;
3755 158142c2 bellard
        }
3756 158142c2 bellard
        zSig1 = 0;
3757 158142c2 bellard
        zSig0 = aSig + bSig;
3758 158142c2 bellard
        if ( aExp == 0 ) {
3759 158142c2 bellard
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3760 158142c2 bellard
            goto roundAndPack;
3761 158142c2 bellard
        }
3762 158142c2 bellard
        zExp = aExp;
3763 158142c2 bellard
        goto shiftRight1;
3764 158142c2 bellard
    }
3765 158142c2 bellard
    zSig0 = aSig + bSig;
3766 158142c2 bellard
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3767 158142c2 bellard
 shiftRight1:
3768 158142c2 bellard
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3769 158142c2 bellard
    zSig0 |= LIT64( 0x8000000000000000 );
3770 158142c2 bellard
    ++zExp;
3771 158142c2 bellard
 roundAndPack:
3772 158142c2 bellard
    return
3773 158142c2 bellard
        roundAndPackFloatx80(
3774 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3775 158142c2 bellard
3776 158142c2 bellard
}
3777 158142c2 bellard
3778 158142c2 bellard
/*----------------------------------------------------------------------------
3779 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
3780 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3781 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3782 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3783 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3784 158142c2 bellard
*----------------------------------------------------------------------------*/
3785 158142c2 bellard
3786 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
3787 158142c2 bellard
{
3788 158142c2 bellard
    int32 aExp, bExp, zExp;
3789 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3790 158142c2 bellard
    int32 expDiff;
3791 158142c2 bellard
    floatx80 z;
3792 158142c2 bellard
3793 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3794 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3795 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3796 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3797 158142c2 bellard
    expDiff = aExp - bExp;
3798 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3799 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3800 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3801 158142c2 bellard
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3802 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3803 158142c2 bellard
        }
3804 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3805 158142c2 bellard
        z.low = floatx80_default_nan_low;
3806 158142c2 bellard
        z.high = floatx80_default_nan_high;
3807 158142c2 bellard
        return z;
3808 158142c2 bellard
    }
3809 158142c2 bellard
    if ( aExp == 0 ) {
3810 158142c2 bellard
        aExp = 1;
3811 158142c2 bellard
        bExp = 1;
3812 158142c2 bellard
    }
3813 158142c2 bellard
    zSig1 = 0;
3814 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3815 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3816 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3817 158142c2 bellard
 bExpBigger:
3818 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3819 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3820 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3821 158142c2 bellard
    }
3822 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
3823 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3824 158142c2 bellard
 bBigger:
3825 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3826 158142c2 bellard
    zExp = bExp;
3827 158142c2 bellard
    zSign ^= 1;
3828 158142c2 bellard
    goto normalizeRoundAndPack;
3829 158142c2 bellard
 aExpBigger:
3830 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3831 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3832 158142c2 bellard
        return a;
3833 158142c2 bellard
    }
3834 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
3835 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3836 158142c2 bellard
 aBigger:
3837 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3838 158142c2 bellard
    zExp = aExp;
3839 158142c2 bellard
 normalizeRoundAndPack:
3840 158142c2 bellard
    return
3841 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3842 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3843 158142c2 bellard
3844 158142c2 bellard
}
3845 158142c2 bellard
3846 158142c2 bellard
/*----------------------------------------------------------------------------
3847 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
3848 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
3849 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3850 158142c2 bellard
*----------------------------------------------------------------------------*/
3851 158142c2 bellard
3852 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
3853 158142c2 bellard
{
3854 158142c2 bellard
    flag aSign, bSign;
3855 158142c2 bellard
3856 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3857 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3858 158142c2 bellard
    if ( aSign == bSign ) {
3859 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3860 158142c2 bellard
    }
3861 158142c2 bellard
    else {
3862 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3863 158142c2 bellard
    }
3864 158142c2 bellard
3865 158142c2 bellard
}
3866 158142c2 bellard
3867 158142c2 bellard
/*----------------------------------------------------------------------------
3868 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
3869 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3870 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3871 158142c2 bellard
*----------------------------------------------------------------------------*/
3872 158142c2 bellard
3873 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
3874 158142c2 bellard
{
3875 158142c2 bellard
    flag aSign, bSign;
3876 158142c2 bellard
3877 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3878 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3879 158142c2 bellard
    if ( aSign == bSign ) {
3880 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3881 158142c2 bellard
    }
3882 158142c2 bellard
    else {
3883 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3884 158142c2 bellard
    }
3885 158142c2 bellard
3886 158142c2 bellard
}
3887 158142c2 bellard
3888 158142c2 bellard
/*----------------------------------------------------------------------------
3889 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
3890 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3891 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3892 158142c2 bellard
*----------------------------------------------------------------------------*/
3893 158142c2 bellard
3894 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
3895 158142c2 bellard
{
3896 158142c2 bellard
    flag aSign, bSign, zSign;
3897 158142c2 bellard
    int32 aExp, bExp, zExp;
3898 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3899 158142c2 bellard
    floatx80 z;
3900 158142c2 bellard
3901 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3902 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3903 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3904 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3905 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3906 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3907 158142c2 bellard
    zSign = aSign ^ bSign;
3908 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3909 158142c2 bellard
        if (    (bits64) ( aSig<<1 )
3910 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3911 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3912 158142c2 bellard
        }
3913 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) goto invalid;
3914 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3915 158142c2 bellard
    }
3916 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3917 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3918 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3919 158142c2 bellard
 invalid:
3920 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3921 158142c2 bellard
            z.low = floatx80_default_nan_low;
3922 158142c2 bellard
            z.high = floatx80_default_nan_high;
3923 158142c2 bellard
            return z;
3924 158142c2 bellard
        }
3925 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3926 158142c2 bellard
    }
3927 158142c2 bellard
    if ( aExp == 0 ) {
3928 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3929 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3930 158142c2 bellard
    }
3931 158142c2 bellard
    if ( bExp == 0 ) {
3932 158142c2 bellard
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3933 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3934 158142c2 bellard
    }
3935 158142c2 bellard
    zExp = aExp + bExp - 0x3FFE;
3936 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3937 158142c2 bellard
    if ( 0 < (sbits64) zSig0 ) {
3938 158142c2 bellard
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3939 158142c2 bellard
        --zExp;
3940 158142c2 bellard
    }
3941 158142c2 bellard
    return
3942 158142c2 bellard
        roundAndPackFloatx80(
3943 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3944 158142c2 bellard
3945 158142c2 bellard
}
3946 158142c2 bellard
3947 158142c2 bellard
/*----------------------------------------------------------------------------
3948 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
3949 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
3950 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3951 158142c2 bellard
*----------------------------------------------------------------------------*/
3952 158142c2 bellard
3953 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3954 158142c2 bellard
{
3955 158142c2 bellard
    flag aSign, bSign, zSign;
3956 158142c2 bellard
    int32 aExp, bExp, zExp;
3957 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3958 158142c2 bellard
    bits64 rem0, rem1, rem2, term0, term1, term2;
3959 158142c2 bellard
    floatx80 z;
3960 158142c2 bellard
3961 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3962 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3963 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3964 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3965 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3966 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3967 158142c2 bellard
    zSign = aSign ^ bSign;
3968 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3969 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3970 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3971 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3972 158142c2 bellard
            goto invalid;
3973 158142c2 bellard
        }
3974 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3975 158142c2 bellard
    }
3976 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3977 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3978 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
3979 158142c2 bellard
    }
3980 158142c2 bellard
    if ( bExp == 0 ) {
3981 158142c2 bellard
        if ( bSig == 0 ) {
3982 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3983 158142c2 bellard
 invalid:
3984 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3985 158142c2 bellard
                z.low = floatx80_default_nan_low;
3986 158142c2 bellard
                z.high = floatx80_default_nan_high;
3987 158142c2 bellard
                return z;
3988 158142c2 bellard
            }
3989 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3990 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3991 158142c2 bellard
        }
3992 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3993 158142c2 bellard
    }
3994 158142c2 bellard
    if ( aExp == 0 ) {
3995 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3996 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3997 158142c2 bellard
    }
3998 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
3999 158142c2 bellard
    rem1 = 0;
4000 158142c2 bellard
    if ( bSig <= aSig ) {
4001 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4002 158142c2 bellard
        ++zExp;
4003 158142c2 bellard
    }
4004 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4005 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4006 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4007 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4008 158142c2 bellard
        --zSig0;
4009 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4010 158142c2 bellard
    }
4011 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4012 158142c2 bellard
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
4013 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4014 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4015 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4016 158142c2 bellard
            --zSig1;
4017 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4018 158142c2 bellard
        }
4019 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4020 158142c2 bellard
    }
4021 158142c2 bellard
    return
4022 158142c2 bellard
        roundAndPackFloatx80(
4023 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4024 158142c2 bellard
4025 158142c2 bellard
}
4026 158142c2 bellard
4027 158142c2 bellard
/*----------------------------------------------------------------------------
4028 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4029 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4030 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4031 158142c2 bellard
*----------------------------------------------------------------------------*/
4032 158142c2 bellard
4033 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4034 158142c2 bellard
{
4035 158142c2 bellard
    flag aSign, bSign, zSign;
4036 158142c2 bellard
    int32 aExp, bExp, expDiff;
4037 158142c2 bellard
    bits64 aSig0, aSig1, bSig;
4038 158142c2 bellard
    bits64 q, term0, term1, alternateASig0, alternateASig1;
4039 158142c2 bellard
    floatx80 z;
4040 158142c2 bellard
4041 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4042 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4043 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4044 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4045 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4046 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4047 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4048 158142c2 bellard
        if (    (bits64) ( aSig0<<1 )
4049 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
4050 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4051 158142c2 bellard
        }
4052 158142c2 bellard
        goto invalid;
4053 158142c2 bellard
    }
4054 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4055 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4056 158142c2 bellard
        return a;
4057 158142c2 bellard
    }
4058 158142c2 bellard
    if ( bExp == 0 ) {
4059 158142c2 bellard
        if ( bSig == 0 ) {
4060 158142c2 bellard
 invalid:
4061 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4062 158142c2 bellard
            z.low = floatx80_default_nan_low;
4063 158142c2 bellard
            z.high = floatx80_default_nan_high;
4064 158142c2 bellard
            return z;
4065 158142c2 bellard
        }
4066 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4067 158142c2 bellard
    }
4068 158142c2 bellard
    if ( aExp == 0 ) {
4069 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
4070 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4071 158142c2 bellard
    }
4072 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
4073 158142c2 bellard
    zSign = aSign;
4074 158142c2 bellard
    expDiff = aExp - bExp;
4075 158142c2 bellard
    aSig1 = 0;
4076 158142c2 bellard
    if ( expDiff < 0 ) {
4077 158142c2 bellard
        if ( expDiff < -1 ) return a;
4078 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4079 158142c2 bellard
        expDiff = 0;
4080 158142c2 bellard
    }
4081 158142c2 bellard
    q = ( bSig <= aSig0 );
4082 158142c2 bellard
    if ( q ) aSig0 -= bSig;
4083 158142c2 bellard
    expDiff -= 64;
4084 158142c2 bellard
    while ( 0 < expDiff ) {
4085 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4086 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4087 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
4088 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4089 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4090 158142c2 bellard
        expDiff -= 62;
4091 158142c2 bellard
    }
4092 158142c2 bellard
    expDiff += 64;
4093 158142c2 bellard
    if ( 0 < expDiff ) {
4094 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4095 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4096 158142c2 bellard
        q >>= 64 - expDiff;
4097 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4098 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4099 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4100 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4101 158142c2 bellard
            ++q;
4102 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4103 158142c2 bellard
        }
4104 158142c2 bellard
    }
4105 158142c2 bellard
    else {
4106 158142c2 bellard
        term1 = 0;
4107 158142c2 bellard
        term0 = bSig;
4108 158142c2 bellard
    }
4109 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4110 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4111 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4112 158142c2 bellard
              && ( q & 1 ) )
4113 158142c2 bellard
       ) {
4114 158142c2 bellard
        aSig0 = alternateASig0;
4115 158142c2 bellard
        aSig1 = alternateASig1;
4116 158142c2 bellard
        zSign = ! zSign;
4117 158142c2 bellard
    }
4118 158142c2 bellard
    return
4119 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4120 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4121 158142c2 bellard
4122 158142c2 bellard
}
4123 158142c2 bellard
4124 158142c2 bellard
/*----------------------------------------------------------------------------
4125 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4126 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4127 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4128 158142c2 bellard
*----------------------------------------------------------------------------*/
4129 158142c2 bellard
4130 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4131 158142c2 bellard
{
4132 158142c2 bellard
    flag aSign;
4133 158142c2 bellard
    int32 aExp, zExp;
4134 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4135 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4136 158142c2 bellard
    floatx80 z;
4137 158142c2 bellard
4138 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4139 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4140 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4141 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4142 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4143 158142c2 bellard
        if ( ! aSign ) return a;
4144 158142c2 bellard
        goto invalid;
4145 158142c2 bellard
    }
4146 158142c2 bellard
    if ( aSign ) {
4147 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4148 158142c2 bellard
 invalid:
4149 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4150 158142c2 bellard
        z.low = floatx80_default_nan_low;
4151 158142c2 bellard
        z.high = floatx80_default_nan_high;
4152 158142c2 bellard
        return z;
4153 158142c2 bellard
    }
4154 158142c2 bellard
    if ( aExp == 0 ) {
4155 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4156 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4157 158142c2 bellard
    }
4158 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4159 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4160 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4161 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4162 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4163 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4164 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4165 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4166 158142c2 bellard
        --zSig0;
4167 158142c2 bellard
        doubleZSig0 -= 2;
4168 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4169 158142c2 bellard
    }
4170 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4171 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4172 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4173 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4174 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4175 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4176 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4177 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4178 158142c2 bellard
            --zSig1;
4179 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4180 158142c2 bellard
            term3 |= 1;
4181 158142c2 bellard
            term2 |= doubleZSig0;
4182 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4183 158142c2 bellard
        }
4184 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4185 158142c2 bellard
    }
4186 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4187 158142c2 bellard
    zSig0 |= doubleZSig0;
4188 158142c2 bellard
    return
4189 158142c2 bellard
        roundAndPackFloatx80(
4190 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4191 158142c2 bellard
4192 158142c2 bellard
}
4193 158142c2 bellard
4194 158142c2 bellard
/*----------------------------------------------------------------------------
4195 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4196 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
4197 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4198 158142c2 bellard
| Arithmetic.
4199 158142c2 bellard
*----------------------------------------------------------------------------*/
4200 158142c2 bellard
4201 750afe93 bellard
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4202 158142c2 bellard
{
4203 158142c2 bellard
4204 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4205 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4206 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4207 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4208 158142c2 bellard
       ) {
4209 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4210 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4211 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4212 158142c2 bellard
        }
4213 158142c2 bellard
        return 0;
4214 158142c2 bellard
    }
4215 158142c2 bellard
    return
4216 158142c2 bellard
           ( a.low == b.low )
4217 158142c2 bellard
        && (    ( a.high == b.high )
4218 158142c2 bellard
             || (    ( a.low == 0 )
4219 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4220 158142c2 bellard
           );
4221 158142c2 bellard
4222 158142c2 bellard
}
4223 158142c2 bellard
4224 158142c2 bellard
/*----------------------------------------------------------------------------
4225 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4226 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4227 158142c2 bellard
| comparison is performed according to the IEC/IEEE Standard for Binary
4228 158142c2 bellard
| Floating-Point Arithmetic.
4229 158142c2 bellard
*----------------------------------------------------------------------------*/
4230 158142c2 bellard
4231 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4232 158142c2 bellard
{
4233 158142c2 bellard
    flag aSign, bSign;
4234 158142c2 bellard
4235 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4236 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4237 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4238 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4239 158142c2 bellard
       ) {
4240 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4241 158142c2 bellard
        return 0;
4242 158142c2 bellard
    }
4243 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4244 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4245 158142c2 bellard
    if ( aSign != bSign ) {
4246 158142c2 bellard
        return
4247 158142c2 bellard
               aSign
4248 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4249 158142c2 bellard
                 == 0 );
4250 158142c2 bellard
    }
4251 158142c2 bellard
    return
4252 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4253 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4254 158142c2 bellard
4255 158142c2 bellard
}
4256 158142c2 bellard
4257 158142c2 bellard
/*----------------------------------------------------------------------------
4258 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4259 158142c2 bellard
| less than the corresponding value `b', and 0 otherwise.  The comparison
4260 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4261 158142c2 bellard
| Arithmetic.
4262 158142c2 bellard
*----------------------------------------------------------------------------*/
4263 158142c2 bellard
4264 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4265 158142c2 bellard
{
4266 158142c2 bellard
    flag aSign, bSign;
4267 158142c2 bellard
4268 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4269 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4270 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4271 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4272 158142c2 bellard
       ) {
4273 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4274 158142c2 bellard
        return 0;
4275 158142c2 bellard
    }
4276 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4277 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4278 158142c2 bellard
    if ( aSign != bSign ) {
4279 158142c2 bellard
        return
4280 158142c2 bellard
               aSign
4281 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4282 158142c2 bellard
                 != 0 );
4283 158142c2 bellard
    }
4284 158142c2 bellard
    return
4285 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4286 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4287 158142c2 bellard
4288 158142c2 bellard
}
4289 158142c2 bellard
4290 158142c2 bellard
/*----------------------------------------------------------------------------
4291 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is equal
4292 158142c2 bellard
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4293 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4294 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4295 158142c2 bellard
*----------------------------------------------------------------------------*/
4296 158142c2 bellard
4297 750afe93 bellard
int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
4298 158142c2 bellard
{
4299 158142c2 bellard
4300 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4301 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4302 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4303 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4304 158142c2 bellard
       ) {
4305 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4306 158142c2 bellard
        return 0;
4307 158142c2 bellard
    }
4308 158142c2 bellard
    return
4309 158142c2 bellard
           ( a.low == b.low )
4310 158142c2 bellard
        && (    ( a.high == b.high )
4311 158142c2 bellard
             || (    ( a.low == 0 )
4312 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4313 158142c2 bellard
           );
4314 158142c2 bellard
4315 158142c2 bellard
}
4316 158142c2 bellard
4317 158142c2 bellard
/*----------------------------------------------------------------------------
4318 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4319 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4320 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4321 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4322 158142c2 bellard
*----------------------------------------------------------------------------*/
4323 158142c2 bellard
4324 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4325 158142c2 bellard
{
4326 158142c2 bellard
    flag aSign, bSign;
4327 158142c2 bellard
4328 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4329 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4330 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4331 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4332 158142c2 bellard
       ) {
4333 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4334 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4335 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4336 158142c2 bellard
        }
4337 158142c2 bellard
        return 0;
4338 158142c2 bellard
    }
4339 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4340 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4341 158142c2 bellard
    if ( aSign != bSign ) {
4342 158142c2 bellard
        return
4343 158142c2 bellard
               aSign
4344 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4345 158142c2 bellard
                 == 0 );
4346 158142c2 bellard
    }
4347 158142c2 bellard
    return
4348 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4349 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4350 158142c2 bellard
4351 158142c2 bellard
}
4352 158142c2 bellard
4353 158142c2 bellard
/*----------------------------------------------------------------------------
4354 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4355 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4356 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4357 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4358 158142c2 bellard
*----------------------------------------------------------------------------*/
4359 158142c2 bellard
4360 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4361 158142c2 bellard
{
4362 158142c2 bellard
    flag aSign, bSign;
4363 158142c2 bellard
4364 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4365 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4366 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4367 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4368 158142c2 bellard
       ) {
4369 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4370 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4371 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4372 158142c2 bellard
        }
4373 158142c2 bellard
        return 0;
4374 158142c2 bellard
    }
4375 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4376 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4377 158142c2 bellard
    if ( aSign != bSign ) {
4378 158142c2 bellard
        return
4379 158142c2 bellard
               aSign
4380 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4381 158142c2 bellard
                 != 0 );
4382 158142c2 bellard
    }
4383 158142c2 bellard
    return
4384 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4385 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4386 158142c2 bellard
4387 158142c2 bellard
}
4388 158142c2 bellard
4389 158142c2 bellard
#endif
4390 158142c2 bellard
4391 158142c2 bellard
#ifdef FLOAT128
4392 158142c2 bellard
4393 158142c2 bellard
/*----------------------------------------------------------------------------
4394 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4395 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4396 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4397 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4398 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4399 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4400 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4401 158142c2 bellard
*----------------------------------------------------------------------------*/
4402 158142c2 bellard
4403 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4404 158142c2 bellard
{
4405 158142c2 bellard
    flag aSign;
4406 158142c2 bellard
    int32 aExp, shiftCount;
4407 158142c2 bellard
    bits64 aSig0, aSig1;
4408 158142c2 bellard
4409 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4410 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4411 158142c2 bellard
    aExp = extractFloat128Exp( a );
4412 158142c2 bellard
    aSign = extractFloat128Sign( a );
4413 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4414 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4415 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4416 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4417 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4418 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4419 158142c2 bellard
4420 158142c2 bellard
}
4421 158142c2 bellard
4422 158142c2 bellard
/*----------------------------------------------------------------------------
4423 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4424 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4425 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4426 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4427 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4428 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4429 158142c2 bellard
| returned.
4430 158142c2 bellard
*----------------------------------------------------------------------------*/
4431 158142c2 bellard
4432 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4433 158142c2 bellard
{
4434 158142c2 bellard
    flag aSign;
4435 158142c2 bellard
    int32 aExp, shiftCount;
4436 158142c2 bellard
    bits64 aSig0, aSig1, savedASig;
4437 158142c2 bellard
    int32 z;
4438 158142c2 bellard
4439 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4440 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4441 158142c2 bellard
    aExp = extractFloat128Exp( a );
4442 158142c2 bellard
    aSign = extractFloat128Sign( a );
4443 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4444 158142c2 bellard
    if ( 0x401E < aExp ) {
4445 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4446 158142c2 bellard
        goto invalid;
4447 158142c2 bellard
    }
4448 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4449 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4450 158142c2 bellard
        return 0;
4451 158142c2 bellard
    }
4452 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4453 158142c2 bellard
    shiftCount = 0x402F - aExp;
4454 158142c2 bellard
    savedASig = aSig0;
4455 158142c2 bellard
    aSig0 >>= shiftCount;
4456 158142c2 bellard
    z = aSig0;
4457 158142c2 bellard
    if ( aSign ) z = - z;
4458 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4459 158142c2 bellard
 invalid:
4460 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4461 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4462 158142c2 bellard
    }
4463 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4464 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4465 158142c2 bellard
    }
4466 158142c2 bellard
    return z;
4467 158142c2 bellard
4468 158142c2 bellard
}
4469 158142c2 bellard
4470 158142c2 bellard
/*----------------------------------------------------------------------------
4471 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4472 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4473 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4474 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4475 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4476 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4477 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4478 158142c2 bellard
*----------------------------------------------------------------------------*/
4479 158142c2 bellard
4480 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4481 158142c2 bellard
{
4482 158142c2 bellard
    flag aSign;
4483 158142c2 bellard
    int32 aExp, shiftCount;
4484 158142c2 bellard
    bits64 aSig0, aSig1;
4485 158142c2 bellard
4486 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4487 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4488 158142c2 bellard
    aExp = extractFloat128Exp( a );
4489 158142c2 bellard
    aSign = extractFloat128Sign( a );
4490 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4491 158142c2 bellard
    shiftCount = 0x402F - aExp;
4492 158142c2 bellard
    if ( shiftCount <= 0 ) {
4493 158142c2 bellard
        if ( 0x403E < aExp ) {
4494 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4495 158142c2 bellard
            if (    ! aSign
4496 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4497 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4498 158142c2 bellard
                    )
4499 158142c2 bellard
               ) {
4500 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4501 158142c2 bellard
            }
4502 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4503 158142c2 bellard
        }
4504 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4505 158142c2 bellard
    }
4506 158142c2 bellard
    else {
4507 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4508 158142c2 bellard
    }
4509 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4510 158142c2 bellard
4511 158142c2 bellard
}
4512 158142c2 bellard
4513 158142c2 bellard
/*----------------------------------------------------------------------------
4514 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4515 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4516 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4517 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4518 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4519 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4520 158142c2 bellard
| returned.
4521 158142c2 bellard
*----------------------------------------------------------------------------*/
4522 158142c2 bellard
4523 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4524 158142c2 bellard
{
4525 158142c2 bellard
    flag aSign;
4526 158142c2 bellard
    int32 aExp, shiftCount;
4527 158142c2 bellard
    bits64 aSig0, aSig1;
4528 158142c2 bellard
    int64 z;
4529 158142c2 bellard
4530 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4531 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4532 158142c2 bellard
    aExp = extractFloat128Exp( a );
4533 158142c2 bellard
    aSign = extractFloat128Sign( a );
4534 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4535 158142c2 bellard
    shiftCount = aExp - 0x402F;
4536 158142c2 bellard
    if ( 0 < shiftCount ) {
4537 158142c2 bellard
        if ( 0x403E <= aExp ) {
4538 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4539 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4540 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4541 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4542 158142c2 bellard
            }
4543 158142c2 bellard
            else {
4544 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4545 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4546 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4547 158142c2 bellard
                }
4548 158142c2 bellard
            }
4549 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4550 158142c2 bellard
        }
4551 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4552 158142c2 bellard
        if ( (bits64) ( aSig1<<shiftCount ) ) {
4553 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4554 158142c2 bellard
        }
4555 158142c2 bellard
    }
4556 158142c2 bellard
    else {
4557 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4558 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4559 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4560 158142c2 bellard
            }
4561 158142c2 bellard
            return 0;
4562 158142c2 bellard
        }
4563 158142c2 bellard
        z = aSig0>>( - shiftCount );
4564 158142c2 bellard
        if (    aSig1
4565 158142c2 bellard
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4566 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4567 158142c2 bellard
        }
4568 158142c2 bellard
    }
4569 158142c2 bellard
    if ( aSign ) z = - z;
4570 158142c2 bellard
    return z;
4571 158142c2 bellard
4572 158142c2 bellard
}
4573 158142c2 bellard
4574 158142c2 bellard
/*----------------------------------------------------------------------------
4575 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4576 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4577 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4578 158142c2 bellard
| Arithmetic.
4579 158142c2 bellard
*----------------------------------------------------------------------------*/
4580 158142c2 bellard
4581 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4582 158142c2 bellard
{
4583 158142c2 bellard
    flag aSign;
4584 158142c2 bellard
    int32 aExp;
4585 158142c2 bellard
    bits64 aSig0, aSig1;
4586 158142c2 bellard
    bits32 zSig;
4587 158142c2 bellard
4588 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4589 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4590 158142c2 bellard
    aExp = extractFloat128Exp( a );
4591 158142c2 bellard
    aSign = extractFloat128Sign( a );
4592 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4593 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4594 158142c2 bellard
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4595 158142c2 bellard
        }
4596 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
4597 158142c2 bellard
    }
4598 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4599 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
4600 158142c2 bellard
    zSig = aSig0;
4601 158142c2 bellard
    if ( aExp || zSig ) {
4602 158142c2 bellard
        zSig |= 0x40000000;
4603 158142c2 bellard
        aExp -= 0x3F81;
4604 158142c2 bellard
    }
4605 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4606 158142c2 bellard
4607 158142c2 bellard
}
4608 158142c2 bellard
4609 158142c2 bellard
/*----------------------------------------------------------------------------
4610 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4611 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
4612 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4613 158142c2 bellard
| Arithmetic.
4614 158142c2 bellard
*----------------------------------------------------------------------------*/
4615 158142c2 bellard
4616 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
4617 158142c2 bellard
{
4618 158142c2 bellard
    flag aSign;
4619 158142c2 bellard
    int32 aExp;
4620 158142c2 bellard
    bits64 aSig0, aSig1;
4621 158142c2 bellard
4622 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4623 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4624 158142c2 bellard
    aExp = extractFloat128Exp( a );
4625 158142c2 bellard
    aSign = extractFloat128Sign( a );
4626 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4627 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4628 158142c2 bellard
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4629 158142c2 bellard
        }
4630 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
4631 158142c2 bellard
    }
4632 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4633 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4634 158142c2 bellard
    if ( aExp || aSig0 ) {
4635 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4636 158142c2 bellard
        aExp -= 0x3C01;
4637 158142c2 bellard
    }
4638 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4639 158142c2 bellard
4640 158142c2 bellard
}
4641 158142c2 bellard
4642 158142c2 bellard
#ifdef FLOATX80
4643 158142c2 bellard
4644 158142c2 bellard
/*----------------------------------------------------------------------------
4645 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4646 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
4647 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4648 158142c2 bellard
| Floating-Point Arithmetic.
4649 158142c2 bellard
*----------------------------------------------------------------------------*/
4650 158142c2 bellard
4651 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4652 158142c2 bellard
{
4653 158142c2 bellard
    flag aSign;
4654 158142c2 bellard
    int32 aExp;
4655 158142c2 bellard
    bits64 aSig0, aSig1;
4656 158142c2 bellard
4657 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4658 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4659 158142c2 bellard
    aExp = extractFloat128Exp( a );
4660 158142c2 bellard
    aSign = extractFloat128Sign( a );
4661 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4662 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4663 158142c2 bellard
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4664 158142c2 bellard
        }
4665 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4666 158142c2 bellard
    }
4667 158142c2 bellard
    if ( aExp == 0 ) {
4668 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4669 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4670 158142c2 bellard
    }
4671 158142c2 bellard
    else {
4672 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
4673 158142c2 bellard
    }
4674 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4675 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4676 158142c2 bellard
4677 158142c2 bellard
}
4678 158142c2 bellard
4679 158142c2 bellard
#endif
4680 158142c2 bellard
4681 158142c2 bellard
/*----------------------------------------------------------------------------
4682 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
4683 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
4684 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
4685 158142c2 bellard
| Floating-Point Arithmetic.
4686 158142c2 bellard
*----------------------------------------------------------------------------*/
4687 158142c2 bellard
4688 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
4689 158142c2 bellard
{
4690 158142c2 bellard
    flag aSign;
4691 158142c2 bellard
    int32 aExp;
4692 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
4693 158142c2 bellard
    int8 roundingMode;
4694 158142c2 bellard
    float128 z;
4695 158142c2 bellard
4696 158142c2 bellard
    aExp = extractFloat128Exp( a );
4697 158142c2 bellard
    if ( 0x402F <= aExp ) {
4698 158142c2 bellard
        if ( 0x406F <= aExp ) {
4699 158142c2 bellard
            if (    ( aExp == 0x7FFF )
4700 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4701 158142c2 bellard
               ) {
4702 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
4703 158142c2 bellard
            }
4704 158142c2 bellard
            return a;
4705 158142c2 bellard
        }
4706 158142c2 bellard
        lastBitMask = 1;
4707 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4708 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4709 158142c2 bellard
        z = a;
4710 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4711 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4712 158142c2 bellard
            if ( lastBitMask ) {
4713 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4714 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4715 158142c2 bellard
            }
4716 158142c2 bellard
            else {
4717 158142c2 bellard
                if ( (sbits64) z.low < 0 ) {
4718 158142c2 bellard
                    ++z.high;
4719 158142c2 bellard
                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4720 158142c2 bellard
                }
4721 158142c2 bellard
            }
4722 158142c2 bellard
        }
4723 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4724 158142c2 bellard
            if (   extractFloat128Sign( z )
4725 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4726 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4727 158142c2 bellard
            }
4728 158142c2 bellard
        }
4729 158142c2 bellard
        z.low &= ~ roundBitsMask;
4730 158142c2 bellard
    }
4731 158142c2 bellard
    else {
4732 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4733 158142c2 bellard
            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4734 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4735 158142c2 bellard
            aSign = extractFloat128Sign( a );
4736 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
4737 158142c2 bellard
             case float_round_nearest_even:
4738 158142c2 bellard
                if (    ( aExp == 0x3FFE )
4739 158142c2 bellard
                     && (   extractFloat128Frac0( a )
4740 158142c2 bellard
                          | extractFloat128Frac1( a ) )
4741 158142c2 bellard
                   ) {
4742 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4743 158142c2 bellard
                }
4744 158142c2 bellard
                break;
4745 158142c2 bellard
             case float_round_down:
4746 158142c2 bellard
                return
4747 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4748 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
4749 158142c2 bellard
             case float_round_up:
4750 158142c2 bellard
                return
4751 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
4752 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
4753 158142c2 bellard
            }
4754 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
4755 158142c2 bellard
        }
4756 158142c2 bellard
        lastBitMask = 1;
4757 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
4758 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4759 158142c2 bellard
        z.low = 0;
4760 158142c2 bellard
        z.high = a.high;
4761 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4762 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4763 158142c2 bellard
            z.high += lastBitMask>>1;
4764 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4765 158142c2 bellard
                z.high &= ~ lastBitMask;
4766 158142c2 bellard
            }
4767 158142c2 bellard
        }
4768 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4769 158142c2 bellard
            if (   extractFloat128Sign( z )
4770 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4771 158142c2 bellard
                z.high |= ( a.low != 0 );
4772 158142c2 bellard
                z.high += roundBitsMask;
4773 158142c2 bellard
            }
4774 158142c2 bellard
        }
4775 158142c2 bellard
        z.high &= ~ roundBitsMask;
4776 158142c2 bellard
    }
4777 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4778 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4779 158142c2 bellard
    }
4780 158142c2 bellard
    return z;
4781 158142c2 bellard
4782 158142c2 bellard
}
4783 158142c2 bellard
4784 158142c2 bellard
/*----------------------------------------------------------------------------
4785 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
4786 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4787 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
4788 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4789 158142c2 bellard
| Floating-Point Arithmetic.
4790 158142c2 bellard
*----------------------------------------------------------------------------*/
4791 158142c2 bellard
4792 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4793 158142c2 bellard
{
4794 158142c2 bellard
    int32 aExp, bExp, zExp;
4795 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4796 158142c2 bellard
    int32 expDiff;
4797 158142c2 bellard
4798 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4799 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4800 158142c2 bellard
    aExp = extractFloat128Exp( a );
4801 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4802 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4803 158142c2 bellard
    bExp = extractFloat128Exp( b );
4804 158142c2 bellard
    expDiff = aExp - bExp;
4805 158142c2 bellard
    if ( 0 < expDiff ) {
4806 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4807 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4808 158142c2 bellard
            return a;
4809 158142c2 bellard
        }
4810 158142c2 bellard
        if ( bExp == 0 ) {
4811 158142c2 bellard
            --expDiff;
4812 158142c2 bellard
        }
4813 158142c2 bellard
        else {
4814 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
4815 158142c2 bellard
        }
4816 158142c2 bellard
        shift128ExtraRightJamming(
4817 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4818 158142c2 bellard
        zExp = aExp;
4819 158142c2 bellard
    }
4820 158142c2 bellard
    else if ( expDiff < 0 ) {
4821 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4822 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4823 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4824 158142c2 bellard
        }
4825 158142c2 bellard
        if ( aExp == 0 ) {
4826 158142c2 bellard
            ++expDiff;
4827 158142c2 bellard
        }
4828 158142c2 bellard
        else {
4829 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
4830 158142c2 bellard
        }
4831 158142c2 bellard
        shift128ExtraRightJamming(
4832 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4833 158142c2 bellard
        zExp = bExp;
4834 158142c2 bellard
    }
4835 158142c2 bellard
    else {
4836 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4837 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4838 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
4839 158142c2 bellard
            }
4840 158142c2 bellard
            return a;
4841 158142c2 bellard
        }
4842 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4843 fe76d976 pbrook
        if ( aExp == 0 ) {
4844 fe76d976 pbrook
            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
4845 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
4846 fe76d976 pbrook
        }
4847 158142c2 bellard
        zSig2 = 0;
4848 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
4849 158142c2 bellard
        zExp = aExp;
4850 158142c2 bellard
        goto shiftRight1;
4851 158142c2 bellard
    }
4852 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4853 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4854 158142c2 bellard
    --zExp;
4855 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4856 158142c2 bellard
    ++zExp;
4857 158142c2 bellard
 shiftRight1:
4858 158142c2 bellard
    shift128ExtraRightJamming(
4859 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4860 158142c2 bellard
 roundAndPack:
4861 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4862 158142c2 bellard
4863 158142c2 bellard
}
4864 158142c2 bellard
4865 158142c2 bellard
/*----------------------------------------------------------------------------
4866 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
4867 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
4868 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4869 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4870 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4871 158142c2 bellard
*----------------------------------------------------------------------------*/
4872 158142c2 bellard
4873 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4874 158142c2 bellard
{
4875 158142c2 bellard
    int32 aExp, bExp, zExp;
4876 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4877 158142c2 bellard
    int32 expDiff;
4878 158142c2 bellard
    float128 z;
4879 158142c2 bellard
4880 158142c2 bellard
    /* Unpack the fraction words and exponent fields of both operands.  The
     * operand signs are not read here; the caller supplies `zSign'. */
    aSig1 = extractFloat128Frac1( a );
4881 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4882 158142c2 bellard
    aExp = extractFloat128Exp( a );
4883 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4884 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4885 158142c2 bellard
    bExp = extractFloat128Exp( b );
4886 158142c2 bellard
    expDiff = aExp - bExp;
4887 158142c2 bellard
    /* Both fractions are shifted left by 14 bits; the matching `- 14' on the
     * exponent is applied at the final normalizeRoundAndPackFloat128 call. */
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4888 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4889 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4890 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
4891 158142c2 bellard
    /* From here on the exponent fields are equal. */
    if ( aExp == 0x7FFF ) {
4892 158142c2 bellard
        /* Any nonzero fraction means at least one operand is a NaN. */
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4893 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4894 158142c2 bellard
        }
4895 158142c2 bellard
        /* Both fractions are zero with maximum exponent (infinity minus
         * infinity): raise invalid and return the default NaN. */
        float_raise( float_flag_invalid STATUS_VAR);
4896 158142c2 bellard
        z.low = float128_default_nan_low;
4897 158142c2 bellard
        z.high = float128_default_nan_high;
4898 158142c2 bellard
        return z;
4899 158142c2 bellard
    }
4900 158142c2 bellard
    /* Both exponent fields are zero: use exponent 1 for both operands (no
     * implicit bit is added on this path). */
    if ( aExp == 0 ) {
4901 158142c2 bellard
        aExp = 1;
4902 158142c2 bellard
        bExp = 1;
4903 158142c2 bellard
    }
4904 158142c2 bellard
    /* Compare the 128-bit significands, high word first, to decide which
     * operand has the larger magnitude. */
    if ( bSig0 < aSig0 ) goto aBigger;
4905 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
4906 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
4907 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
4908 158142c2 bellard
    /* Magnitudes are identical: the result is zero, with its sign bit set
     * only when the rounding mode is float_round_down. */
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4909 158142c2 bellard
 bExpBigger:
4910 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4911 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4912 158142c2 bellard
        /* b has maximum exponent and zero fraction: the result is packed
         * with that exponent and the opposite of `zSign'. */
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4913 158142c2 bellard
    }
4914 158142c2 bellard
    /* A zero exponent field in a is treated as exponent 1 (so the alignment
     * distance shrinks by one); otherwise a's implicit leading bit is set.
     * 0x4000000000000000 is 0x0001000000000000 shifted left by 14. */
    if ( aExp == 0 ) {
4915 158142c2 bellard
        ++expDiff;
4916 158142c2 bellard
    }
4917 158142c2 bellard
    else {
4918 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4919 158142c2 bellard
    }
4920 158142c2 bellard
    /* Align a to b's exponent; expDiff is negative on this path. */
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4921 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
4922 158142c2 bellard
 bBigger:
4923 158142c2 bellard
    /* |b| > |a|: compute b - a and flip the result sign. */
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4924 158142c2 bellard
    zExp = bExp;
4925 158142c2 bellard
    zSign ^= 1;
4926 158142c2 bellard
    goto normalizeRoundAndPack;
4927 158142c2 bellard
 aExpBigger:
4928 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4929 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4930 158142c2 bellard
        /* a has maximum exponent and zero fraction: a is returned unchanged. */
        return a;
4931 158142c2 bellard
    }
4932 158142c2 bellard
    /* Mirror image of the bExpBigger alignment, with the roles swapped. */
    if ( bExp == 0 ) {
4933 158142c2 bellard
        --expDiff;
4934 158142c2 bellard
    }
4935 158142c2 bellard
    else {
4936 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
4937 158142c2 bellard
    }
4938 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4939 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
4940 158142c2 bellard
 aBigger:
4941 158142c2 bellard
    /* |a| > |b|: compute a - b; the result keeps `zSign'. */
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4942 158142c2 bellard
    zExp = aExp;
4943 158142c2 bellard
 normalizeRoundAndPack:
4944 158142c2 bellard
    /* The exponent passed on is zExp - 1 - 14; the 14 matches the left shift
     * applied to the fractions at the top of the function. */
    --zExp;
4945 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4946 158142c2 bellard
4947 158142c2 bellard
}
4948 158142c2 bellard
4949 158142c2 bellard
/*----------------------------------------------------------------------------
4950 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
4951 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4952 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4953 158142c2 bellard
*----------------------------------------------------------------------------*/
4954 158142c2 bellard
4955 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
{
    flag signA = extractFloat128Sign( a );
    flag signB = extractFloat128Sign( b );

    /* Operands of opposite sign are handled as a subtraction of magnitudes;
     * operands of like sign as an addition of magnitudes.  In both cases the
     * sign of `a' is passed on as the sign argument. */
    return ( signA != signB )
        ? subFloat128Sigs( a, b, signA STATUS_VAR )
        : addFloat128Sigs( a, b, signA STATUS_VAR );
}
4969 158142c2 bellard
4970 158142c2 bellard
/*----------------------------------------------------------------------------
4971 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
4972 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4973 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4974 158142c2 bellard
*----------------------------------------------------------------------------*/
4975 158142c2 bellard
4976 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
{
    flag signA = extractFloat128Sign( a );
    flag signB = extractFloat128Sign( b );

    /* Subtracting an operand of opposite sign is handled as an addition of
     * magnitudes; like signs as a subtraction of magnitudes.  In both cases
     * the sign of `a' is passed on as the sign argument. */
    return ( signA != signB )
        ? addFloat128Sigs( a, b, signA STATUS_VAR )
        : subFloat128Sigs( a, b, signA STATUS_VAR );
}
4990 158142c2 bellard
4991 158142c2 bellard
/*----------------------------------------------------------------------------
4992 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
4993 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4994 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4995 158142c2 bellard
*----------------------------------------------------------------------------*/
4996 158142c2 bellard
4997 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
{
    flag aSign, bSign, prodSign;
    int32 aExp, bExp, prodExp;
    bits64 aSig0, aSig1, bSig0, bSig1, prod0, prod1, prod2, prod3;
    int aIsNaN, bIsNaN;
    float128 nan;

    /* Unpack both operands. */
    aSign = extractFloat128Sign( a );
    aExp = extractFloat128Exp( a );
    aSig0 = extractFloat128Frac0( a );
    aSig1 = extractFloat128Frac1( a );
    bSign = extractFloat128Sign( b );
    bExp = extractFloat128Exp( b );
    bSig0 = extractFloat128Frac0( b );
    bSig1 = extractFloat128Frac1( b );
    prodSign = aSign ^ bSign;

    /* Special operands: at least one exponent field is all ones. */
    if ( ( aExp == 0x7FFF ) || ( bExp == 0x7FFF ) ) {
        aIsNaN = ( aExp == 0x7FFF ) && ( ( aSig0 | aSig1 ) != 0 );
        bIsNaN = ( bExp == 0x7FFF ) && ( ( bSig0 | bSig1 ) != 0 );
        if ( aIsNaN || bIsNaN ) {
            return propagateFloat128NaN( a, b STATUS_VAR );
        }
        /* No NaN, so at least one operand has maximum exponent and zero
         * fraction (infinity).  If the other operand is zero the product is
         * invalid: raise the flag and return the default NaN. */
        if (    ( ( aExp | aSig0 | aSig1 ) == 0 )
             || ( ( bExp | bSig0 | bSig1 ) == 0 ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
            nan.low = float128_default_nan_low;
            nan.high = float128_default_nan_high;
            return nan;
        }
        return packFloat128( prodSign, 0x7FFF, 0, 0 );
    }

    /* Zero operands give a signed zero; subnormals are normalized first. */
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) {
            return packFloat128( prodSign, 0, 0, 0 );
        }
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            return packFloat128( prodSign, 0, 0, 0 );
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }

    prodExp = aExp + bExp - 0x4000;
    /* a gets its implicit leading bit.  b's fraction is shifted left by 16
     * without an implicit bit; the add128 after the multiply adds a's
     * significand into the upper 128 bits of the product, which supplies the
     * contribution of b's missing leading bit. */
    aSig0 |= LIT64( 0x0001000000000000 );
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
    mul128To256( aSig0, aSig1, bSig0, bSig1, &prod0, &prod1, &prod2, &prod3 );
    add128( prod0, prod1, aSig0, aSig1, &prod0, &prod1 );
    /* Fold the lowest product word into a sticky bit. */
    prod2 |= ( prod3 != 0 );
    /* If the product reached bit 49 of the high word, shift right by one
     * (with jamming) and bump the exponent. */
    if ( prod0 >= LIT64( 0x0002000000000000 ) ) {
        shift128ExtraRightJamming(
            prod0, prod1, prod2, 1, &prod0, &prod1, &prod2 );
        ++prodExp;
    }
    return roundAndPackFloat128( prodSign, prodExp, prod0, prod1, prod2 STATUS_VAR );
}
5054 158142c2 bellard
5055 158142c2 bellard
/*----------------------------------------------------------------------------
5056 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5057 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5058 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5059 158142c2 bellard
*----------------------------------------------------------------------------*/
5060 158142c2 bellard
5061 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5062 158142c2 bellard
{
5063 158142c2 bellard
    flag aSign, bSign, zSign;
5064 158142c2 bellard
    int32 aExp, bExp, zExp;
5065 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5066 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5067 158142c2 bellard
    float128 z;
5068 158142c2 bellard
5069 158142c2 bellard
    /* Unpack both operands; the quotient sign is the XOR of the signs. */
    aSig1 = extractFloat128Frac1( a );
5070 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5071 158142c2 bellard
    aExp = extractFloat128Exp( a );
5072 158142c2 bellard
    aSign = extractFloat128Sign( a );
5073 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5074 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5075 158142c2 bellard
    bExp = extractFloat128Exp( b );
5076 158142c2 bellard
    bSign = extractFloat128Sign( b );
5077 158142c2 bellard
    zSign = aSign ^ bSign;
5078 158142c2 bellard
    /* a has maximum exponent: NaN operands are propagated; if b also has
     * maximum exponent with zero fraction the operation is invalid;
     * otherwise the result is packed with exponent 0x7FFF and zero fraction. */
    if ( aExp == 0x7FFF ) {
5079 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5080 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5081 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5082 158142c2 bellard
            goto invalid;
5083 158142c2 bellard
        }
5084 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5085 158142c2 bellard
    }
5086 158142c2 bellard
    /* b has maximum exponent (a does not): NaN is propagated, otherwise the
     * result is a signed zero. */
    if ( bExp == 0x7FFF ) {
5087 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5088 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5089 158142c2 bellard
    }
5090 158142c2 bellard
    if ( bExp == 0 ) {
5091 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5092 158142c2 bellard
            /* Divisor is zero.  Zero divided by zero is invalid and returns
             * the default NaN (the `invalid' label is also the target of the
             * goto above). */
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5093 158142c2 bellard
 invalid:
5094 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5095 158142c2 bellard
                z.low = float128_default_nan_low;
5096 158142c2 bellard
                z.high = float128_default_nan_high;
5097 158142c2 bellard
                return z;
5098 158142c2 bellard
            }
5099 158142c2 bellard
            /* Nonzero dividend over zero: raise divide-by-zero and pack a
             * result with exponent 0x7FFF and zero fraction. */
            float_raise( float_flag_divbyzero STATUS_VAR);
5100 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5101 158142c2 bellard
        }
5102 158142c2 bellard
        /* Nonzero fraction with zero exponent field: normalize b. */
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5103 158142c2 bellard
    }
5104 158142c2 bellard
    /* Zero dividend gives a signed zero; otherwise normalize a as well. */
    if ( aExp == 0 ) {
5105 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5106 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5107 158142c2 bellard
    }
5108 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
5109 158142c2 bellard
    /* Set the implicit leading bit of each significand and shift both left
     * by 15 bits. */
    shortShift128Left(
5110 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5111 158142c2 bellard
    shortShift128Left(
5112 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5113 158142c2 bellard
    /* If b <= a, halve a and bump the exponent, so that the dividend is
     * smaller than the divisor before the quotient digits are estimated. */
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5114 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5115 158142c2 bellard
        ++zExp;
5116 158142c2 bellard
    }
5117 158142c2 bellard
    /* High 64 quotient bits: take an estimate, compute the remainder
     * a - b * zSig0, and decrement the estimate (adding b back) for as long
     * as the remainder is negative. */
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5118 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5119 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5120 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5121 158142c2 bellard
        --zSig0;
5122 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5123 158142c2 bellard
    }
5124 158142c2 bellard
    /* Low 64 quotient bits are estimated from the remainder.  The exact
     * correction below runs only when the low 14 bits of the estimate are
     * <= 4 -- NOTE(review): presumably the only case where the estimate's
     * error could change the rounded result; confirm against the error bound
     * of estimateDiv128To64. */
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5125 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5126 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5127 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5128 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
5129 158142c2 bellard
            --zSig1;
5130 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5131 158142c2 bellard
        }
5132 158142c2 bellard
        /* Record a nonzero final remainder in the lowest quotient bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5133 158142c2 bellard
    }
5134 158142c2 bellard
    /* Shift the 128-bit quotient right by 15, with the shifted-out bits
     * going into zSig2, then round and pack. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5135 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5136 158142c2 bellard
5137 158142c2 bellard
}
5138 158142c2 bellard
5139 158142c2 bellard
/*----------------------------------------------------------------------------
5140 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5141 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5142 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5143 158142c2 bellard
*----------------------------------------------------------------------------*/
5144 158142c2 bellard
5145 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
{
    /* The sign of `b' is never needed, so it is not extracted. */
    flag aSign, zSign;
    int32 aExp, bExp, expDiff;
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
    sbits64 sigMean0;
    float128 z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    /* a has maximum exponent: propagate NaN operands; otherwise (a has zero
     * fraction, i.e. infinity) the operation is invalid. */
    if ( aExp == 0x7FFF ) {
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN( a, b STATUS_VAR );
        }
        goto invalid;
    }
    /* b has maximum exponent: propagate a NaN, otherwise a is returned
     * unchanged. */
    if ( bExp == 0x7FFF ) {
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
        return a;
    }
    if ( bExp == 0 ) {
        /* Zero divisor: raise invalid and return the default NaN.  The
         * `invalid' label is also reached from the goto above. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise( float_flag_invalid STATUS_VAR);
            z.low = float128_default_nan_low;
            z.high = float128_default_nan_high;
            return z;
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        /* Zero dividend is returned unchanged. */
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* An exponent gap below -1 means |a| is less than half of |b|, so a
     * itself is the remainder. */
    if ( expDiff < -1 ) return a;
    /* Set the implicit leading bits and shift both significands left by 15;
     * a is shifted one bit less when expDiff is -1. */
    shortShift128Left(
        aSig0 | LIT64( 0x0001000000000000 ),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit: subtract b once if it fits. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Reduce the exponent gap 61 bits per iteration.  Each quotient estimate
     * is lowered by 4 (clamped at 0) -- NOTE(review): presumably so that it
     * never exceeds the true quotient; confirm against the error bound of
     * estimateDiv128To64. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial step: fewer than 64 quotient bits remain, so the
         * estimate is shifted right by -expDiff before being applied. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        /* No further quotient bits: just bring both significands to the
         * scale used by the final loop (shifted right by 12). */
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Keep subtracting b, counting in q, until the remainder goes negative.
     * alternateASig holds the value from just before the last subtraction. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (sbits64) aSig0 );
    /* Choose between the negative remainder and the previous non-negative
     * one using the sign of their sum: a negative sum selects the previous
     * value, and a zero sum (a tie) selects it only when q is odd. */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* A negative remainder is negated and its sign folded into the result
     * sign. */
    zSign = ( (sbits64) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );

}
5248 158142c2 bellard
5249 158142c2 bellard
/*----------------------------------------------------------------------------
5250 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5251 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5252 158142c2 bellard
| Floating-Point Arithmetic.
5253 158142c2 bellard
*----------------------------------------------------------------------------*/
5254 158142c2 bellard
5255 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5256 158142c2 bellard
{
5257 158142c2 bellard
    flag aSign;
5258 158142c2 bellard
    int32 aExp, zExp;
5259 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5260 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5261 158142c2 bellard
    float128 z;
5262 158142c2 bellard
5263 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5264 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5265 158142c2 bellard
    aExp = extractFloat128Exp( a );
5266 158142c2 bellard
    aSign = extractFloat128Sign( a );
5267 158142c2 bellard
    /* Maximum exponent: a NaN is propagated; with zero fraction a positive
     * operand is returned unchanged and a negative one is invalid. */
    if ( aExp == 0x7FFF ) {
5268 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5269 158142c2 bellard
        if ( ! aSign ) return a;
5270 158142c2 bellard
        goto invalid;
5271 158142c2 bellard
    }
5272 158142c2 bellard
    /* Negative operand: negative zero is returned unchanged; any other
     * negative value raises invalid and returns the default NaN. */
    if ( aSign ) {
5273 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5274 158142c2 bellard
 invalid:
5275 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5276 158142c2 bellard
        z.low = float128_default_nan_low;
5277 158142c2 bellard
        z.high = float128_default_nan_high;
5278 158142c2 bellard
        return z;
5279 158142c2 bellard
    }
5280 158142c2 bellard
    /* Positive zero returns positive zero; a nonzero fraction with zero
     * exponent field is normalized first. */
    if ( aExp == 0 ) {
5281 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5282 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5283 158142c2 bellard
    }
5284 158142c2 bellard
    /* Result exponent: the exponent with 0x3FFF subtracted is halved and
     * 0x3FFE is added back. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5285 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5286 158142c2 bellard
    /* Start from the estimateSqrt32 result, then widen it using one
     * estimateDiv128To64 step.  The significand is shifted left by 13, or
     * by 12 when the exponent is odd. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5287 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5288 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5289 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5290 158142c2 bellard
    /* Remainder a - zSig0^2; while it is negative, decrement zSig0 and add
     * back the difference of the two squares. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
5291 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5292 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5293 158142c2 bellard
        --zSig0;
5294 158142c2 bellard
        doubleZSig0 -= 2;
5295 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5296 158142c2 bellard
    }
5297 158142c2 bellard
    /* Low 64 result bits are estimated from the remainder.  The exact
     * correction below runs only when the low 13 bits of the estimate are
     * <= 5 -- NOTE(review): presumably the only case where the estimate's
     * error could change the rounded result; confirm against the error bound
     * of estimateDiv128To64. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5298 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5299 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5300 158142c2 bellard
        /* Subtract 2 * zSig0 * zSig1 and then zSig1^2 from the remainder. */
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5301 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5302 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5303 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5304 158142c2 bellard
        /* While the remainder is negative, decrement zSig1 and add back
         * the corresponding term. */
        while ( (sbits64) rem1 < 0 ) {
5305 158142c2 bellard
            --zSig1;
5306 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5307 158142c2 bellard
            term3 |= 1;
5308 158142c2 bellard
            term2 |= doubleZSig0;
5309 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5310 158142c2 bellard
        }
5311 158142c2 bellard
        /* Record a nonzero final remainder in the lowest result bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5312 158142c2 bellard
    }
5313 158142c2 bellard
    /* Shift the 128-bit root right by 14, with the shifted-out bits going
     * into zSig2, then round and pack with a positive sign. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5314 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5315 158142c2 bellard
5316 158142c2 bellard
}
5317 158142c2 bellard
5318 158142c2 bellard
/*----------------------------------------------------------------------------
5319 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5320 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5321 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5322 158142c2 bellard
*----------------------------------------------------------------------------*/
5323 158142c2 bellard
5324 750afe93 bellard
int float128_eq( float128 a, float128 b STATUS_PARAM )
{
    int aIsNaN, bIsNaN;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    aIsNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
             && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
    bIsNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
             && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* NaNs never compare equal; only a signaling NaN raises invalid. */
        if (    float128_is_signaling_nan( a )
             || float128_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    if ( a.low != b.low ) {
        return 0;
    }
    if ( a.high == b.high ) {
        return 1;
    }
    /* The high words differ: still equal if both values are zero once the
     * top bit of each high word is shifted out (zeros of opposite sign). */
    return ( a.low == 0 ) && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 );
}
5346 158142c2 bellard
5347 158142c2 bellard
/*----------------------------------------------------------------------------
5348 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5349 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
5350 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5351 158142c2 bellard
| Arithmetic.
5352 158142c2 bellard
*----------------------------------------------------------------------------*/
5353 158142c2 bellard
5354 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
{
    flag aSign, bSign;
    int aIsNaN, bIsNaN;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    aIsNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
             && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
    bIsNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
             && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Any NaN operand raises invalid and the comparison is false. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign == bSign ) {
        /* Same sign: compare the raw 128-bit patterns, with the operands
         * swapped when both are negative. */
        if ( aSign ) {
            return le128( b.high, b.low, a.high, a.low );
        }
        return le128( a.high, a.low, b.high, b.low );
    }
    /* Signs differ: a negative `a' is always <= `b'. */
    if ( aSign ) {
        return 1;
    }
    /* `a' is positive and `b' negative: true only when both are zero (all
     * bits other than the top bit of the high words are clear). */
    return ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 );
}
5379 158142c2 bellard
5380 158142c2 bellard
/*----------------------------------------------------------------------------
5381 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5382 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5383 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5384 158142c2 bellard
*----------------------------------------------------------------------------*/
5385 158142c2 bellard
5386 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
{
    flag aSign, bSign;
    int aIsNaN, bIsNaN;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    aIsNaN =    ( extractFloat128Exp( a ) == 0x7FFF )
             && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
    bIsNaN =    ( extractFloat128Exp( b ) == 0x7FFF )
             && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
    if ( aIsNaN || bIsNaN ) {
        /* Any NaN operand raises invalid and the comparison is false. */
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign == bSign ) {
        /* Same sign: compare the raw 128-bit patterns, with the operands
         * swapped when both are negative. */
        if ( aSign ) {
            return lt128( b.high, b.low, a.high, a.low );
        }
        return lt128( a.high, a.low, b.high, b.low );
    }
    /* Signs differ: a positive `a' is never < a negative `b'. */
    if ( ! aSign ) {
        return 0;
    }
    /* `a' is negative and `b' positive: true unless both are zero (all bits
     * other than the top bit of the high words are clear). */
    return ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 );
}
5411 158142c2 bellard
5412 158142c2 bellard
/*----------------------------------------------------------------------------
5413 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5414 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5415 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5416 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5417 158142c2 bellard
*----------------------------------------------------------------------------*/
5418 158142c2 bellard
5419 750afe93 bellard
int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5420 158142c2 bellard
{
5421 158142c2 bellard
5422 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5423 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5424 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5425 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5426 158142c2 bellard
       ) {
5427 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5428 158142c2 bellard
        return 0;
5429 158142c2 bellard
    }
5430 158142c2 bellard
    return
5431 158142c2 bellard
           ( a.low == b.low )
5432 158142c2 bellard
        && (    ( a.high == b.high )
5433 158142c2 bellard
             || (    ( a.low == 0 )
5434 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5435 158142c2 bellard
           );
5436 158142c2 bellard
5437 158142c2 bellard
}
5438 158142c2 bellard
5439 158142c2 bellard
/*----------------------------------------------------------------------------
5440 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5441 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5442 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5443 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5444 158142c2 bellard
*----------------------------------------------------------------------------*/
5445 158142c2 bellard
5446 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5447 158142c2 bellard
{
5448 158142c2 bellard
    flag aSign, bSign;
5449 158142c2 bellard
5450 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5451 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5452 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5453 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5454 158142c2 bellard
       ) {
5455 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5456 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5457 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5458 158142c2 bellard
        }
5459 158142c2 bellard
        return 0;
5460 158142c2 bellard
    }
5461 158142c2 bellard
    aSign = extractFloat128Sign( a );
5462 158142c2 bellard
    bSign = extractFloat128Sign( b );
5463 158142c2 bellard
    if ( aSign != bSign ) {
5464 158142c2 bellard
        return
5465 158142c2 bellard
               aSign
5466 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5467 158142c2 bellard
                 == 0 );
5468 158142c2 bellard
    }
5469 158142c2 bellard
    return
5470 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5471 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5472 158142c2 bellard
5473 158142c2 bellard
}
5474 158142c2 bellard
5475 158142c2 bellard
/*----------------------------------------------------------------------------
5476 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5477 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5478 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5479 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5480 158142c2 bellard
*----------------------------------------------------------------------------*/
5481 158142c2 bellard
5482 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5483 158142c2 bellard
{
5484 158142c2 bellard
    flag aSign, bSign;
5485 158142c2 bellard
5486 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5487 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5488 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5489 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5490 158142c2 bellard
       ) {
5491 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5492 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5493 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5494 158142c2 bellard
        }
5495 158142c2 bellard
        return 0;
5496 158142c2 bellard
    }
5497 158142c2 bellard
    aSign = extractFloat128Sign( a );
5498 158142c2 bellard
    bSign = extractFloat128Sign( b );
5499 158142c2 bellard
    if ( aSign != bSign ) {
5500 158142c2 bellard
        return
5501 158142c2 bellard
               aSign
5502 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5503 158142c2 bellard
                 != 0 );
5504 158142c2 bellard
    }
5505 158142c2 bellard
    return
5506 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5507 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5508 158142c2 bellard
5509 158142c2 bellard
}
5510 158142c2 bellard
5511 158142c2 bellard
#endif
5512 158142c2 bellard
5513 1d6bda35 bellard
/* misc functions */
5514 1d6bda35 bellard
/* Converts the unsigned integer `a' to single precision by widening it to a
   signed 64-bit integer and reusing the 64-bit integer conversion. */
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float32( widened STATUS_VAR );
}
5518 1d6bda35 bellard
5519 1d6bda35 bellard
/* Converts the unsigned integer `a' to double precision by widening it to a
   signed 64-bit integer and reusing the 64-bit integer conversion. */
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float64( widened STATUS_VAR );
}
5523 1d6bda35 bellard
5524 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer.
   The value is first converted to a signed 64-bit integer; a result outside
   [0, 0xffffffff] raises the invalid exception and is clamped to the nearest
   bound (0 for negative results, 0xffffffff for results that are too large). */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    const int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5541 1d6bda35 bellard
5542 1d6bda35 bellard
/* Converts the single-precision value `a' to an unsigned 32-bit integer,
   going through the round-to-zero conversion to a signed 64-bit integer.  A
   result outside [0, 0xffffffff] raises the invalid exception and is clamped
   to the nearest bound (0 for negative results, 0xffffffff for results that
   are too large). */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    const int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5559 1d6bda35 bellard
5560 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer.
   The value is first converted to a signed 64-bit integer; a result outside
   [0, 0xffffffff] raises the invalid exception and is clamped to the nearest
   bound (0 for negative results, 0xffffffff for results that are too large). */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    const int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5577 1d6bda35 bellard
5578 1d6bda35 bellard
/* Converts the double-precision value `a' to an unsigned 32-bit integer,
   going through the round-to-zero conversion to a signed 64-bit integer.  A
   result outside [0, 0xffffffff] raises the invalid exception and is clamped
   to the nearest bound (0 for negative results, 0xffffffff for results that
   are too large). */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    const int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5595 1d6bda35 bellard
5596 f090c9d4 pbrook
/* FIXME: This looks broken.  */
5597 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
5598 75d62a58 j_mayer
{
5599 75d62a58 j_mayer
    int64_t v;
5600 75d62a58 j_mayer
5601 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5602 f090c9d4 pbrook
    v += float64_val(a);
5603 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
5604 75d62a58 j_mayer
5605 75d62a58 j_mayer
    return v - INT64_MIN;
5606 75d62a58 j_mayer
}
5607 75d62a58 j_mayer
5608 75d62a58 j_mayer
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
5609 75d62a58 j_mayer
{
5610 75d62a58 j_mayer
    int64_t v;
5611 75d62a58 j_mayer
5612 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5613 f090c9d4 pbrook
    v += float64_val(a);
5614 f090c9d4 pbrook
    v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
5615 75d62a58 j_mayer
5616 75d62a58 j_mayer
    return v - INT64_MIN;
5617 75d62a58 j_mayer
}
5618 75d62a58 j_mayer
5619 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
5620 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
5621 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
5622 1d6bda35 bellard
{                                                                            \
5623 1d6bda35 bellard
    flag aSign, bSign;                                                       \
5624 f090c9d4 pbrook
    bits ## s av, bv;                                                        \
5625 1d6bda35 bellard
                                                                             \
5626 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
5627 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
5628 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
5629 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
5630 1d6bda35 bellard
        if (!is_quiet ||                                                     \
5631 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
5632 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
5633 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
5634 1d6bda35 bellard
        }                                                                    \
5635 1d6bda35 bellard
        return float_relation_unordered;                                     \
5636 1d6bda35 bellard
    }                                                                        \
5637 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
5638 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
5639 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
5640 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
5641 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
5642 f090c9d4 pbrook
        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
5643 1d6bda35 bellard
            /* zero case */                                                  \
5644 1d6bda35 bellard
            return float_relation_equal;                                     \
5645 1d6bda35 bellard
        } else {                                                             \
5646 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
5647 1d6bda35 bellard
        }                                                                    \
5648 1d6bda35 bellard
    } else {                                                                 \
5649 f090c9d4 pbrook
        if (av == bv) {                                                      \
5650 1d6bda35 bellard
            return float_relation_equal;                                     \
5651 1d6bda35 bellard
        } else {                                                             \
5652 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
5653 1d6bda35 bellard
        }                                                                    \
5654 1d6bda35 bellard
    }                                                                        \
5655 1d6bda35 bellard
}                                                                            \
5656 1d6bda35 bellard
                                                                             \
5657 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
5658 1d6bda35 bellard
{                                                                            \
5659 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
5660 1d6bda35 bellard
}                                                                            \
5661 1d6bda35 bellard
                                                                             \
5662 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
5663 1d6bda35 bellard
{                                                                            \
5664 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
5665 1d6bda35 bellard
}
5666 1d6bda35 bellard
5667 1d6bda35 bellard
COMPARE(32, 0xff)
5668 1d6bda35 bellard
COMPARE(64, 0x7ff)
5669 9ee6e8bb pbrook
5670 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
5671 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
5672 1f587329 blueswir1
{
5673 1f587329 blueswir1
    flag aSign, bSign;
5674 1f587329 blueswir1
5675 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
5676 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
5677 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
5678 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
5679 1f587329 blueswir1
        if (!is_quiet ||
5680 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
5681 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
5682 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
5683 1f587329 blueswir1
        }
5684 1f587329 blueswir1
        return float_relation_unordered;
5685 1f587329 blueswir1
    }
5686 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
5687 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
5688 1f587329 blueswir1
    if ( aSign != bSign ) {
5689 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
5690 1f587329 blueswir1
            /* zero case */
5691 1f587329 blueswir1
            return float_relation_equal;
5692 1f587329 blueswir1
        } else {
5693 1f587329 blueswir1
            return 1 - (2 * aSign);
5694 1f587329 blueswir1
        }
5695 1f587329 blueswir1
    } else {
5696 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
5697 1f587329 blueswir1
            return float_relation_equal;
5698 1f587329 blueswir1
        } else {
5699 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
5700 1f587329 blueswir1
        }
5701 1f587329 blueswir1
    }
5702 1f587329 blueswir1
}
5703 1f587329 blueswir1
5704 1f587329 blueswir1
/* Three-way comparison of `a' and `b' in signaling mode: the worker is
   called with is_quiet == 0, so any NaN operand raises the invalid
   exception. */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 0;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
5708 1f587329 blueswir1
5709 1f587329 blueswir1
/* Three-way comparison of `a' and `b' in quiet mode: the worker is called
   with is_quiet == 1, so only signaling NaN operands raise the invalid
   exception. */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    const int is_quiet = 1;

    return float128_compare_internal( a, b, is_quiet STATUS_VAR );
}
5713 1f587329 blueswir1
5714 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
5715 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
5716 9ee6e8bb pbrook
{
5717 9ee6e8bb pbrook
    flag aSign;
5718 9ee6e8bb pbrook
    int16 aExp;
5719 9ee6e8bb pbrook
    bits32 aSig;
5720 9ee6e8bb pbrook
5721 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
5722 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
5723 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
5724 9ee6e8bb pbrook
5725 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
5726 9ee6e8bb pbrook
        return a;
5727 9ee6e8bb pbrook
    }
5728 69397542 pbrook
    if ( aExp != 0 )
5729 69397542 pbrook
        aSig |= 0x00800000;
5730 69397542 pbrook
    else if ( aSig == 0 )
5731 69397542 pbrook
        return a;
5732 69397542 pbrook
5733 69397542 pbrook
    aExp += n - 1;
5734 69397542 pbrook
    aSig <<= 7;
5735 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
5736 9ee6e8bb pbrook
}
5737 9ee6e8bb pbrook
5738 9ee6e8bb pbrook
/* Multiply the double-precision value `a' by 2 raised to the power `n'.
   Infinities, NaNs and zeros are returned unchanged.  Any other value has
   `n' added to its exponent and is then renormalized, rounded and packed, so
   overflow and underflow are handled by normalizeRoundAndPackFloat64. */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit leading bit explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        return a;
    }

    /* Clamp `n' so the exponent arithmetic below cannot overflow or be
       truncated for extreme arguments.  The double-precision range spans
       fewer than 0x1000 binades, so every |n| beyond the clamp already
       produces the same overflowed or underflowed result. */
    if ( n > 0x1000 ) {
        n = 0x1000;
    } else if ( n < -0x1000 ) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
5760 9ee6e8bb pbrook
5761 9ee6e8bb pbrook
#ifdef FLOATX80
5762 9ee6e8bb pbrook
/* Multiply the extended double-precision value `a' by 2 raised to the
   power `n'.  Infinities, NaNs and zeros are returned unchanged.  Any other
   value has `n' added to its exponent and is then renormalized, rounded (to
   the current floatx80 rounding precision) and packed. */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    /* 32-bit exponent, as in float128_scalbn: the 15-bit biased exponent
       plus the clamped `n' needs more than 16 bits. */
    int32 aExp;
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    /* The extended format has a 15-bit exponent, so infinities and NaNs are
       marked by 0x7FFF.  (The test used to be against 0x7FF, which let
       infinities and NaNs fall through to the rescaling code and returned
       ordinary values with exponent 0x7FF unscaled.) */
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp == 0 && aSig == 0 ) {
        return a;
    }

    /* Clamp `n' so the exponent arithmetic below cannot overflow for
       extreme arguments.  The extended range spans fewer than 0x10000
       binades, so every |n| beyond the clamp already produces the same
       overflowed or underflowed result. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
5782 9ee6e8bb pbrook
#endif
5783 9ee6e8bb pbrook
5784 9ee6e8bb pbrook
#ifdef FLOAT128
5785 9ee6e8bb pbrook
/* Multiply the quadruple-precision value `a' by 2 raised to the power `n'.
   Infinities, NaNs and zeros are returned unchanged.  Any other value has
   `n' added to its exponent and is then renormalized, rounded and packed, so
   overflow and underflow are handled by normalizeRoundAndPackFloat128. */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    bits64 aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit leading bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        return a;
    }

    /* Clamp `n' so the exponent arithmetic below cannot overflow for
       extreme arguments.  The quadruple-precision range spans fewer than
       0x10000 binades, so every |n| beyond the clamp already produces the
       same overflowed or underflowed result. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}
5808 9ee6e8bb pbrook
#endif