Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ a1b91bb4

History | View | Annotate | Download (192.8 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 158142c2 bellard
Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 158142c2 bellard
#include "softfloat.h"
34 158142c2 bellard
35 158142c2 bellard
/*----------------------------------------------------------------------------
36 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
37 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
38 158142c2 bellard
| desired.)
39 158142c2 bellard
*----------------------------------------------------------------------------*/
40 158142c2 bellard
#include "softfloat-macros.h"
41 158142c2 bellard
42 158142c2 bellard
/*----------------------------------------------------------------------------
43 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
44 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
45 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
46 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
47 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
48 158142c2 bellard
| specific.
49 158142c2 bellard
*----------------------------------------------------------------------------*/
50 158142c2 bellard
#include "softfloat-specialize.h"
51 158142c2 bellard
52 158142c2 bellard
void set_float_rounding_mode(int val STATUS_PARAM)
{
    /* Store the caller-supplied value in the float_rounding_mode status
       field; the rounding helpers in this file read it back from there. */
    STATUS(float_rounding_mode) = val;
}
56 158142c2 bellard
57 1d6bda35 bellard
void set_float_exception_flags(int val STATUS_PARAM)
{
    /* Overwrite (not OR into) the float_exception_flags status field. */
    STATUS(float_exception_flags) = val;
}
61 1d6bda35 bellard
62 158142c2 bellard
#ifdef FLOATX80
63 158142c2 bellard
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    /* Store the caller-supplied value in the floatx80_rounding_precision
       status field. */
    STATUS(floatx80_rounding_precision) = val;
}
67 158142c2 bellard
#endif
68 158142c2 bellard
69 158142c2 bellard
/*----------------------------------------------------------------------------
70 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
71 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
72 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
73 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
74 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
75 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
76 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
77 158142c2 bellard
| positive or negative integer is returned.
78 158142c2 bellard
*----------------------------------------------------------------------------*/
79 158142c2 bellard
80 158142c2 bellard
static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    int32 result;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Pick the value added to the 7 fraction bits before they are shifted
       out: 0x40 (one half) rounds to nearest, 0 truncates, and 0x7F pushes
       any nonzero fraction up to the next integer in magnitude. */
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up )
                    : ( mode == float_round_down ) ) {
        /* The directed mode points toward zero for this sign. */
        increment = 0;
    }
    else {
        increment = 0x7F;
    }

    lowBits = absZ & 0x7F;
    absZ = ( absZ + increment )>>7;
    /* An exact tie under round-to-nearest-even: force the result even. */
    if ( nearestEven && ( lowBits == 0x40 ) ) {
        absZ &= ~ (bits64) 1;
    }

    result = absZ;
    if ( zSign ) result = - result;

    /* Out of range if the magnitude needs more than 32 bits, or if the sign
       of the nonzero result disagrees with the requested sign. */
    if ( ( absZ>>32 ) || ( result && ( ( result < 0 ) ^ zSign ) ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        if ( zSign ) return (sbits32) 0x80000000;
        return 0x7FFFFFFF;
    }
    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;
}
117 158142c2 bellard
118 158142c2 bellard
/*----------------------------------------------------------------------------
119 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
120 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
121 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
122 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
123 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
124 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
125 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
126 158142c2 bellard
| exception is raised and the largest positive or negative integer is
127 158142c2 bellard
| returned.
128 158142c2 bellard
*----------------------------------------------------------------------------*/
129 158142c2 bellard
130 158142c2 bellard
static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
{
    int8 mode;
    flag nearestEven, bump, overflowed;
    int64 result;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Decide whether the integer word absZ0 must be incremented, based on
       the fraction word absZ1 and the rounding mode. */
    if ( nearestEven ) {
        /* Top bit of absZ1 set means the fraction is at least one half. */
        bump = ( (sbits64) absZ1 < 0 );
    }
    else if ( mode == float_round_to_zero ) {
        bump = 0;
    }
    else {
        /* Directed rounding: increment the magnitude only when the mode
           points away from zero for this sign and the fraction is nonzero. */
        bump = ( zSign ? ( mode == float_round_down )
                       : ( mode == float_round_up ) )
               && absZ1;
    }

    overflowed = 0;
    if ( bump ) {
        ++absZ0;
        if ( absZ0 == 0 ) {
            /* The increment wrapped the 64-bit magnitude around. */
            overflowed = 1;
        }
        else if ( nearestEven && ( (bits64) ( absZ1<<1 ) == 0 ) ) {
            /* Exact tie (fraction was precisely one half): force even. */
            absZ0 &= ~ (bits64) 1;
        }
    }
    if ( ! overflowed ) {
        result = absZ0;
        if ( zSign ) result = - result;
        /* A nonzero result whose sign disagrees with zSign did not fit. */
        overflowed = ( result && ( ( result < 0 ) ^ zSign ) );
    }
    if ( overflowed ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return
              zSign ? (sbits64) LIT64( 0x8000000000000000 )
            : LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( absZ1 ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;
}
170 158142c2 bellard
171 158142c2 bellard
/*----------------------------------------------------------------------------
172 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
173 158142c2 bellard
*----------------------------------------------------------------------------*/
174 158142c2 bellard
175 158142c2 bellard
INLINE bits32 extractFloat32Frac( float32 a )
{
    /* The fraction field is the low 23 bits of the raw value. */
    return 0x007FFFFF & float32_val(a);
}
181 158142c2 bellard
182 158142c2 bellard
/*----------------------------------------------------------------------------
183 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
184 158142c2 bellard
*----------------------------------------------------------------------------*/
185 158142c2 bellard
186 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
{
    /* Drop the 23 fraction bits, then keep the 8-bit exponent field. */
    return 0xFF & ( float32_val(a)>>23 );
}
192 158142c2 bellard
193 158142c2 bellard
/*----------------------------------------------------------------------------
194 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
195 158142c2 bellard
*----------------------------------------------------------------------------*/
196 158142c2 bellard
197 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
{
    /* Shift bit 31 of the raw value down to bit 0. */
    return float32_val(a)>>31;
}
203 158142c2 bellard
204 158142c2 bellard
/*----------------------------------------------------------------------------
205 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
206 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
207 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
208 158142c2 bellard
| `zSigPtr', respectively.
209 158142c2 bellard
*----------------------------------------------------------------------------*/
210 158142c2 bellard
211 158142c2 bellard
static void
212 158142c2 bellard
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
213 158142c2 bellard
{
214 158142c2 bellard
    int8 shiftCount;
215 158142c2 bellard
216 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
217 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
218 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
219 158142c2 bellard
220 158142c2 bellard
}
221 158142c2 bellard
222 158142c2 bellard
/*----------------------------------------------------------------------------
223 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
224 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
225 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
226 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
227 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
228 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
229 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
230 158142c2 bellard
| significand.
231 158142c2 bellard
*----------------------------------------------------------------------------*/
232 158142c2 bellard
233 158142c2 bellard
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
{
    bits32 signField = ( (bits32) zSign )<<31;
    bits32 expField = ( (bits32) zExp )<<23;

    /* The fields are added, not OR-ed, so any carry out of zSig above
       bit 22 propagates into the exponent field. */
    return make_float32( signField + expField + zSig );
}
240 158142c2 bellard
241 158142c2 bellard
/*----------------------------------------------------------------------------
242 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
243 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
244 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
245 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
246 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
247 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
248 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
249 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
250 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
251 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
252 158142c2 bellard
| precision floating-point number.
253 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
254 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
255 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
256 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
257 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
258 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
259 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
260 158142c2 bellard
| Binary Floating-Point Arithmetic.
261 158142c2 bellard
*----------------------------------------------------------------------------*/
262 158142c2 bellard
263 158142c2 bellard
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    flag tiny;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Value added to the 7 extra low bits of zSig before they are dropped:
       0x40 (one half) for nearest, 0 to truncate, 0x7F to round any nonzero
       remainder away from zero. */
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up )
                    : ( mode == float_round_down ) ) {
        /* The directed mode points toward zero for this sign. */
        increment = 0;
    }
    else {
        increment = 0x7F;
    }
    lowBits = zSig & 0x7F;

    /* The unsigned 16-bit compare catches both large exponents and negative
       ones (which wrap to large unsigned values) in a single test. */
    if ( 0xFD <= (bits16) zExp ) {
        if (    ( zExp > 0xFD )
             || (    ( zExp == 0xFD )
                  && ( (sbits32) ( zSig + increment ) < 0 ) ) ) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* A significand argument of -1 (used when increment is 0) makes
               the packed sum land one below the 0xFF exponent pattern;
               an argument of 0 yields the 0xFF exponent with zero fraction. */
            return packFloat32( zSign, 0xFF, - ( increment == 0 ));
        }
        if ( zExp < 0 ) {
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + increment < 0x80000000 );
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x7F;
            if ( tiny && lowBits ) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }
    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = ( zSig + increment )>>7;
    /* Exact tie under round-to-nearest-even: clear bit 0 to make it even. */
    if ( nearestEven && ( lowBits == 0x40 ) ) {
        zSig &= ~ (bits32) 1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );
}
314 158142c2 bellard
315 158142c2 bellard
/*----------------------------------------------------------------------------
316 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
317 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
318 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
319 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
320 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
321 158142c2 bellard
| floating-point exponent.
322 158142c2 bellard
*----------------------------------------------------------------------------*/
323 158142c2 bellard
324 158142c2 bellard
static float32
325 158142c2 bellard
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
326 158142c2 bellard
{
327 158142c2 bellard
    int8 shiftCount;
328 158142c2 bellard
329 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
330 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
331 158142c2 bellard
332 158142c2 bellard
}
333 158142c2 bellard
334 158142c2 bellard
/*----------------------------------------------------------------------------
335 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
336 158142c2 bellard
*----------------------------------------------------------------------------*/
337 158142c2 bellard
338 158142c2 bellard
INLINE bits64 extractFloat64Frac( float64 a )
{
    /* The fraction field is the low 52 bits of the raw value. */
    return LIT64( 0x000FFFFFFFFFFFFF ) & float64_val(a);
}
344 158142c2 bellard
345 158142c2 bellard
/*----------------------------------------------------------------------------
346 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
347 158142c2 bellard
*----------------------------------------------------------------------------*/
348 158142c2 bellard
349 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
{
    /* Drop the 52 fraction bits, then keep the 11-bit exponent field. */
    return 0x7FF & ( float64_val(a)>>52 );
}
355 158142c2 bellard
356 158142c2 bellard
/*----------------------------------------------------------------------------
357 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
358 158142c2 bellard
*----------------------------------------------------------------------------*/
359 158142c2 bellard
360 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
{
    /* Shift bit 63 of the raw value down to bit 0. */
    return float64_val(a)>>63;
}
366 158142c2 bellard
367 158142c2 bellard
/*----------------------------------------------------------------------------
368 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
369 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
370 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
371 158142c2 bellard
| `zSigPtr', respectively.
372 158142c2 bellard
*----------------------------------------------------------------------------*/
373 158142c2 bellard
374 158142c2 bellard
static void
375 158142c2 bellard
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
376 158142c2 bellard
{
377 158142c2 bellard
    int8 shiftCount;
378 158142c2 bellard
379 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
380 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
381 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
382 158142c2 bellard
383 158142c2 bellard
}
384 158142c2 bellard
385 158142c2 bellard
/*----------------------------------------------------------------------------
386 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
387 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
388 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
389 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
390 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
391 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
392 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
393 158142c2 bellard
| significand.
394 158142c2 bellard
*----------------------------------------------------------------------------*/
395 158142c2 bellard
396 158142c2 bellard
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
{
    bits64 signField = ( (bits64) zSign )<<63;
    bits64 expField = ( (bits64) zExp )<<52;

    /* The fields are added, not OR-ed, so any carry out of zSig above
       bit 51 propagates into the exponent field. */
    return make_float64( signField + expField + zSig );
}
403 158142c2 bellard
404 158142c2 bellard
/*----------------------------------------------------------------------------
405 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
406 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
407 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
408 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
409 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
410 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
411 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
412 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
413 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
414 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
415 158142c2 bellard
| precision floating-point number.
416 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
417 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
418 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
419 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
420 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
421 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
422 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
423 158142c2 bellard
| Binary Floating-Point Arithmetic.
424 158142c2 bellard
*----------------------------------------------------------------------------*/
425 158142c2 bellard
426 158142c2 bellard
static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int16 increment, lowBits;
    flag tiny;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Value added to the 10 extra low bits of zSig before they are dropped:
       0x200 (one half) for nearest, 0 to truncate, 0x3FF to round any
       nonzero remainder away from zero. */
    if ( nearestEven ) {
        increment = 0x200;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ? ( mode == float_round_up )
                    : ( mode == float_round_down ) ) {
        /* The directed mode points toward zero for this sign. */
        increment = 0;
    }
    else {
        increment = 0x3FF;
    }
    lowBits = zSig & 0x3FF;

    /* The unsigned 16-bit compare catches both large exponents and negative
       ones (which wrap to large unsigned values) in a single test. */
    if ( 0x7FD <= (bits16) zExp ) {
        if (    ( zExp > 0x7FD )
             || (    ( zExp == 0x7FD )
                  && ( (sbits64) ( zSig + increment ) < 0 ) ) ) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* A significand argument of -1 (used when increment is 0) makes
               the packed sum land one below the 0x7FF exponent pattern;
               an argument of 0 yields the 0x7FF exponent with zero fraction. */
            return packFloat64( zSign, 0x7FF, - ( increment == 0 ));
        }
        if ( zExp < 0 ) {
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + increment < LIT64( 0x8000000000000000 ) );
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x3FF;
            if ( tiny && lowBits ) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }
    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = ( zSig + increment )>>10;
    /* Exact tie under round-to-nearest-even: clear bit 0 to make it even. */
    if ( nearestEven && ( lowBits == 0x200 ) ) {
        zSig &= ~ (bits64) 1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );
}
477 158142c2 bellard
478 158142c2 bellard
/*----------------------------------------------------------------------------
479 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
480 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
481 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
482 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
483 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
484 158142c2 bellard
| floating-point exponent.
485 158142c2 bellard
*----------------------------------------------------------------------------*/
486 158142c2 bellard
487 158142c2 bellard
static float64
488 158142c2 bellard
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
489 158142c2 bellard
{
490 158142c2 bellard
    int8 shiftCount;
491 158142c2 bellard
492 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
493 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
494 158142c2 bellard
495 158142c2 bellard
}
496 158142c2 bellard
497 158142c2 bellard
#ifdef FLOATX80
498 158142c2 bellard
499 158142c2 bellard
/*----------------------------------------------------------------------------
500 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
501 158142c2 bellard
| value `a'.
502 158142c2 bellard
*----------------------------------------------------------------------------*/
503 158142c2 bellard
504 158142c2 bellard
INLINE bits64 extractFloatx80Frac( floatx80 a )
{
    /* The significand is stored whole in the `low' member. */
    return a.low;
}
510 158142c2 bellard
511 158142c2 bellard
/*----------------------------------------------------------------------------
512 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
513 158142c2 bellard
| value `a'.
514 158142c2 bellard
*----------------------------------------------------------------------------*/
515 158142c2 bellard
516 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
{
    /* The exponent is the low 15 bits of the `high' member. */
    return 0x7FFF & a.high;
}
522 158142c2 bellard
523 158142c2 bellard
/*----------------------------------------------------------------------------
524 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
525 158142c2 bellard
| `a'.
526 158142c2 bellard
*----------------------------------------------------------------------------*/
527 158142c2 bellard
528 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
{
    /* Shift bit 15 of the `high' member down to bit 0. */
    return a.high>>15;
}
534 158142c2 bellard
535 158142c2 bellard
/*----------------------------------------------------------------------------
536 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
537 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
538 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
539 158142c2 bellard
| `zSigPtr', respectively.
540 158142c2 bellard
*----------------------------------------------------------------------------*/
541 158142c2 bellard
542 158142c2 bellard
static void
543 158142c2 bellard
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
544 158142c2 bellard
{
545 158142c2 bellard
    int8 shiftCount;
546 158142c2 bellard
547 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
548 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
549 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
550 158142c2 bellard
551 158142c2 bellard
}
552 158142c2 bellard
553 158142c2 bellard
/*----------------------------------------------------------------------------
554 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
555 158142c2 bellard
| extended double-precision floating-point value, returning the result.
556 158142c2 bellard
*----------------------------------------------------------------------------*/
557 158142c2 bellard
558 158142c2 bellard
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
{
    floatx80 packed;

    /* `high' carries the sign in bit 15 with the exponent added below it;
       `low' carries the significand verbatim. */
    packed.high = zExp + ( ( (bits16) zSign )<<15 );
    packed.low = zSig;
    return packed;

}
567 158142c2 bellard
568 158142c2 bellard
/*----------------------------------------------------------------------------
569 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
570 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
571 158142c2 bellard
| and returns the proper extended double-precision floating-point value
572 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
573 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
574 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
575 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
576 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
577 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
578 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
579 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
580 158142c2 bellard
| double-precision floating-point number.
581 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
582 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
583 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
584 158142c2 bellard
| format.
585 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
586 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
587 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
588 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
589 158142c2 bellard
| Floating-Point Arithmetic.
590 158142c2 bellard
*----------------------------------------------------------------------------*/
591 158142c2 bellard
592 158142c2 bellard
static floatx80
593 158142c2 bellard
 roundAndPackFloatx80(
594 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
595 158142c2 bellard
 STATUS_PARAM)
596 158142c2 bellard
{
597 158142c2 bellard
    int8 roundingMode;
598 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
599 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
600 158142c2 bellard
601 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
602 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
603 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
604 158142c2 bellard
    if ( roundingPrecision == 64 ) {
605 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
606 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
607 158142c2 bellard
    }
608 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
609 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
610 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
611 158142c2 bellard
    }
612 158142c2 bellard
    else {
613 158142c2 bellard
        goto precision80;
614 158142c2 bellard
    }
615 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
616 158142c2 bellard
    if ( ! roundNearestEven ) {
617 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
618 158142c2 bellard
            roundIncrement = 0;
619 158142c2 bellard
        }
620 158142c2 bellard
        else {
621 158142c2 bellard
            roundIncrement = roundMask;
622 158142c2 bellard
            if ( zSign ) {
623 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
624 158142c2 bellard
            }
625 158142c2 bellard
            else {
626 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
627 158142c2 bellard
            }
628 158142c2 bellard
        }
629 158142c2 bellard
    }
630 158142c2 bellard
    roundBits = zSig0 & roundMask;
631 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
632 158142c2 bellard
        if (    ( 0x7FFE < zExp )
633 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
634 158142c2 bellard
           ) {
635 158142c2 bellard
            goto overflow;
636 158142c2 bellard
        }
637 158142c2 bellard
        if ( zExp <= 0 ) {
638 158142c2 bellard
            isTiny =
639 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
640 158142c2 bellard
                || ( zExp < 0 )
641 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
642 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
643 158142c2 bellard
            zExp = 0;
644 158142c2 bellard
            roundBits = zSig0 & roundMask;
645 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
646 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
647 158142c2 bellard
            zSig0 += roundIncrement;
648 158142c2 bellard
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
649 158142c2 bellard
            roundIncrement = roundMask + 1;
650 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
651 158142c2 bellard
                roundMask |= roundIncrement;
652 158142c2 bellard
            }
653 158142c2 bellard
            zSig0 &= ~ roundMask;
654 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
655 158142c2 bellard
        }
656 158142c2 bellard
    }
657 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
658 158142c2 bellard
    zSig0 += roundIncrement;
659 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
660 158142c2 bellard
        ++zExp;
661 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
662 158142c2 bellard
    }
663 158142c2 bellard
    roundIncrement = roundMask + 1;
664 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
665 158142c2 bellard
        roundMask |= roundIncrement;
666 158142c2 bellard
    }
667 158142c2 bellard
    zSig0 &= ~ roundMask;
668 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
669 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
670 158142c2 bellard
 precision80:
671 158142c2 bellard
    increment = ( (sbits64) zSig1 < 0 );
672 158142c2 bellard
    if ( ! roundNearestEven ) {
673 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
674 158142c2 bellard
            increment = 0;
675 158142c2 bellard
        }
676 158142c2 bellard
        else {
677 158142c2 bellard
            if ( zSign ) {
678 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
679 158142c2 bellard
            }
680 158142c2 bellard
            else {
681 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
682 158142c2 bellard
            }
683 158142c2 bellard
        }
684 158142c2 bellard
    }
685 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
686 158142c2 bellard
        if (    ( 0x7FFE < zExp )
687 158142c2 bellard
             || (    ( zExp == 0x7FFE )
688 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
689 158142c2 bellard
                  && increment
690 158142c2 bellard
                )
691 158142c2 bellard
           ) {
692 158142c2 bellard
            roundMask = 0;
693 158142c2 bellard
 overflow:
694 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
695 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
696 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
697 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
698 158142c2 bellard
               ) {
699 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
700 158142c2 bellard
            }
701 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
702 158142c2 bellard
        }
703 158142c2 bellard
        if ( zExp <= 0 ) {
704 158142c2 bellard
            isTiny =
705 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
706 158142c2 bellard
                || ( zExp < 0 )
707 158142c2 bellard
                || ! increment
708 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
709 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
710 158142c2 bellard
            zExp = 0;
711 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
712 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
713 158142c2 bellard
            if ( roundNearestEven ) {
714 158142c2 bellard
                increment = ( (sbits64) zSig1 < 0 );
715 158142c2 bellard
            }
716 158142c2 bellard
            else {
717 158142c2 bellard
                if ( zSign ) {
718 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
719 158142c2 bellard
                }
720 158142c2 bellard
                else {
721 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
722 158142c2 bellard
                }
723 158142c2 bellard
            }
724 158142c2 bellard
            if ( increment ) {
725 158142c2 bellard
                ++zSig0;
726 158142c2 bellard
                zSig0 &=
727 158142c2 bellard
                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
728 158142c2 bellard
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
729 158142c2 bellard
            }
730 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
731 158142c2 bellard
        }
732 158142c2 bellard
    }
733 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
734 158142c2 bellard
    if ( increment ) {
735 158142c2 bellard
        ++zSig0;
736 158142c2 bellard
        if ( zSig0 == 0 ) {
737 158142c2 bellard
            ++zExp;
738 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
739 158142c2 bellard
        }
740 158142c2 bellard
        else {
741 158142c2 bellard
            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
742 158142c2 bellard
        }
743 158142c2 bellard
    }
744 158142c2 bellard
    else {
745 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
746 158142c2 bellard
    }
747 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
748 158142c2 bellard
749 158142c2 bellard
}
750 158142c2 bellard
751 158142c2 bellard
/*----------------------------------------------------------------------------
752 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
753 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
754 158142c2 bellard
| and returns the proper extended double-precision floating-point value
755 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
756 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
757 158142c2 bellard
| normalized.
758 158142c2 bellard
*----------------------------------------------------------------------------*/
759 158142c2 bellard
760 158142c2 bellard
static floatx80
761 158142c2 bellard
 normalizeRoundAndPackFloatx80(
762 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
763 158142c2 bellard
 STATUS_PARAM)
764 158142c2 bellard
{
765 158142c2 bellard
    int8 shiftCount;
766 158142c2 bellard
767 158142c2 bellard
    if ( zSig0 == 0 ) {
768 158142c2 bellard
        zSig0 = zSig1;
769 158142c2 bellard
        zSig1 = 0;
770 158142c2 bellard
        zExp -= 64;
771 158142c2 bellard
    }
772 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
773 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
774 158142c2 bellard
    zExp -= shiftCount;
775 158142c2 bellard
    return
776 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
777 158142c2 bellard
778 158142c2 bellard
}
779 158142c2 bellard
780 158142c2 bellard
#endif
781 158142c2 bellard
782 158142c2 bellard
#ifdef FLOAT128
783 158142c2 bellard
784 158142c2 bellard
/*----------------------------------------------------------------------------
785 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
786 158142c2 bellard
| floating-point value `a'.
787 158142c2 bellard
*----------------------------------------------------------------------------*/
788 158142c2 bellard
789 158142c2 bellard
INLINE bits64 extractFloat128Frac1( float128 a )
{
    /* The entire `low' word is returned as the low fraction bits. */
    bits64 aSig1 = a.low;

    return aSig1;

}
795 158142c2 bellard
796 158142c2 bellard
/*----------------------------------------------------------------------------
797 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
798 158142c2 bellard
| floating-point value `a'.
799 158142c2 bellard
*----------------------------------------------------------------------------*/
800 158142c2 bellard
801 158142c2 bellard
INLINE bits64 extractFloat128Frac0( float128 a )
{
    /* Keep only the low 48 bits of `high'; the bits above them are masked
       off. */
    bits64 aSig0 = a.high;

    aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
    return aSig0;

}
807 158142c2 bellard
808 158142c2 bellard
/*----------------------------------------------------------------------------
809 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
810 158142c2 bellard
| `a'.
811 158142c2 bellard
*----------------------------------------------------------------------------*/
812 158142c2 bellard
813 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
{
    /* Drop the 48 bits below the exponent, then mask to 15 bits so the
       sign bit above it is excluded. */
    bits64 upperBits = a.high>>48;

    return upperBits & 0x7FFF;

}
819 158142c2 bellard
820 158142c2 bellard
/*----------------------------------------------------------------------------
821 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
822 158142c2 bellard
*----------------------------------------------------------------------------*/
823 158142c2 bellard
824 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
{
    /* The sign is bit 63 of the `high' word. */
    flag aSign = a.high>>63;

    return aSign;

}
830 158142c2 bellard
831 158142c2 bellard
/*----------------------------------------------------------------------------
832 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
833 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
834 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
835 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
836 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
837 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
838 158142c2 bellard
| location pointed to by `zSig1Ptr'.
839 158142c2 bellard
*----------------------------------------------------------------------------*/
840 158142c2 bellard
841 158142c2 bellard
static void
842 158142c2 bellard
 normalizeFloat128Subnormal(
843 158142c2 bellard
     bits64 aSig0,
844 158142c2 bellard
     bits64 aSig1,
845 158142c2 bellard
     int32 *zExpPtr,
846 158142c2 bellard
     bits64 *zSig0Ptr,
847 158142c2 bellard
     bits64 *zSig1Ptr
848 158142c2 bellard
 )
849 158142c2 bellard
{
850 158142c2 bellard
    int8 shiftCount;
851 158142c2 bellard
852 158142c2 bellard
    if ( aSig0 == 0 ) {
853 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
854 158142c2 bellard
        if ( shiftCount < 0 ) {
855 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
856 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
857 158142c2 bellard
        }
858 158142c2 bellard
        else {
859 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
860 158142c2 bellard
            *zSig1Ptr = 0;
861 158142c2 bellard
        }
862 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
863 158142c2 bellard
    }
864 158142c2 bellard
    else {
865 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
866 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
867 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
868 158142c2 bellard
    }
869 158142c2 bellard
870 158142c2 bellard
}
871 158142c2 bellard
872 158142c2 bellard
/*----------------------------------------------------------------------------
873 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
874 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
875 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
876 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
877 158142c2 bellard
| added together to form the most significant 64 bits of the result.  This
878 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
879 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
880 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
881 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
882 158142c2 bellard
| significand.
883 158142c2 bellard
*----------------------------------------------------------------------------*/
884 158142c2 bellard
885 158142c2 bellard
INLINE float128
886 158142c2 bellard
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
887 158142c2 bellard
{
888 158142c2 bellard
    float128 z;
889 158142c2 bellard
890 158142c2 bellard
    z.low = zSig1;
891 158142c2 bellard
    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
892 158142c2 bellard
    return z;
893 158142c2 bellard
894 158142c2 bellard
}
895 158142c2 bellard
896 158142c2 bellard
/*----------------------------------------------------------------------------
897 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
898 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
899 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
900 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
901 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
902 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
903 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
904 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
905 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
906 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
907 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
908 158142c2 bellard
| precision floating-point number.
909 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
910 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
911 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
912 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
913 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
914 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
915 158142c2 bellard
*----------------------------------------------------------------------------*/
916 158142c2 bellard
917 158142c2 bellard
static float128
918 158142c2 bellard
 roundAndPackFloat128(
919 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
920 158142c2 bellard
{
921 158142c2 bellard
    int8 roundingMode;
922 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
923 158142c2 bellard
924 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
925 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
926 158142c2 bellard
    increment = ( (sbits64) zSig2 < 0 );
927 158142c2 bellard
    if ( ! roundNearestEven ) {
928 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
929 158142c2 bellard
            increment = 0;
930 158142c2 bellard
        }
931 158142c2 bellard
        else {
932 158142c2 bellard
            if ( zSign ) {
933 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
934 158142c2 bellard
            }
935 158142c2 bellard
            else {
936 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
937 158142c2 bellard
            }
938 158142c2 bellard
        }
939 158142c2 bellard
    }
940 158142c2 bellard
    if ( 0x7FFD <= (bits32) zExp ) {
941 158142c2 bellard
        if (    ( 0x7FFD < zExp )
942 158142c2 bellard
             || (    ( zExp == 0x7FFD )
943 158142c2 bellard
                  && eq128(
944 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
945 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
946 158142c2 bellard
                         zSig0,
947 158142c2 bellard
                         zSig1
948 158142c2 bellard
                     )
949 158142c2 bellard
                  && increment
950 158142c2 bellard
                )
951 158142c2 bellard
           ) {
952 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
953 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
954 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
955 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
956 158142c2 bellard
               ) {
957 158142c2 bellard
                return
958 158142c2 bellard
                    packFloat128(
959 158142c2 bellard
                        zSign,
960 158142c2 bellard
                        0x7FFE,
961 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
962 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
963 158142c2 bellard
                    );
964 158142c2 bellard
            }
965 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
966 158142c2 bellard
        }
967 158142c2 bellard
        if ( zExp < 0 ) {
968 158142c2 bellard
            isTiny =
969 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
970 158142c2 bellard
                || ( zExp < -1 )
971 158142c2 bellard
                || ! increment
972 158142c2 bellard
                || lt128(
973 158142c2 bellard
                       zSig0,
974 158142c2 bellard
                       zSig1,
975 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
976 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
977 158142c2 bellard
                   );
978 158142c2 bellard
            shift128ExtraRightJamming(
979 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
980 158142c2 bellard
            zExp = 0;
981 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
982 158142c2 bellard
            if ( roundNearestEven ) {
983 158142c2 bellard
                increment = ( (sbits64) zSig2 < 0 );
984 158142c2 bellard
            }
985 158142c2 bellard
            else {
986 158142c2 bellard
                if ( zSign ) {
987 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
988 158142c2 bellard
                }
989 158142c2 bellard
                else {
990 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
991 158142c2 bellard
                }
992 158142c2 bellard
            }
993 158142c2 bellard
        }
994 158142c2 bellard
    }
995 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
996 158142c2 bellard
    if ( increment ) {
997 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
998 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
999 158142c2 bellard
    }
1000 158142c2 bellard
    else {
1001 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1002 158142c2 bellard
    }
1003 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1004 158142c2 bellard
1005 158142c2 bellard
}
1006 158142c2 bellard
1007 158142c2 bellard
/*----------------------------------------------------------------------------
1008 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1009 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1010 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1011 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1012 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1013 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1014 158142c2 bellard
| point exponent.
1015 158142c2 bellard
*----------------------------------------------------------------------------*/
1016 158142c2 bellard
1017 158142c2 bellard
static float128
1018 158142c2 bellard
 normalizeRoundAndPackFloat128(
1019 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1020 158142c2 bellard
{
1021 158142c2 bellard
    int8 shiftCount;
1022 158142c2 bellard
    bits64 zSig2;
1023 158142c2 bellard
1024 158142c2 bellard
    if ( zSig0 == 0 ) {
1025 158142c2 bellard
        zSig0 = zSig1;
1026 158142c2 bellard
        zSig1 = 0;
1027 158142c2 bellard
        zExp -= 64;
1028 158142c2 bellard
    }
1029 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1030 158142c2 bellard
    if ( 0 <= shiftCount ) {
1031 158142c2 bellard
        zSig2 = 0;
1032 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1033 158142c2 bellard
    }
1034 158142c2 bellard
    else {
1035 158142c2 bellard
        shift128ExtraRightJamming(
1036 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1037 158142c2 bellard
    }
1038 158142c2 bellard
    zExp -= shiftCount;
1039 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1040 158142c2 bellard
1041 158142c2 bellard
}
1042 158142c2 bellard
1043 158142c2 bellard
#endif
1044 158142c2 bellard
1045 158142c2 bellard
/*----------------------------------------------------------------------------
1046 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1047 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1048 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1049 158142c2 bellard
*----------------------------------------------------------------------------*/
1050 158142c2 bellard
1051 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    flag zSign;
    int32 magnitude;

    if ( a == 0 ) return float32_zero;
    /* The most negative value has no positive counterpart, so it is packed
       directly (sign 1, exponent field 0x9E, zero fraction) before any
       negation is attempted. */
    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );

    zSign = ( a < 0 );
    magnitude = a;
    if ( zSign ) magnitude = - a;
    /* Normalization and rounding are delegated to the float32 helper. */
    return normalizeRoundAndPackFloat32( zSign, 0x9C, magnitude STATUS_VAR );

}
1061 158142c2 bellard
1062 158142c2 bellard
/*----------------------------------------------------------------------------
1063 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1064 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1065 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1066 158142c2 bellard
*----------------------------------------------------------------------------*/
1067 158142c2 bellard
1068 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating the signed value
       directly overflows (undefined behaviour) for the most negative input,
       whereas unsigned subtraction wraps and gives the correct magnitude. */
    absA = a;
    if ( zSign ) absA = 0 - absA;
    /* Left-align the magnitude as required by the exponent computed
       below; no rounding step is involved. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1083 158142c2 bellard
1084 158142c2 bellard
#ifdef FLOATX80
1085 158142c2 bellard
1086 158142c2 bellard
/*----------------------------------------------------------------------------
1087 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1088 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1089 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1090 158142c2 bellard
| Arithmetic.
1091 158142c2 bellard
*----------------------------------------------------------------------------*/
1092 158142c2 bellard
1093 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1094 158142c2 bellard
{
1095 158142c2 bellard
    flag zSign;
1096 158142c2 bellard
    uint32 absA;
1097 158142c2 bellard
    int8 shiftCount;
1098 158142c2 bellard
    bits64 zSig;
1099 158142c2 bellard
1100 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1101 158142c2 bellard
    zSign = ( a < 0 );
1102 158142c2 bellard
    absA = zSign ? - a : a;
1103 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 32;
1104 158142c2 bellard
    zSig = absA;
1105 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1106 158142c2 bellard
1107 158142c2 bellard
}
1108 158142c2 bellard
1109 158142c2 bellard
#endif
1110 158142c2 bellard
1111 158142c2 bellard
#ifdef FLOAT128
1112 158142c2 bellard
1113 158142c2 bellard
/*----------------------------------------------------------------------------
1114 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1115 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1116 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1117 158142c2 bellard
*----------------------------------------------------------------------------*/
1118 158142c2 bellard
1119 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
1120 158142c2 bellard
{
1121 158142c2 bellard
    flag zSign;
1122 158142c2 bellard
    uint32 absA;
1123 158142c2 bellard
    int8 shiftCount;
1124 158142c2 bellard
    bits64 zSig0;
1125 158142c2 bellard
1126 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1127 158142c2 bellard
    zSign = ( a < 0 );
1128 158142c2 bellard
    absA = zSign ? - a : a;
1129 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 17;
1130 158142c2 bellard
    zSig0 = absA;
1131 158142c2 bellard
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1132 158142c2 bellard
1133 158142c2 bellard
}
1134 158142c2 bellard
1135 158142c2 bellard
#endif
1136 158142c2 bellard
1137 158142c2 bellard
/*----------------------------------------------------------------------------
1138 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1139 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1140 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1141 158142c2 bellard
*----------------------------------------------------------------------------*/
1142 158142c2 bellard
1143 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
1144 158142c2 bellard
{
1145 158142c2 bellard
    flag zSign;
1146 158142c2 bellard
    uint64 absA;
1147 158142c2 bellard
    int8 shiftCount;
1148 158142c2 bellard
1149 f090c9d4 pbrook
    if ( a == 0 ) return float32_zero;
1150 158142c2 bellard
    zSign = ( a < 0 );
1151 158142c2 bellard
    absA = zSign ? - a : a;
1152 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) - 40;
1153 158142c2 bellard
    if ( 0 <= shiftCount ) {
1154 158142c2 bellard
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1155 158142c2 bellard
    }
1156 158142c2 bellard
    else {
1157 158142c2 bellard
        shiftCount += 7;
1158 158142c2 bellard
        if ( shiftCount < 0 ) {
1159 158142c2 bellard
            shift64RightJamming( absA, - shiftCount, &absA );
1160 158142c2 bellard
        }
1161 158142c2 bellard
        else {
1162 158142c2 bellard
            absA <<= shiftCount;
1163 158142c2 bellard
        }
1164 158142c2 bellard
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1165 158142c2 bellard
    }
1166 158142c2 bellard
1167 158142c2 bellard
}
1168 158142c2 bellard
1169 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| The input is unsigned, so the sign passed to the packing routines is
| always 0 and the result is never negative.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* Fits in 24 bits: shift the leading 1 up to bit 23 and pack. */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        /* Too wide: align the leading 1 at bit 30 for rounding, keeping any
           bits shifted out on the right as a sticky bit. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1189 75d62a58 j_mayer
1190 158142c2 bellard
/*----------------------------------------------------------------------------
1191 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1192 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1193 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1194 158142c2 bellard
*----------------------------------------------------------------------------*/
1195 158142c2 bellard
1196 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
1197 158142c2 bellard
{
1198 158142c2 bellard
    flag zSign;
1199 158142c2 bellard
1200 f090c9d4 pbrook
    if ( a == 0 ) return float64_zero;
1201 158142c2 bellard
    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1202 158142c2 bellard
        return packFloat64( 1, 0x43E, 0 );
1203 158142c2 bellard
    }
1204 158142c2 bellard
    zSign = ( a < 0 );
1205 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1206 158142c2 bellard
1207 158142c2 bellard
}
1208 158142c2 bellard
1209 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int16 zExp = 0x43C;

    if ( a == 0 ) return float64_zero;
    /* NOTE(review): normalizeRoundAndPackFloat64 (defined elsewhere in this
       file) appears to require the top bit of its significand argument to be
       clear, otherwise its normalization shift count goes negative -- confirm
       against its definition.  When bit 63 of `a' is set, halve the value
       first, folding the lost bit into the sticky position, and compensate
       in the exponent. */
    if ( (sbits64) a < 0 ) {
        shift64RightJamming( a, 1, &a );
        zExp += 1;
    }
    return normalizeRoundAndPackFloat64( 0, zExp, a STATUS_VAR );

}
1215 75d62a58 j_mayer
1216 158142c2 bellard
#ifdef FLOATX80
1217 158142c2 bellard
1218 158142c2 bellard
/*----------------------------------------------------------------------------
1219 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1220 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1221 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1222 158142c2 bellard
| Arithmetic.
1223 158142c2 bellard
*----------------------------------------------------------------------------*/
1224 158142c2 bellard
1225 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1226 158142c2 bellard
{
1227 158142c2 bellard
    flag zSign;
1228 158142c2 bellard
    uint64 absA;
1229 158142c2 bellard
    int8 shiftCount;
1230 158142c2 bellard
1231 158142c2 bellard
    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1232 158142c2 bellard
    zSign = ( a < 0 );
1233 158142c2 bellard
    absA = zSign ? - a : a;
1234 158142c2 bellard
    shiftCount = countLeadingZeros64( absA );
1235 158142c2 bellard
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1236 158142c2 bellard
1237 158142c2 bellard
}
1238 158142c2 bellard
1239 158142c2 bellard
#endif
1240 158142c2 bellard
1241 158142c2 bellard
#ifdef FLOAT128
1242 158142c2 bellard
1243 158142c2 bellard
/*----------------------------------------------------------------------------
1244 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1245 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1246 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1247 158142c2 bellard
*----------------------------------------------------------------------------*/
1248 158142c2 bellard
1249 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
1250 158142c2 bellard
{
1251 158142c2 bellard
    flag zSign;
1252 158142c2 bellard
    uint64 absA;
1253 158142c2 bellard
    int8 shiftCount;
1254 158142c2 bellard
    int32 zExp;
1255 158142c2 bellard
    bits64 zSig0, zSig1;
1256 158142c2 bellard
1257 158142c2 bellard
    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1258 158142c2 bellard
    zSign = ( a < 0 );
1259 158142c2 bellard
    absA = zSign ? - a : a;
1260 158142c2 bellard
    shiftCount = countLeadingZeros64( absA ) + 49;
1261 158142c2 bellard
    zExp = 0x406E - shiftCount;
1262 158142c2 bellard
    if ( 64 <= shiftCount ) {
1263 158142c2 bellard
        zSig1 = 0;
1264 158142c2 bellard
        zSig0 = absA;
1265 158142c2 bellard
        shiftCount -= 64;
1266 158142c2 bellard
    }
1267 158142c2 bellard
    else {
1268 158142c2 bellard
        zSig1 = absA;
1269 158142c2 bellard
        zSig0 = 0;
1270 158142c2 bellard
    }
1271 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1272 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1273 158142c2 bellard
1274 158142c2 bellard
}
1275 158142c2 bellard
1276 158142c2 bellard
#endif
1277 158142c2 bellard
1278 158142c2 bellard
/*----------------------------------------------------------------------------
1279 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1280 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1281 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1282 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1283 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1284 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1285 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1286 158142c2 bellard
*----------------------------------------------------------------------------*/
1287 158142c2 bellard
1288 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
1289 158142c2 bellard
{
1290 158142c2 bellard
    flag aSign;
1291 158142c2 bellard
    int16 aExp, shiftCount;
1292 158142c2 bellard
    bits32 aSig;
1293 158142c2 bellard
    bits64 aSig64;
1294 158142c2 bellard
1295 158142c2 bellard
    aSig = extractFloat32Frac( a );
1296 158142c2 bellard
    aExp = extractFloat32Exp( a );
1297 158142c2 bellard
    aSign = extractFloat32Sign( a );
1298 158142c2 bellard
    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1299 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1300 158142c2 bellard
    shiftCount = 0xAF - aExp;
1301 158142c2 bellard
    aSig64 = aSig;
1302 158142c2 bellard
    aSig64 <<= 32;
1303 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1304 158142c2 bellard
    return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1305 158142c2 bellard
1306 158142c2 bellard
}
1307 158142c2 bellard
1308 158142c2 bellard
/*----------------------------------------------------------------------------
1309 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1310 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1311 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1312 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1313 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1314 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1315 158142c2 bellard
| returned.
1316 158142c2 bellard
*----------------------------------------------------------------------------*/
1317 158142c2 bellard
1318 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1319 158142c2 bellard
{
1320 158142c2 bellard
    flag aSign;
1321 158142c2 bellard
    int16 aExp, shiftCount;
1322 158142c2 bellard
    bits32 aSig;
1323 158142c2 bellard
    int32 z;
1324 158142c2 bellard
1325 158142c2 bellard
    aSig = extractFloat32Frac( a );
1326 158142c2 bellard
    aExp = extractFloat32Exp( a );
1327 158142c2 bellard
    aSign = extractFloat32Sign( a );
1328 158142c2 bellard
    shiftCount = aExp - 0x9E;
1329 158142c2 bellard
    if ( 0 <= shiftCount ) {
1330 f090c9d4 pbrook
        if ( float32_val(a) != 0xCF000000 ) {
1331 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1332 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1333 158142c2 bellard
        }
1334 158142c2 bellard
        return (sbits32) 0x80000000;
1335 158142c2 bellard
    }
1336 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1337 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1338 158142c2 bellard
        return 0;
1339 158142c2 bellard
    }
1340 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
1341 158142c2 bellard
    z = aSig>>( - shiftCount );
1342 158142c2 bellard
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1343 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1344 158142c2 bellard
    }
1345 158142c2 bellard
    if ( aSign ) z = - z;
1346 158142c2 bellard
    return z;
1347 158142c2 bellard
1348 158142c2 bellard
}
1349 158142c2 bellard
1350 158142c2 bellard
/*----------------------------------------------------------------------------
1351 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1352 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1353 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1354 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1355 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1356 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1357 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1358 158142c2 bellard
*----------------------------------------------------------------------------*/
1359 158142c2 bellard
1360 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
1361 158142c2 bellard
{
1362 158142c2 bellard
    flag aSign;
1363 158142c2 bellard
    int16 aExp, shiftCount;
1364 158142c2 bellard
    bits32 aSig;
1365 158142c2 bellard
    bits64 aSig64, aSigExtra;
1366 158142c2 bellard
1367 158142c2 bellard
    aSig = extractFloat32Frac( a );
1368 158142c2 bellard
    aExp = extractFloat32Exp( a );
1369 158142c2 bellard
    aSign = extractFloat32Sign( a );
1370 158142c2 bellard
    shiftCount = 0xBE - aExp;
1371 158142c2 bellard
    if ( shiftCount < 0 ) {
1372 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1373 158142c2 bellard
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1374 158142c2 bellard
            return LIT64( 0x7FFFFFFFFFFFFFFF );
1375 158142c2 bellard
        }
1376 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
1377 158142c2 bellard
    }
1378 158142c2 bellard
    if ( aExp ) aSig |= 0x00800000;
1379 158142c2 bellard
    aSig64 = aSig;
1380 158142c2 bellard
    aSig64 <<= 40;
1381 158142c2 bellard
    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1382 158142c2 bellard
    return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1383 158142c2 bellard
1384 158142c2 bellard
}
1385 158142c2 bellard
1386 158142c2 bellard
/*----------------------------------------------------------------------------
1387 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1388 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1389 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1390 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1391 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1392 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1393 158142c2 bellard
| returned.
1394 158142c2 bellard
*----------------------------------------------------------------------------*/
1395 158142c2 bellard
1396 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1397 158142c2 bellard
{
1398 158142c2 bellard
    flag aSign;
1399 158142c2 bellard
    int16 aExp, shiftCount;
1400 158142c2 bellard
    bits32 aSig;
1401 158142c2 bellard
    bits64 aSig64;
1402 158142c2 bellard
    int64 z;
1403 158142c2 bellard
1404 158142c2 bellard
    aSig = extractFloat32Frac( a );
1405 158142c2 bellard
    aExp = extractFloat32Exp( a );
1406 158142c2 bellard
    aSign = extractFloat32Sign( a );
1407 158142c2 bellard
    shiftCount = aExp - 0xBE;
1408 158142c2 bellard
    if ( 0 <= shiftCount ) {
1409 f090c9d4 pbrook
        if ( float32_val(a) != 0xDF000000 ) {
1410 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1411 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1412 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
1413 158142c2 bellard
            }
1414 158142c2 bellard
        }
1415 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
1416 158142c2 bellard
    }
1417 158142c2 bellard
    else if ( aExp <= 0x7E ) {
1418 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1419 158142c2 bellard
        return 0;
1420 158142c2 bellard
    }
1421 158142c2 bellard
    aSig64 = aSig | 0x00800000;
1422 158142c2 bellard
    aSig64 <<= 40;
1423 158142c2 bellard
    z = aSig64>>( - shiftCount );
1424 158142c2 bellard
    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1425 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1426 158142c2 bellard
    }
1427 158142c2 bellard
    if ( aSign ) z = - z;
1428 158142c2 bellard
    return z;
1429 158142c2 bellard
1430 158142c2 bellard
}
1431 158142c2 bellard
1432 158142c2 bellard
/*----------------------------------------------------------------------------
1433 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1434 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1435 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1436 158142c2 bellard
| Arithmetic.
1437 158142c2 bellard
*----------------------------------------------------------------------------*/
1438 158142c2 bellard
1439 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
1440 158142c2 bellard
{
1441 158142c2 bellard
    flag aSign;
1442 158142c2 bellard
    int16 aExp;
1443 158142c2 bellard
    bits32 aSig;
1444 158142c2 bellard
1445 158142c2 bellard
    aSig = extractFloat32Frac( a );
1446 158142c2 bellard
    aExp = extractFloat32Exp( a );
1447 158142c2 bellard
    aSign = extractFloat32Sign( a );
1448 158142c2 bellard
    if ( aExp == 0xFF ) {
1449 158142c2 bellard
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
1450 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
1451 158142c2 bellard
    }
1452 158142c2 bellard
    if ( aExp == 0 ) {
1453 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1454 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1455 158142c2 bellard
        --aExp;
1456 158142c2 bellard
    }
1457 158142c2 bellard
    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1458 158142c2 bellard
1459 158142c2 bellard
}
1460 158142c2 bellard
1461 158142c2 bellard
#ifdef FLOATX80
1462 158142c2 bellard
1463 158142c2 bellard
/*----------------------------------------------------------------------------
1464 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1465 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1466 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1467 158142c2 bellard
| Arithmetic.
1468 158142c2 bellard
*----------------------------------------------------------------------------*/
1469 158142c2 bellard
1470 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1471 158142c2 bellard
{
1472 158142c2 bellard
    flag aSign;
1473 158142c2 bellard
    int16 aExp;
1474 158142c2 bellard
    bits32 aSig;
1475 158142c2 bellard
1476 158142c2 bellard
    aSig = extractFloat32Frac( a );
1477 158142c2 bellard
    aExp = extractFloat32Exp( a );
1478 158142c2 bellard
    aSign = extractFloat32Sign( a );
1479 158142c2 bellard
    if ( aExp == 0xFF ) {
1480 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
1481 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1482 158142c2 bellard
    }
1483 158142c2 bellard
    if ( aExp == 0 ) {
1484 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1485 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1486 158142c2 bellard
    }
1487 158142c2 bellard
    aSig |= 0x00800000;
1488 158142c2 bellard
    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1489 158142c2 bellard
1490 158142c2 bellard
}
1491 158142c2 bellard
1492 158142c2 bellard
#endif
1493 158142c2 bellard
1494 158142c2 bellard
#ifdef FLOAT128
1495 158142c2 bellard
1496 158142c2 bellard
/*----------------------------------------------------------------------------
1497 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1498 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1499 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1500 158142c2 bellard
| Arithmetic.
1501 158142c2 bellard
*----------------------------------------------------------------------------*/
1502 158142c2 bellard
1503 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign;
    int16 expo;
    bits32 frac;
    bits64 highSig;

    frac = extractFloat32Frac( a );
    expo = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    if ( expo == 0xFF ) {
        /* Nonzero fraction: NaN.  Zero fraction: infinity. */
        if ( frac != 0 ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat128( sign, 0x7FFF, 0, 0 );
    }
    if ( expo == 0 ) {
        if ( frac == 0 ) {
            return packFloat128( sign, 0, 0, 0 );
        }
        /* Subnormal input: normalize it, then adjust the exponent by one. */
        normalizeFloat32Subnormal( frac, &expo, &frac );
        expo -= 1;
    }
    /* The fraction goes in the high significand word; the low word is 0. */
    highSig = ( (bits64) frac )<<25;
    return packFloat128( sign, expo + 0x3F80, highSig, 0 );

}
1524 158142c2 bellard
1525 158142c2 bellard
#endif
1526 158142c2 bellard
1527 158142c2 bellard
/*----------------------------------------------------------------------------
1528 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1529 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1530 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1531 158142c2 bellard
| Floating-Point Arithmetic.
1532 158142c2 bellard
*----------------------------------------------------------------------------*/
1533 158142c2 bellard
1534 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
1535 158142c2 bellard
{
1536 158142c2 bellard
    flag aSign;
1537 158142c2 bellard
    int16 aExp;
1538 158142c2 bellard
    bits32 lastBitMask, roundBitsMask;
1539 158142c2 bellard
    int8 roundingMode;
1540 f090c9d4 pbrook
    bits32 z;
1541 158142c2 bellard
1542 158142c2 bellard
    aExp = extractFloat32Exp( a );
1543 158142c2 bellard
    if ( 0x96 <= aExp ) {
1544 158142c2 bellard
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1545 158142c2 bellard
            return propagateFloat32NaN( a, a STATUS_VAR );
1546 158142c2 bellard
        }
1547 158142c2 bellard
        return a;
1548 158142c2 bellard
    }
1549 158142c2 bellard
    if ( aExp <= 0x7E ) {
1550 f090c9d4 pbrook
        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
1551 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1552 158142c2 bellard
        aSign = extractFloat32Sign( a );
1553 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
1554 158142c2 bellard
         case float_round_nearest_even:
1555 158142c2 bellard
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1556 158142c2 bellard
                return packFloat32( aSign, 0x7F, 0 );
1557 158142c2 bellard
            }
1558 158142c2 bellard
            break;
1559 158142c2 bellard
         case float_round_down:
1560 f090c9d4 pbrook
            return make_float32(aSign ? 0xBF800000 : 0);
1561 158142c2 bellard
         case float_round_up:
1562 f090c9d4 pbrook
            return make_float32(aSign ? 0x80000000 : 0x3F800000);
1563 158142c2 bellard
        }
1564 158142c2 bellard
        return packFloat32( aSign, 0, 0 );
1565 158142c2 bellard
    }
1566 158142c2 bellard
    lastBitMask = 1;
1567 158142c2 bellard
    lastBitMask <<= 0x96 - aExp;
1568 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
1569 f090c9d4 pbrook
    z = float32_val(a);
1570 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
1571 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
1572 158142c2 bellard
        z += lastBitMask>>1;
1573 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1574 158142c2 bellard
    }
1575 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
1576 f090c9d4 pbrook
        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1577 158142c2 bellard
            z += roundBitsMask;
1578 158142c2 bellard
        }
1579 158142c2 bellard
    }
1580 158142c2 bellard
    z &= ~ roundBitsMask;
1581 f090c9d4 pbrook
    if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1582 f090c9d4 pbrook
    return make_float32(z);
1583 158142c2 bellard
1584 158142c2 bellard
}
1585 158142c2 bellard
1586 158142c2 bellard
/*----------------------------------------------------------------------------
1587 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1588 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1589 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1590 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1591 158142c2 bellard
| Floating-Point Arithmetic.
1592 158142c2 bellard
*----------------------------------------------------------------------------*/
1593 158142c2 bellard
1594 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1595 158142c2 bellard
{
1596 158142c2 bellard
    int16 aExp, bExp, zExp;
1597 158142c2 bellard
    bits32 aSig, bSig, zSig;
1598 158142c2 bellard
    int16 expDiff;
1599 158142c2 bellard
1600 158142c2 bellard
    aSig = extractFloat32Frac( a );
1601 158142c2 bellard
    aExp = extractFloat32Exp( a );
1602 158142c2 bellard
    bSig = extractFloat32Frac( b );
1603 158142c2 bellard
    bExp = extractFloat32Exp( b );
1604 158142c2 bellard
    expDiff = aExp - bExp;
1605 158142c2 bellard
    aSig <<= 6;
1606 158142c2 bellard
    bSig <<= 6;
1607 158142c2 bellard
    if ( 0 < expDiff ) {
1608 158142c2 bellard
        if ( aExp == 0xFF ) {
1609 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1610 158142c2 bellard
            return a;
1611 158142c2 bellard
        }
1612 158142c2 bellard
        if ( bExp == 0 ) {
1613 158142c2 bellard
            --expDiff;
1614 158142c2 bellard
        }
1615 158142c2 bellard
        else {
1616 158142c2 bellard
            bSig |= 0x20000000;
1617 158142c2 bellard
        }
1618 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1619 158142c2 bellard
        zExp = aExp;
1620 158142c2 bellard
    }
1621 158142c2 bellard
    else if ( expDiff < 0 ) {
1622 158142c2 bellard
        if ( bExp == 0xFF ) {
1623 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1624 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1625 158142c2 bellard
        }
1626 158142c2 bellard
        if ( aExp == 0 ) {
1627 158142c2 bellard
            ++expDiff;
1628 158142c2 bellard
        }
1629 158142c2 bellard
        else {
1630 158142c2 bellard
            aSig |= 0x20000000;
1631 158142c2 bellard
        }
1632 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1633 158142c2 bellard
        zExp = bExp;
1634 158142c2 bellard
    }
1635 158142c2 bellard
    else {
1636 158142c2 bellard
        if ( aExp == 0xFF ) {
1637 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1638 158142c2 bellard
            return a;
1639 158142c2 bellard
        }
1640 158142c2 bellard
        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1641 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1642 158142c2 bellard
        zExp = aExp;
1643 158142c2 bellard
        goto roundAndPack;
1644 158142c2 bellard
    }
1645 158142c2 bellard
    aSig |= 0x20000000;
1646 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1647 158142c2 bellard
    --zExp;
1648 158142c2 bellard
    if ( (sbits32) zSig < 0 ) {
1649 158142c2 bellard
        zSig = aSig + bSig;
1650 158142c2 bellard
        ++zExp;
1651 158142c2 bellard
    }
1652 158142c2 bellard
 roundAndPack:
1653 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1654 158142c2 bellard
1655 158142c2 bellard
}
1656 158142c2 bellard
1657 158142c2 bellard
/*----------------------------------------------------------------------------
1658 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1659 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1660 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1661 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1662 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1663 158142c2 bellard
*----------------------------------------------------------------------------*/
1664 158142c2 bellard
1665 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 expA, expB, expZ;
    bits32 sigA, sigB, sigZ;
    int16 shift;
    flag aIsMinuend;

    /* Unpack both operands and move the fractions up by 7 bits. */
    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    shift = expA - expB;
    sigA <<= 7;
    sigB <<= 7;

    if (shift > 0) {
        /* `a' has the larger exponent, so |a| > |b|. */
        if (expA == 0xFF) {
            if (sigA) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            /* Exponent field 0: no leading one is added, shift one less. */
            --shift;
        } else {
            sigB |= 0x40000000;
        }
        shift32RightJamming(sigB, shift, &sigB);
        sigA |= 0x40000000;
        aIsMinuend = 1;
    } else if (shift < 0) {
        /* `b' has the larger exponent, so |b| > |a|. */
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign ^ 1, 0xFF, 0);
        }
        if (expA == 0) {
            ++shift;
        } else {
            sigA |= 0x40000000;
        }
        shift32RightJamming(sigA, -shift, &sigA);
        sigB |= 0x40000000;
        aIsMinuend = 0;
    } else {
        /* Equal exponents: the significands decide which operand is larger. */
        if (expA == 0xFF) {
            if (sigA | sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Exponent 0xFF with both fractions zero: invalid operation. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        if (expA == 0) {
            expA = 1;
            expB = 1;
        }
        if (sigA == sigB) {
            /* Exact zero result; its sign bit is set only when rounding
               down. */
            return packFloat32(
                STATUS(float_rounding_mode) == float_round_down, 0, 0);
        }
        aIsMinuend = (sigB < sigA);
    }

    if (aIsMinuend) {
        sigZ = sigA - sigB;
        expZ = expA;
    } else {
        /* Subtract the other way round and flip the result sign. */
        sigZ = sigB - sigA;
        expZ = expB;
        zSign ^= 1;
    }
    --expZ;
    return normalizeRoundAndPackFloat32(zSign, expZ, sigZ STATUS_VAR);

}
1731 158142c2 bellard
1732 158142c2 bellard
/*----------------------------------------------------------------------------
1733 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1734 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1735 158142c2 bellard
| Binary Floating-Point Arithmetic.
1736 158142c2 bellard
*----------------------------------------------------------------------------*/
1737 158142c2 bellard
1738 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
{
    flag signA = extractFloat32Sign(a);
    flag signB = extractFloat32Sign(b);

    /* Operands of opposite sign: the sum is a difference of magnitudes. */
    if (signA != signB) {
        return subFloat32Sigs( a, b, signA STATUS_VAR );
    }
    /* Same sign: add the magnitudes and keep that sign. */
    return addFloat32Sigs( a, b, signA STATUS_VAR);

}
1752 158142c2 bellard
1753 158142c2 bellard
/*----------------------------------------------------------------------------
1754 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1755 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1756 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1757 158142c2 bellard
*----------------------------------------------------------------------------*/
1758 158142c2 bellard
1759 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
{
    flag signA = extractFloat32Sign(a);
    flag signB = extractFloat32Sign(b);

    /* Operands of opposite sign: the difference is a sum of magnitudes. */
    if (signA != signB) {
        return addFloat32Sigs( a, b, signA STATUS_VAR );
    }
    /* Same sign: subtract the magnitudes. */
    return subFloat32Sigs( a, b, signA STATUS_VAR );

}
1773 158142c2 bellard
1774 158142c2 bellard
/*----------------------------------------------------------------------------
1775 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1776 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1777 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1778 158142c2 bellard
*----------------------------------------------------------------------------*/
1779 158142c2 bellard
1780 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits32 sigA, sigB;
    bits64 product;
    bits32 sigZ;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    /* At least one operand has exponent 0xFF (infinity or NaN). */
    if ((expA == 0xFF) || (expB == 0xFF)) {
        if (((expA == 0xFF) && sigA) || ((expB == 0xFF) && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* No NaN involved, so one operand is infinite; if the other is
           zero the operation is invalid. */
        if (((expA | sigA) == 0) || ((expB | sigB) == 0)) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }

    /* Exponent field 0: return a signed zero, or normalize the operand. */
    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }
    if (expB == 0) {
        if (sigB == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }

    expZ = expA + expB - 0x7F;
    /* Set bit 23 in each significand, then pre-shift (7 and 8 bits). */
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    shift64RightJamming(((bits64) sigA) * sigB, 32, &product);
    sigZ = product;
    /* If bit 30 is clear, shift left once and lower the exponent. */
    if ((sbits32) (sigZ << 1) >= 0) {
        sigZ <<= 1;
        --expZ;
    }
    return roundAndPackFloat32(signZ, expZ, sigZ STATUS_VAR);

}
1833 158142c2 bellard
1834 158142c2 bellard
/*----------------------------------------------------------------------------
1835 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1836 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1837 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1838 158142c2 bellard
*----------------------------------------------------------------------------*/
1839 158142c2 bellard
1840 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits32 sigA, sigB, quot;
    bits64 dividend;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    /* Dividend is infinity or NaN. */
    if (expA == 0xFF) {
        if (sigA || ((expB == 0xFF) && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if (expB == 0xFF) {
            /* infinity / infinity */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }
    /* Divisor is infinity or NaN (dividend is finite here). */
    if (expB == 0xFF) {
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return packFloat32(signZ, 0, 0);
    }
    /* Divisor has exponent field 0: zero or needs normalizing. */
    if (expB == 0) {
        if (sigB == 0) {
            if ((expA | sigA) == 0) {
                /* 0 / 0 */
                float_raise(float_flag_invalid STATUS_VAR);
                return float32_default_nan;
            }
            float_raise(float_flag_divbyzero STATUS_VAR);
            return packFloat32(signZ, 0xFF, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }
    /* Dividend has exponent field 0: zero or needs normalizing. */
    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    expZ = expA - expB + 0x7D;
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    /* Halve the dividend significand (and bump the exponent) when twice
       its value is not below the divisor significand. */
    if (sigB <= (sigA + sigA)) {
        sigA >>= 1;
        ++expZ;
    }
    dividend = ((bits64) sigA) << 32;
    quot = dividend / sigB;
    /* When the low six quotient bits are all zero, set the lowest bit if
       the division left a remainder. */
    if ((quot & 0x3F) == 0) {
        quot |= ((bits64) sigB * quot != dividend);
    }
    return roundAndPackFloat32(signZ, expZ, quot STATUS_VAR);

}
1895 158142c2 bellard
1896 158142c2 bellard
/*----------------------------------------------------------------------------
1897 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
1898 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
1899 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1900 158142c2 bellard
*----------------------------------------------------------------------------*/
1901 158142c2 bellard
1902 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB, flipSign;
    int16 expA, expB, expGap;
    bits32 remSig, divSig;
    bits32 quot;
    bits64 rem64, div64, quot64;
    bits32 lastNonNegRem;
    sbits32 remPairSum;

    remSig = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    divSig = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);

    /* `a' is infinity or NaN: propagate a NaN, otherwise invalid. */
    if (expA == 0xFF) {
        if (remSig || ((expB == 0xFF) && divSig)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    /* `b' is NaN (propagate) or infinity (result is `a' unchanged). */
    if (expB == 0xFF) {
        if (divSig) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return a;
    }
    if (expB == 0) {
        if (divSig == 0) {
            /* Remainder with respect to zero is invalid. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        normalizeFloat32Subnormal(divSig, &expB, &divSig);
    }
    if (expA == 0) {
        if (remSig == 0) {
            return a;
        }
        normalizeFloat32Subnormal(remSig, &expA, &remSig);
    }

    expGap = expA - expB;
    remSig |= 0x00800000;
    divSig |= 0x00800000;

    /* `a' is returned unchanged when its exponent is more than one below
       that of `b'. */
    if (expGap < -1) {
        return a;
    }

    if (expGap < 32) {
        /* Exponent gap below 32: 32-bit significands, one 64-bit divide. */
        remSig <<= 8;
        divSig <<= 8;
        if (expGap < 0) {
            remSig >>= 1;
        }
        quot = (divSig <= remSig);
        if (quot) {
            remSig -= divSig;
        }
        if (expGap > 0) {
            quot = (((bits64) remSig) << 32) / divSig;
            quot >>= 32 - expGap;
            divSig >>= 2;
            remSig = ((remSig >> 1) << (expGap - 1)) - divSig * quot;
        } else {
            remSig >>= 2;
            divSig >>= 2;
        }
    } else {
        /* Larger gap: reduce by 62 exponent steps per iteration using
           64-bit quotient estimates. */
        if (divSig <= remSig) {
            remSig -= divSig;
        }
        rem64 = ((bits64) remSig) << 40;
        div64 = ((bits64) divSig) << 40;
        expGap -= 64;
        while (expGap > 0) {
            quot64 = estimateDiv128To64(rem64, 0, div64);
            /* Lower the estimate by 2, clamping at zero. */
            quot64 = (quot64 > 2) ? quot64 - 2 : 0;
            rem64 = -((divSig * quot64) << 38);
            expGap -= 62;
        }
        expGap += 64;
        quot64 = estimateDiv128To64(rem64, 0, div64);
        quot64 = (quot64 > 2) ? quot64 - 2 : 0;
        quot = quot64 >> (64 - expGap);
        divSig <<= 6;
        remSig = ((rem64 >> 33) << (expGap - 1)) - divSig * quot;
    }

    /* Subtract the divisor until the remainder turns negative, keeping
       the last non-negative value and counting the steps in quot. */
    for (;;) {
        lastNonNegRem = remSig;
        ++quot;
        remSig -= divSig;
        if ((sbits32) remSig < 0) {
            break;
        }
    }

    /* Choose between the two candidates by the sign of their sum; when the
       sum is zero, the low bit of quot decides. */
    remPairSum = remSig + lastNonNegRem;
    if ((remPairSum < 0) || ((remPairSum == 0) && (quot & 1))) {
        remSig = lastNonNegRem;
    }
    flipSign = ((sbits32) remSig < 0);
    if (flipSign) {
        remSig = -remSig;
    }
    return normalizeRoundAndPackFloat32(signA ^ flipSign, expB, remSig
                                        STATUS_VAR);

}
1995 158142c2 bellard
1996 158142c2 bellard
/*----------------------------------------------------------------------------
1997 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
1998 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
1999 158142c2 bellard
| Floating-Point Arithmetic.
2000 158142c2 bellard
*----------------------------------------------------------------------------*/
2001 158142c2 bellard
2002 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
{
    flag signA;
    int16 expA, expZ;
    bits32 sigA, root;
    bits64 remainder, rootSquared;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);

    /* Exponent 0xFF: NaN propagates, +infinity is returned as-is,
       -infinity is invalid. */
    if (expA == 0xFF) {
        if (sigA) {
            return propagateFloat32NaN(a, float32_zero STATUS_VAR);
        }
        if (!signA) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    /* Negative input: only -0 is accepted (and returned unchanged). */
    if (signA) {
        if ((expA | sigA) == 0) {
            return a;
        }
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if (expA == 0) {
        if (sigA == 0) {
            return float32_zero;
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    expZ = ((expA - 0x7F) >> 1) + 0x7E;
    sigA = (sigA | 0x00800000) << 8;
    root = estimateSqrt32(expA, sigA) + 2;

    /* The estimate is corrected against an exact remainder only when its
       low seven bits are at most 5. */
    if ((root & 0x7F) <= 5) {
        if (root < 2) {
            /* The "+ 2" above wrapped around; use the largest positive
               value and skip the final shift. */
            root = 0x7FFFFFFF;
            return roundAndPackFloat32(0, expZ, root STATUS_VAR);
        }
        sigA >>= expA & 1;
        rootSquared = ((bits64) root) * root;
        remainder = (((bits64) sigA) << 32) - rootSquared;
        /* Step the root down until the remainder is non-negative. */
        while ((sbits64) remainder < 0) {
            --root;
            remainder += (((bits64) root) << 1) | 1;
        }
        /* Set the lowest bit when the root is not exact. */
        root |= (remainder != 0);
    }
    shift32RightJamming(root, 1, &root);
    return roundAndPackFloat32(0, expZ, root STATUS_VAR);

}
2049 158142c2 bellard
2050 158142c2 bellard
/*----------------------------------------------------------------------------
2051 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2052 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2053 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2054 158142c2 bellard
*----------------------------------------------------------------------------*/
2055 158142c2 bellard
2056 750afe93 bellard
int float32_eq( float32 a, float32 b STATUS_PARAM )
{
    bits32 av, bv;
    int aIsNaN, bIsNaN;

    /* A NaN has exponent 0xFF and a nonzero fraction. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);
    if (aIsNaN || bIsNaN) {
        /* Unordered: not equal. Only signaling NaNs raise invalid. */
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    av = float32_val(a);
    bv = float32_val(b);
    if (av == bv) {
        return 1;
    }
    /* Different bit patterns are still equal when both are zero once the
       sign bit is shifted out (+0 versus -0). */
    return (bits32) ((av | bv) << 1) == 0;

}
2071 158142c2 bellard
2072 158142c2 bellard
/*----------------------------------------------------------------------------
2073 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2074 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
2075 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2076 158142c2 bellard
| Arithmetic.
2077 158142c2 bellard
*----------------------------------------------------------------------------*/
2078 158142c2 bellard
2079 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;
    bits32 av, bv;
    int aIsNaN, bIsNaN;

    /* Any NaN operand raises invalid and compares false. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);
    if (aIsNaN || bIsNaN) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);
    av = float32_val(a);
    bv = float32_val(b);

    if (signA != signB) {
        /* Mixed signs: true if `a' is the negative one, or both are zero. */
        return signA || ((bits32) ((av | bv) << 1) == 0);
    }
    if (av == bv) {
        return 1;
    }
    /* Same sign: the raw-pattern ordering is inverted when signA is set. */
    return (signA ^ (av < bv)) != 0;

}
2098 158142c2 bellard
2099 158142c2 bellard
/*----------------------------------------------------------------------------
2100 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2101 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2102 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2103 158142c2 bellard
*----------------------------------------------------------------------------*/
2104 158142c2 bellard
2105 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;
    bits32 av, bv;
    int aIsNaN, bIsNaN;

    /* Any NaN operand raises invalid and compares false. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);
    if (aIsNaN || bIsNaN) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);
    av = float32_val(a);
    bv = float32_val(b);

    if (signA != signB) {
        /* Mixed signs: true if `a' is the negative one and the operands
           are not both zero. */
        return signA && ((bits32) ((av | bv) << 1) != 0);
    }
    if (av == bv) {
        return 0;
    }
    /* Same sign: the raw-pattern ordering is inverted when signA is set. */
    return (signA ^ (av < bv)) != 0;

}
2124 158142c2 bellard
2125 158142c2 bellard
/*----------------------------------------------------------------------------
2126 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2127 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2128 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2129 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2130 158142c2 bellard
*----------------------------------------------------------------------------*/
2131 158142c2 bellard
2132 750afe93 bellard
int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
{
    bits32 av, bv;
    int aIsNaN, bIsNaN;

    /* Any NaN operand raises invalid and compares unequal. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);
    if (aIsNaN || bIsNaN) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    av = float32_val(a);
    bv = float32_val(b);
    if (av == bv) {
        return 1;
    }
    /* +0 and -0 are equal: both patterns are zero once the sign bit is
       shifted out. */
    return (bits32) ((av | bv) << 1) == 0;

}
2147 158142c2 bellard
2148 158142c2 bellard
/*----------------------------------------------------------------------------
2149 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2150 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2151 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2152 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2153 158142c2 bellard
*----------------------------------------------------------------------------*/
2154 158142c2 bellard
2155 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;
    bits32 av, bv;
    int aIsNaN, bIsNaN;

    /* A NaN operand compares false; only signaling NaNs raise invalid. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);
    if (aIsNaN || bIsNaN) {
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);
    av = float32_val(a);
    bv = float32_val(b);

    if (signA != signB) {
        /* Mixed signs: true if `a' is the negative one, or both are zero. */
        return signA || ((bits32) ((av | bv) << 1) == 0);
    }
    if (av == bv) {
        return 1;
    }
    /* Same sign: the raw-pattern ordering is inverted when signA is set. */
    return (signA ^ (av < bv)) != 0;

}
2176 158142c2 bellard
2177 158142c2 bellard
/*----------------------------------------------------------------------------
2178 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2179 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2180 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2181 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2182 158142c2 bellard
*----------------------------------------------------------------------------*/
2183 158142c2 bellard
2184 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2185 158142c2 bellard
{
2186 158142c2 bellard
    flag aSign, bSign;
2187 f090c9d4 pbrook
    bits32 av, bv;
2188 158142c2 bellard
2189 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2190 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2191 158142c2 bellard
       ) {
2192 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2193 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2194 158142c2 bellard
        }
2195 158142c2 bellard
        return 0;
2196 158142c2 bellard
    }
2197 158142c2 bellard
    aSign = extractFloat32Sign( a );
2198 158142c2 bellard
    bSign = extractFloat32Sign( b );
2199 f090c9d4 pbrook
    av = float32_val(a);
2200 f090c9d4 pbrook
    bv = float32_val(b);
2201 f090c9d4 pbrook
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2202 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2203 158142c2 bellard
2204 158142c2 bellard
}
2205 158142c2 bellard
2206 158142c2 bellard
/*----------------------------------------------------------------------------
2207 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2208 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2209 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2210 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2211 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2212 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2213 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2214 158142c2 bellard
*----------------------------------------------------------------------------*/
2215 158142c2 bellard
2216 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2217 158142c2 bellard
{
2218 158142c2 bellard
    flag aSign;
2219 158142c2 bellard
    int16 aExp, shiftCount;
2220 158142c2 bellard
    bits64 aSig;
2221 158142c2 bellard
2222 158142c2 bellard
    aSig = extractFloat64Frac( a );
2223 158142c2 bellard
    aExp = extractFloat64Exp( a );
2224 158142c2 bellard
    aSign = extractFloat64Sign( a );
2225 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2226 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2227 158142c2 bellard
    shiftCount = 0x42C - aExp;
2228 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2229 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2230 158142c2 bellard
2231 158142c2 bellard
}
2232 158142c2 bellard
2233 158142c2 bellard
/*----------------------------------------------------------------------------
2234 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2235 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2236 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2237 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2238 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2239 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2240 158142c2 bellard
| returned.
2241 158142c2 bellard
*----------------------------------------------------------------------------*/
2242 158142c2 bellard
2243 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2244 158142c2 bellard
{
2245 158142c2 bellard
    flag aSign;
2246 158142c2 bellard
    int16 aExp, shiftCount;
2247 158142c2 bellard
    bits64 aSig, savedASig;
2248 158142c2 bellard
    int32 z;
2249 158142c2 bellard
2250 158142c2 bellard
    aSig = extractFloat64Frac( a );
2251 158142c2 bellard
    aExp = extractFloat64Exp( a );
2252 158142c2 bellard
    aSign = extractFloat64Sign( a );
2253 158142c2 bellard
    if ( 0x41E < aExp ) {
2254 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2255 158142c2 bellard
        goto invalid;
2256 158142c2 bellard
    }
2257 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2258 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2259 158142c2 bellard
        return 0;
2260 158142c2 bellard
    }
2261 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2262 158142c2 bellard
    shiftCount = 0x433 - aExp;
2263 158142c2 bellard
    savedASig = aSig;
2264 158142c2 bellard
    aSig >>= shiftCount;
2265 158142c2 bellard
    z = aSig;
2266 158142c2 bellard
    if ( aSign ) z = - z;
2267 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2268 158142c2 bellard
 invalid:
2269 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2270 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2271 158142c2 bellard
    }
2272 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2273 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2274 158142c2 bellard
    }
2275 158142c2 bellard
    return z;
2276 158142c2 bellard
2277 158142c2 bellard
}
2278 158142c2 bellard
2279 158142c2 bellard
/*----------------------------------------------------------------------------
2280 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2281 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2282 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2283 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2284 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2285 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2286 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2287 158142c2 bellard
*----------------------------------------------------------------------------*/
2288 158142c2 bellard
2289 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2290 158142c2 bellard
{
2291 158142c2 bellard
    flag aSign;
2292 158142c2 bellard
    int16 aExp, shiftCount;
2293 158142c2 bellard
    bits64 aSig, aSigExtra;
2294 158142c2 bellard
2295 158142c2 bellard
    aSig = extractFloat64Frac( a );
2296 158142c2 bellard
    aExp = extractFloat64Exp( a );
2297 158142c2 bellard
    aSign = extractFloat64Sign( a );
2298 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2299 158142c2 bellard
    shiftCount = 0x433 - aExp;
2300 158142c2 bellard
    if ( shiftCount <= 0 ) {
2301 158142c2 bellard
        if ( 0x43E < aExp ) {
2302 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2303 158142c2 bellard
            if (    ! aSign
2304 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2305 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2306 158142c2 bellard
               ) {
2307 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2308 158142c2 bellard
            }
2309 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2310 158142c2 bellard
        }
2311 158142c2 bellard
        aSigExtra = 0;
2312 158142c2 bellard
        aSig <<= - shiftCount;
2313 158142c2 bellard
    }
2314 158142c2 bellard
    else {
2315 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2316 158142c2 bellard
    }
2317 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2318 158142c2 bellard
2319 158142c2 bellard
}
2320 158142c2 bellard
2321 158142c2 bellard
/*----------------------------------------------------------------------------
2322 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2323 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2324 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2325 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2326 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2327 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2328 158142c2 bellard
| returned.
2329 158142c2 bellard
*----------------------------------------------------------------------------*/
2330 158142c2 bellard
2331 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2332 158142c2 bellard
{
2333 158142c2 bellard
    flag aSign;
2334 158142c2 bellard
    int16 aExp, shiftCount;
2335 158142c2 bellard
    bits64 aSig;
2336 158142c2 bellard
    int64 z;
2337 158142c2 bellard
2338 158142c2 bellard
    aSig = extractFloat64Frac( a );
2339 158142c2 bellard
    aExp = extractFloat64Exp( a );
2340 158142c2 bellard
    aSign = extractFloat64Sign( a );
2341 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2342 158142c2 bellard
    shiftCount = aExp - 0x433;
2343 158142c2 bellard
    if ( 0 <= shiftCount ) {
2344 158142c2 bellard
        if ( 0x43E <= aExp ) {
2345 f090c9d4 pbrook
            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2346 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2347 158142c2 bellard
                if (    ! aSign
2348 158142c2 bellard
                     || (    ( aExp == 0x7FF )
2349 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2350 158142c2 bellard
                   ) {
2351 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2352 158142c2 bellard
                }
2353 158142c2 bellard
            }
2354 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2355 158142c2 bellard
        }
2356 158142c2 bellard
        z = aSig<<shiftCount;
2357 158142c2 bellard
    }
2358 158142c2 bellard
    else {
2359 158142c2 bellard
        if ( aExp < 0x3FE ) {
2360 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2361 158142c2 bellard
            return 0;
2362 158142c2 bellard
        }
2363 158142c2 bellard
        z = aSig>>( - shiftCount );
2364 158142c2 bellard
        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2365 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
2366 158142c2 bellard
        }
2367 158142c2 bellard
    }
2368 158142c2 bellard
    if ( aSign ) z = - z;
2369 158142c2 bellard
    return z;
2370 158142c2 bellard
2371 158142c2 bellard
}
2372 158142c2 bellard
2373 158142c2 bellard
/*----------------------------------------------------------------------------
2374 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2375 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2376 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2377 158142c2 bellard
| Arithmetic.
2378 158142c2 bellard
*----------------------------------------------------------------------------*/
2379 158142c2 bellard
2380 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
2381 158142c2 bellard
{
2382 158142c2 bellard
    flag aSign;
2383 158142c2 bellard
    int16 aExp;
2384 158142c2 bellard
    bits64 aSig;
2385 158142c2 bellard
    bits32 zSig;
2386 158142c2 bellard
2387 158142c2 bellard
    aSig = extractFloat64Frac( a );
2388 158142c2 bellard
    aExp = extractFloat64Exp( a );
2389 158142c2 bellard
    aSign = extractFloat64Sign( a );
2390 158142c2 bellard
    if ( aExp == 0x7FF ) {
2391 158142c2 bellard
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
2392 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
2393 158142c2 bellard
    }
2394 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
2395 158142c2 bellard
    zSig = aSig;
2396 158142c2 bellard
    if ( aExp || zSig ) {
2397 158142c2 bellard
        zSig |= 0x40000000;
2398 158142c2 bellard
        aExp -= 0x381;
2399 158142c2 bellard
    }
2400 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2401 158142c2 bellard
2402 158142c2 bellard
}
2403 158142c2 bellard
2404 158142c2 bellard
#ifdef FLOATX80
2405 158142c2 bellard
2406 158142c2 bellard
/*----------------------------------------------------------------------------
2407 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2408 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2409 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2410 158142c2 bellard
| Arithmetic.
2411 158142c2 bellard
*----------------------------------------------------------------------------*/
2412 158142c2 bellard
2413 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2414 158142c2 bellard
{
2415 158142c2 bellard
    flag aSign;
2416 158142c2 bellard
    int16 aExp;
2417 158142c2 bellard
    bits64 aSig;
2418 158142c2 bellard
2419 158142c2 bellard
    aSig = extractFloat64Frac( a );
2420 158142c2 bellard
    aExp = extractFloat64Exp( a );
2421 158142c2 bellard
    aSign = extractFloat64Sign( a );
2422 158142c2 bellard
    if ( aExp == 0x7FF ) {
2423 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
2424 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2425 158142c2 bellard
    }
2426 158142c2 bellard
    if ( aExp == 0 ) {
2427 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2428 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2429 158142c2 bellard
    }
2430 158142c2 bellard
    return
2431 158142c2 bellard
        packFloatx80(
2432 158142c2 bellard
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2433 158142c2 bellard
2434 158142c2 bellard
}
2435 158142c2 bellard
2436 158142c2 bellard
#endif
2437 158142c2 bellard
2438 158142c2 bellard
#ifdef FLOAT128
2439 158142c2 bellard
2440 158142c2 bellard
/*----------------------------------------------------------------------------
2441 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2442 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2443 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2444 158142c2 bellard
| Arithmetic.
2445 158142c2 bellard
*----------------------------------------------------------------------------*/
2446 158142c2 bellard
2447 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
2448 158142c2 bellard
{
2449 158142c2 bellard
    flag aSign;
2450 158142c2 bellard
    int16 aExp;
2451 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
2452 158142c2 bellard
2453 158142c2 bellard
    aSig = extractFloat64Frac( a );
2454 158142c2 bellard
    aExp = extractFloat64Exp( a );
2455 158142c2 bellard
    aSign = extractFloat64Sign( a );
2456 158142c2 bellard
    if ( aExp == 0x7FF ) {
2457 158142c2 bellard
        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
2458 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
2459 158142c2 bellard
    }
2460 158142c2 bellard
    if ( aExp == 0 ) {
2461 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2462 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2463 158142c2 bellard
        --aExp;
2464 158142c2 bellard
    }
2465 158142c2 bellard
    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2466 158142c2 bellard
    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2467 158142c2 bellard
2468 158142c2 bellard
}
2469 158142c2 bellard
2470 158142c2 bellard
#endif
2471 158142c2 bellard
2472 158142c2 bellard
/*----------------------------------------------------------------------------
2473 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2474 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2475 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2476 158142c2 bellard
| Floating-Point Arithmetic.
2477 158142c2 bellard
*----------------------------------------------------------------------------*/
2478 158142c2 bellard
2479 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
2480 158142c2 bellard
{
2481 158142c2 bellard
    flag aSign;
2482 158142c2 bellard
    int16 aExp;
2483 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
2484 158142c2 bellard
    int8 roundingMode;
2485 f090c9d4 pbrook
    bits64 z;
2486 158142c2 bellard
2487 158142c2 bellard
    aExp = extractFloat64Exp( a );
2488 158142c2 bellard
    if ( 0x433 <= aExp ) {
2489 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2490 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
2491 158142c2 bellard
        }
2492 158142c2 bellard
        return a;
2493 158142c2 bellard
    }
2494 158142c2 bellard
    if ( aExp < 0x3FF ) {
2495 f090c9d4 pbrook
        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
2496 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2497 158142c2 bellard
        aSign = extractFloat64Sign( a );
2498 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
2499 158142c2 bellard
         case float_round_nearest_even:
2500 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2501 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
2502 158142c2 bellard
            }
2503 158142c2 bellard
            break;
2504 158142c2 bellard
         case float_round_down:
2505 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
2506 158142c2 bellard
         case float_round_up:
2507 f090c9d4 pbrook
            return make_float64(
2508 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
2509 158142c2 bellard
        }
2510 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
2511 158142c2 bellard
    }
2512 158142c2 bellard
    lastBitMask = 1;
2513 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
2514 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
2515 f090c9d4 pbrook
    z = float64_val(a);
2516 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
2517 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
2518 158142c2 bellard
        z += lastBitMask>>1;
2519 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2520 158142c2 bellard
    }
2521 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
2522 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
2523 158142c2 bellard
            z += roundBitsMask;
2524 158142c2 bellard
        }
2525 158142c2 bellard
    }
2526 158142c2 bellard
    z &= ~ roundBitsMask;
2527 f090c9d4 pbrook
    if ( z != float64_val(a) )
2528 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
2529 f090c9d4 pbrook
    return make_float64(z);
2530 158142c2 bellard
2531 158142c2 bellard
}
2532 158142c2 bellard
2533 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integral value
| toward zero, whatever the current rounding mode is, and returns the result
| in double-precision format.  The rounding mode is switched temporarily and
| restored before returning.
*----------------------------------------------------------------------------*/

float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode = STATUS(float_rounding_mode);
    float64 truncated;

    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int( a STATUS_VAR );
    STATUS(float_rounding_mode) = savedMode;
    return truncated;
}
2543 e6e5906b pbrook
2544 158142c2 bellard
/*----------------------------------------------------------------------------
2545 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
2546 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2547 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
2548 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
2549 158142c2 bellard
| Floating-Point Arithmetic.
2550 158142c2 bellard
*----------------------------------------------------------------------------*/
2551 158142c2 bellard
2552 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2553 158142c2 bellard
{
2554 158142c2 bellard
    int16 aExp, bExp, zExp;
2555 158142c2 bellard
    bits64 aSig, bSig, zSig;
2556 158142c2 bellard
    int16 expDiff;
2557 158142c2 bellard
2558 158142c2 bellard
    aSig = extractFloat64Frac( a );
2559 158142c2 bellard
    aExp = extractFloat64Exp( a );
2560 158142c2 bellard
    bSig = extractFloat64Frac( b );
2561 158142c2 bellard
    bExp = extractFloat64Exp( b );
2562 158142c2 bellard
    expDiff = aExp - bExp;
2563 158142c2 bellard
    aSig <<= 9;
2564 158142c2 bellard
    bSig <<= 9;
2565 158142c2 bellard
    if ( 0 < expDiff ) {
2566 158142c2 bellard
        if ( aExp == 0x7FF ) {
2567 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2568 158142c2 bellard
            return a;
2569 158142c2 bellard
        }
2570 158142c2 bellard
        if ( bExp == 0 ) {
2571 158142c2 bellard
            --expDiff;
2572 158142c2 bellard
        }
2573 158142c2 bellard
        else {
2574 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
2575 158142c2 bellard
        }
2576 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
2577 158142c2 bellard
        zExp = aExp;
2578 158142c2 bellard
    }
2579 158142c2 bellard
    else if ( expDiff < 0 ) {
2580 158142c2 bellard
        if ( bExp == 0x7FF ) {
2581 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2582 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2583 158142c2 bellard
        }
2584 158142c2 bellard
        if ( aExp == 0 ) {
2585 158142c2 bellard
            ++expDiff;
2586 158142c2 bellard
        }
2587 158142c2 bellard
        else {
2588 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
2589 158142c2 bellard
        }
2590 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
2591 158142c2 bellard
        zExp = bExp;
2592 158142c2 bellard
    }
2593 158142c2 bellard
    else {
2594 158142c2 bellard
        if ( aExp == 0x7FF ) {
2595 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2596 158142c2 bellard
            return a;
2597 158142c2 bellard
        }
2598 158142c2 bellard
        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2599 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2600 158142c2 bellard
        zExp = aExp;
2601 158142c2 bellard
        goto roundAndPack;
2602 158142c2 bellard
    }
2603 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
2604 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
2605 158142c2 bellard
    --zExp;
2606 158142c2 bellard
    if ( (sbits64) zSig < 0 ) {
2607 158142c2 bellard
        zSig = aSig + bSig;
2608 158142c2 bellard
        ++zExp;
2609 158142c2 bellard
    }
2610 158142c2 bellard
 roundAndPack:
2611 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2612 158142c2 bellard
2613 158142c2 bellard
}
2614 158142c2 bellard
2615 158142c2 bellard
/*----------------------------------------------------------------------------
2616 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
2617 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2618 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
2619 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2620 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2621 158142c2 bellard
*----------------------------------------------------------------------------*/
2622 158142c2 bellard
2623 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2624 158142c2 bellard
{
2625 158142c2 bellard
    int16 aExp, bExp, zExp;
2626 158142c2 bellard
    bits64 aSig, bSig, zSig;
2627 158142c2 bellard
    int16 expDiff;
2628 158142c2 bellard
2629 158142c2 bellard
    aSig = extractFloat64Frac( a );
2630 158142c2 bellard
    aExp = extractFloat64Exp( a );
2631 158142c2 bellard
    bSig = extractFloat64Frac( b );
2632 158142c2 bellard
    bExp = extractFloat64Exp( b );
2633 158142c2 bellard
    expDiff = aExp - bExp;
2634 158142c2 bellard
    aSig <<= 10;
2635 158142c2 bellard
    bSig <<= 10;
2636 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
2637 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
2638 158142c2 bellard
    if ( aExp == 0x7FF ) {
2639 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2640 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2641 158142c2 bellard
        return float64_default_nan;
2642 158142c2 bellard
    }
2643 158142c2 bellard
    if ( aExp == 0 ) {
2644 158142c2 bellard
        aExp = 1;
2645 158142c2 bellard
        bExp = 1;
2646 158142c2 bellard
    }
2647 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
2648 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
2649 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2650 158142c2 bellard
 bExpBigger:
2651 158142c2 bellard
    if ( bExp == 0x7FF ) {
2652 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2653 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2654 158142c2 bellard
    }
2655 158142c2 bellard
    if ( aExp == 0 ) {
2656 158142c2 bellard
        ++expDiff;
2657 158142c2 bellard
    }
2658 158142c2 bellard
    else {
2659 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
2660 158142c2 bellard
    }
2661 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
2662 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
2663 158142c2 bellard
 bBigger:
2664 158142c2 bellard
    zSig = bSig - aSig;
2665 158142c2 bellard
    zExp = bExp;
2666 158142c2 bellard
    zSign ^= 1;
2667 158142c2 bellard
    goto normalizeRoundAndPack;
2668 158142c2 bellard
 aExpBigger:
2669 158142c2 bellard
    if ( aExp == 0x7FF ) {
2670 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2671 158142c2 bellard
        return a;
2672 158142c2 bellard
    }
2673 158142c2 bellard
    if ( bExp == 0 ) {
2674 158142c2 bellard
        --expDiff;
2675 158142c2 bellard
    }
2676 158142c2 bellard
    else {
2677 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
2678 158142c2 bellard
    }
2679 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
2680 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
2681 158142c2 bellard
 aBigger:
2682 158142c2 bellard
    zSig = aSig - bSig;
2683 158142c2 bellard
    zExp = aExp;
2684 158142c2 bellard
 normalizeRoundAndPack:
2685 158142c2 bellard
    --zExp;
2686 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2687 158142c2 bellard
2688 158142c2 bellard
}
2689 158142c2 bellard
2690 158142c2 bellard
/*----------------------------------------------------------------------------
2691 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
2692 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
2693 158142c2 bellard
| Binary Floating-Point Arithmetic.
2694 158142c2 bellard
*----------------------------------------------------------------------------*/
2695 158142c2 bellard
2696 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
2697 158142c2 bellard
{
2698 158142c2 bellard
    flag aSign, bSign;
2699 158142c2 bellard
2700 158142c2 bellard
    aSign = extractFloat64Sign( a );
2701 158142c2 bellard
    bSign = extractFloat64Sign( b );
2702 158142c2 bellard
    if ( aSign == bSign ) {
2703 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2704 158142c2 bellard
    }
2705 158142c2 bellard
    else {
2706 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2707 158142c2 bellard
    }
2708 158142c2 bellard
2709 158142c2 bellard
}
2710 158142c2 bellard
2711 158142c2 bellard
/*----------------------------------------------------------------------------
2712 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
2713 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2714 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2715 158142c2 bellard
*----------------------------------------------------------------------------*/
2716 158142c2 bellard
2717 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2718 158142c2 bellard
{
2719 158142c2 bellard
    flag aSign, bSign;
2720 158142c2 bellard
2721 158142c2 bellard
    aSign = extractFloat64Sign( a );
2722 158142c2 bellard
    bSign = extractFloat64Sign( b );
2723 158142c2 bellard
    if ( aSign == bSign ) {
2724 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2725 158142c2 bellard
    }
2726 158142c2 bellard
    else {
2727 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2728 158142c2 bellard
    }
2729 158142c2 bellard
2730 158142c2 bellard
}
2731 158142c2 bellard
2732 158142c2 bellard
/*----------------------------------------------------------------------------
2733 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
2734 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2735 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2736 158142c2 bellard
*----------------------------------------------------------------------------*/
2737 158142c2 bellard
2738 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    bits64 fracA, fracB, prodHi, prodLo;

    /* Multiplies `a' by `b'.  Special operands (NaN, infinity, zero) are
       dealt with first; the general case multiplies the significands and
       hands the result to roundAndPackFloat64. */

    /* Split both operands into fraction, exponent and sign fields. */
    fracA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    fracB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    signB = extractFloat64Sign( b );
    signZ = signA ^ signB;

    /* `a' is an infinity or a NaN. */
    if ( expA == 0x7FF ) {
        if ( fracA ) return propagateFloat64NaN( a, b STATUS_VAR );
        if ( ( expB == 0x7FF ) && fracB ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        if ( ( expB | fracB ) != 0 ) return packFloat64( signZ, 0x7FF, 0 );
        /* Infinity times zero is an invalid operation. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* `b' is an infinity or a NaN, while `a' is finite. */
    if ( expB == 0x7FF ) {
        if ( fracB ) return propagateFloat64NaN( a, b STATUS_VAR );
        if ( ( expA | fracA ) != 0 ) return packFloat64( signZ, 0x7FF, 0 );
        /* Zero times infinity is an invalid operation. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* A zero operand gives a signed zero; a subnormal one is normalized. */
    if ( expA == 0 ) {
        if ( fracA == 0 ) return packFloat64( signZ, 0, 0 );
        normalizeFloat64Subnormal( fracA, &expA, &fracA );
    }
    if ( expB == 0 ) {
        if ( fracB == 0 ) return packFloat64( signZ, 0, 0 );
        normalizeFloat64Subnormal( fracB, &expB, &fracB );
    }

    /* General case: add the exponents (removing one bias), set the bit just
       above the 52-bit fraction in each operand, and multiply. */
    expZ = expA + expB - 0x3FF;
    fracA = ( fracA | LIT64( 0x0010000000000000 ) )<<10;
    fracB = ( fracB | LIT64( 0x0010000000000000 ) )<<11;
    mul64To128( fracA, fracB, &prodHi, &prodLo );
    /* Fold any nonzero low word into the lowest bit of the high word. */
    if ( prodLo != 0 ) prodHi |= 1;
    /* If bit 62 of the high word is clear, shift left once and compensate
       in the exponent. */
    if ( (sbits64) ( prodHi<<1 ) >= 0 ) {
        prodHi <<= 1;
        --expZ;
    }
    return roundAndPackFloat64( signZ, expZ, prodHi STATUS_VAR );

}
2789 158142c2 bellard
2790 158142c2 bellard
/*----------------------------------------------------------------------------
2791 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
2792 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
2793 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2794 158142c2 bellard
*----------------------------------------------------------------------------*/
2795 158142c2 bellard
2796 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
2797 158142c2 bellard
{
/* Divides `a' by `b'.  NaN, infinity and zero operands are handled first;
   otherwise a 64-bit quotient estimate is computed, corrected against an
   exact remainder when needed, and passed to roundAndPackFloat64. */
2798 158142c2 bellard
    flag aSign, bSign, zSign;
2799 158142c2 bellard
    int16 aExp, bExp, zExp;
2800 158142c2 bellard
    bits64 aSig, bSig, zSig;
2801 158142c2 bellard
    bits64 rem0, rem1;
2802 158142c2 bellard
    bits64 term0, term1;
2803 158142c2 bellard
2804 158142c2 bellard
    aSig = extractFloat64Frac( a );
2805 158142c2 bellard
    aExp = extractFloat64Exp( a );
2806 158142c2 bellard
    aSign = extractFloat64Sign( a );
2807 158142c2 bellard
    bSig = extractFloat64Frac( b );
2808 158142c2 bellard
    bExp = extractFloat64Exp( b );
2809 158142c2 bellard
    bSign = extractFloat64Sign( b );
2810 158142c2 bellard
    zSign = aSign ^ bSign;  /* result sign is the XOR of the operand signs */
2811 158142c2 bellard
    if ( aExp == 0x7FF ) {  /* `a' is an infinity or a NaN */
2812 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2813 158142c2 bellard
        if ( bExp == 0x7FF ) {  /* `b' is an infinity or a NaN as well */
2814 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2815 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);  /* infinity / infinity */
2816 158142c2 bellard
            return float64_default_nan;
2817 158142c2 bellard
        }
2818 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );  /* infinity / finite: signed infinity */
2819 158142c2 bellard
    }
2820 158142c2 bellard
    if ( bExp == 0x7FF ) {  /* `a' is finite, `b' is an infinity or a NaN */
2821 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2822 158142c2 bellard
        return packFloat64( zSign, 0, 0 );  /* finite / infinity: signed zero */
2823 158142c2 bellard
    }
2824 158142c2 bellard
    if ( bExp == 0 ) {  /* `b' is zero or subnormal */
2825 158142c2 bellard
        if ( bSig == 0 ) {
2826 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {  /* 0 / 0 is invalid */
2827 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2828 158142c2 bellard
                return float64_default_nan;
2829 158142c2 bellard
            }
2830 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);  /* nonzero / 0 */
2831 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2832 158142c2 bellard
        }
2833 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2834 158142c2 bellard
    }
2835 158142c2 bellard
    if ( aExp == 0 ) {  /* `a' is zero or subnormal */
2836 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2837 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2838 158142c2 bellard
    }
2839 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
2840 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;  /* set the bit above the 52-bit fraction, then align */
2841 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2842 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {  /* when 2*aSig >= bSig, halve aSig and bump the exponent instead */
2843 158142c2 bellard
        aSig >>= 1;
2844 158142c2 bellard
        ++zExp;
2845 158142c2 bellard
    }
2846 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );  /* quotient estimate; the code below corrects it downward only */
2847 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {  /* low 9 bits in 0..2: verify against the exact remainder (presumably the only case where the estimate error can affect rounding) */
2848 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
2849 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );  /* rem = aSig:0 - bSig*zSig */
2850 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {  /* negative remainder: the estimate was too large */
2851 158142c2 bellard
            --zSig;
2852 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2853 158142c2 bellard
        }
2854 158142c2 bellard
        zSig |= ( rem1 != 0 );  /* record a nonzero remainder in the lowest bit */
2855 158142c2 bellard
    }
2856 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2857 158142c2 bellard
2858 158142c2 bellard
}
2859 158142c2 bellard
2860 158142c2 bellard
/*----------------------------------------------------------------------------
2861 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
2862 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2863 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2864 158142c2 bellard
*----------------------------------------------------------------------------*/
2865 158142c2 bellard
2866 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
2867 158142c2 bellard
{
/* Computes the remainder of `a' with respect to `b'.  Special operands are
   handled first; otherwise aSig is reduced against bSig in chunks while a
   quotient `q' is tracked, and the final candidate remainder is chosen
   from the last two subtraction results. */
2868 158142c2 bellard
    flag aSign, bSign, zSign;
2869 158142c2 bellard
    int16 aExp, bExp, expDiff;
2870 158142c2 bellard
    bits64 aSig, bSig;
2871 158142c2 bellard
    bits64 q, alternateASig;
2872 158142c2 bellard
    sbits64 sigMean;
2873 158142c2 bellard
2874 158142c2 bellard
    aSig = extractFloat64Frac( a );
2875 158142c2 bellard
    aExp = extractFloat64Exp( a );
2876 158142c2 bellard
    aSign = extractFloat64Sign( a );
2877 158142c2 bellard
    bSig = extractFloat64Frac( b );
2878 158142c2 bellard
    bExp = extractFloat64Exp( b );
2879 158142c2 bellard
    bSign = extractFloat64Sign( b );  /* extracted but not used further in this function */
2880 158142c2 bellard
    if ( aExp == 0x7FF ) {  /* `a' is an infinity or a NaN */
2881 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2882 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2883 158142c2 bellard
        }
2884 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);  /* remainder of an infinity is invalid */
2885 158142c2 bellard
        return float64_default_nan;
2886 158142c2 bellard
    }
2887 158142c2 bellard
    if ( bExp == 0x7FF ) {  /* `a' is finite, `b' is an infinity or a NaN */
2888 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2889 158142c2 bellard
        return a;  /* finite `a', infinite `b': `a' is returned unchanged */
2890 158142c2 bellard
    }
2891 158142c2 bellard
    if ( bExp == 0 ) {  /* `b' is zero or subnormal */
2892 158142c2 bellard
        if ( bSig == 0 ) {  /* remainder with respect to zero is invalid */
2893 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2894 158142c2 bellard
            return float64_default_nan;
2895 158142c2 bellard
        }
2896 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2897 158142c2 bellard
    }
2898 158142c2 bellard
    if ( aExp == 0 ) {  /* `a' is zero or subnormal */
2899 158142c2 bellard
        if ( aSig == 0 ) return a;  /* zero `a' is returned unchanged */
2900 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2901 158142c2 bellard
    }
2902 158142c2 bellard
    expDiff = aExp - bExp;
2903 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;  /* set the bit above the fraction and move it to bit 63 */
2904 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2905 158142c2 bellard
    if ( expDiff < 0 ) {
2906 158142c2 bellard
        if ( expDiff < -1 ) return a;  /* exponent of `a' is at least 2 below that of `b': `a' is already the result */
2907 158142c2 bellard
        aSig >>= 1;  /* expDiff == -1: align aSig with bSig */
2908 158142c2 bellard
    }
2909 158142c2 bellard
    q = ( bSig <= aSig );  /* first quotient bit */
2910 158142c2 bellard
    if ( q ) aSig -= bSig;
2911 158142c2 bellard
    expDiff -= 64;
2912 158142c2 bellard
    while ( 0 < expDiff ) {  /* more than 64 bits of exponent difference left: reduce in steps of 62 */
2913 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2914 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;  /* back the estimate off by 2, clamping at 0 */
2915 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );  /* next partial remainder; relies on wrap-around arithmetic (assumes bits64 is unsigned) */
2916 158142c2 bellard
        expDiff -= 62;
2917 158142c2 bellard
    }
2918 158142c2 bellard
    expDiff += 64;
2919 158142c2 bellard
    if ( 0 < expDiff ) {  /* final partial step covering the remaining expDiff bits */
2920 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2921 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
2922 158142c2 bellard
        q >>= 64 - expDiff;  /* keep only the top expDiff bits of the estimate */
2923 158142c2 bellard
        bSig >>= 2;
2924 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2925 158142c2 bellard
    }
2926 158142c2 bellard
    else {
2927 158142c2 bellard
        aSig >>= 2;  /* no further quotient bits: just bring both to the same scale used above */
2928 158142c2 bellard
        bSig >>= 2;
2929 158142c2 bellard
    }
2930 158142c2 bellard
    do {  /* subtract bSig until aSig goes negative, keeping the previous value in alternateASig */
2931 158142c2 bellard
        alternateASig = aSig;
2932 158142c2 bellard
        ++q;
2933 158142c2 bellard
        aSig -= bSig;
2934 158142c2 bellard
    } while ( 0 <= (sbits64) aSig );
2935 158142c2 bellard
    sigMean = aSig + alternateASig;  /* sign of the sum tells which of the two candidates is smaller in magnitude */
2936 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {  /* on a tie, the parity of q decides */
2937 158142c2 bellard
        aSig = alternateASig;
2938 158142c2 bellard
    }
2939 158142c2 bellard
    zSign = ( (sbits64) aSig < 0 );  /* a negative candidate flips the sign of the result relative to `a' */
2940 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2941 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
2942 158142c2 bellard
2943 158142c2 bellard
}
2944 158142c2 bellard
2945 158142c2 bellard
/*----------------------------------------------------------------------------
2946 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
2947 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2948 158142c2 bellard
| Floating-Point Arithmetic.
2949 158142c2 bellard
*----------------------------------------------------------------------------*/
2950 158142c2 bellard
2951 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
2952 158142c2 bellard
{
/* Computes the square root of `a'.  NaN, infinity, negative and zero
   operands are handled first; otherwise a root estimate is formed,
   corrected against an exact remainder when needed, and passed to
   roundAndPackFloat64. */
2953 158142c2 bellard
    flag aSign;
2954 158142c2 bellard
    int16 aExp, zExp;
2955 158142c2 bellard
    bits64 aSig, zSig, doubleZSig;
2956 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
2957 158142c2 bellard
2958 158142c2 bellard
    aSig = extractFloat64Frac( a );
2959 158142c2 bellard
    aExp = extractFloat64Exp( a );
2960 158142c2 bellard
    aSign = extractFloat64Sign( a );
2961 158142c2 bellard
    if ( aExp == 0x7FF ) {  /* `a' is an infinity or a NaN */
2962 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
2963 158142c2 bellard
        if ( ! aSign ) return a;  /* positive infinity is returned unchanged */
2964 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);  /* negative infinity is invalid */
2965 158142c2 bellard
        return float64_default_nan;
2966 158142c2 bellard
    }
2967 158142c2 bellard
    if ( aSign ) {  /* negative finite operand */
2968 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;  /* negative zero is returned unchanged */
2969 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);  /* any other negative value is invalid */
2970 158142c2 bellard
        return float64_default_nan;
2971 158142c2 bellard
    }
2972 158142c2 bellard
    if ( aExp == 0 ) {  /* positive zero or subnormal */
2973 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
2974 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2975 158142c2 bellard
    }
2976 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;  /* halve the unbiased exponent, then re-bias */
2977 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );  /* set the bit above the 52-bit fraction */
2978 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );  /* initial estimate from the top bits of the significand */
2979 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );  /* shift amount depends on the parity of the exponent */
2980 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );  /* refine the estimate using a division */
2981 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {  /* low 9 bits in 0..5: verify against the exact remainder (presumably the only case where the estimate error can affect rounding) */
2982 158142c2 bellard
        doubleZSig = zSig<<1;
2983 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
2984 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );  /* rem = aSig:0 - zSig*zSig */
2985 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {  /* negative remainder: the estimate was too large */
2986 158142c2 bellard
            --zSig;
2987 158142c2 bellard
            doubleZSig -= 2;
2988 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );  /* add back 2*zSig + 1 for the decremented zSig */
2989 158142c2 bellard
        }
2990 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );  /* record a nonzero remainder in the lowest bit */
2991 158142c2 bellard
    }
2992 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );  /* the result is always packed with sign 0 */
2993 158142c2 bellard
2994 158142c2 bellard
}
2995 158142c2 bellard
2996 158142c2 bellard
/*----------------------------------------------------------------------------
2997 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
2998 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The comparison is performed
2999 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3000 158142c2 bellard
*----------------------------------------------------------------------------*/
3001 158142c2 bellard
3002 750afe93 bellard
int float64_eq( float64 a, float64 b STATUS_PARAM )
{
    bits64 aBits, bBits;
    int hasNaN;

    /* Equality test.  A NaN operand makes the result 0; the invalid
       exception is raised only when one of the operands is a signaling
       NaN. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aBits == bBits ) return 1;
    /* Different encodings are still equal when every bit other than the
       sign is clear in both, i.e. the operands are zeros of opposite sign. */
    return ( (bits64) ( ( aBits | bBits )<<1 ) == 0 );

}
3019 158142c2 bellard
3020 158142c2 bellard
/*----------------------------------------------------------------------------
3021 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3022 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3023 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3024 158142c2 bellard
| Arithmetic.
3025 158142c2 bellard
*----------------------------------------------------------------------------*/
3026 158142c2 bellard
3027 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
{
    flag aNeg, bNeg;
    bits64 aBits, bBits;
    int hasNaN;

    /* Less-than-or-equal test.  Any NaN operand raises the invalid
       exception and makes the result 0. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    aNeg = extractFloat64Sign( a );
    bNeg = extractFloat64Sign( b );
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aNeg != bNeg ) {
        /* Opposite signs: true when `a' is the negative operand, or when
           both operands are zeros. */
        if ( aNeg ) return 1;
        return ( (bits64) ( ( aBits | bBits )<<1 ) == 0 );
    }
    if ( aBits == bBits ) return 1;
    /* Same sign: compare the raw encodings, inverting the sense when both
       operands are negative. */
    return ( ( aNeg ^ ( aBits < bBits ) ) != 0 );

}
3046 158142c2 bellard
3047 158142c2 bellard
/*----------------------------------------------------------------------------
3048 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3049 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
3050 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3051 158142c2 bellard
*----------------------------------------------------------------------------*/
3052 158142c2 bellard
3053 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
{
    flag aNeg, bNeg;
    bits64 aBits, bBits;
    int hasNaN;

    /* Strict less-than test.  Any NaN operand raises the invalid exception
       and makes the result 0. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    aNeg = extractFloat64Sign( a );
    bNeg = extractFloat64Sign( b );
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aNeg != bNeg ) {
        /* Opposite signs: true only when `a' is the negative operand and
           the operands are not both zeros. */
        if ( ! aNeg ) return 0;
        return ( (bits64) ( ( aBits | bBits )<<1 ) != 0 );
    }
    if ( aBits == bBits ) return 0;
    /* Same sign: compare the raw encodings, inverting the sense when both
       operands are negative. */
    return ( ( aNeg ^ ( aBits < bBits ) ) != 0 );

}
3072 158142c2 bellard
3073 158142c2 bellard
/*----------------------------------------------------------------------------
3074 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3075 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3076 158142c2 bellard
| if either operand is a NaN.  Otherwise, the comparison is performed
3077 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3078 158142c2 bellard
*----------------------------------------------------------------------------*/
3079 158142c2 bellard
3080 750afe93 bellard
int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
{
    bits64 aBits, bBits;
    int hasNaN;

    /* Equality test in which any NaN operand, quiet or signaling, raises
       the invalid exception and makes the result 0. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aBits == bBits ) return 1;
    /* Different encodings are still equal when every bit other than the
       sign is clear in both, i.e. the operands are zeros of opposite sign. */
    return ( (bits64) ( ( aBits | bBits )<<1 ) == 0 );

}
3095 158142c2 bellard
3096 158142c2 bellard
/*----------------------------------------------------------------------------
3097 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3098 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3099 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3100 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3101 158142c2 bellard
*----------------------------------------------------------------------------*/
3102 158142c2 bellard
3103 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag aNeg, bNeg;
    bits64 aBits, bBits;
    int hasNaN;

    /* Less-than-or-equal test.  A NaN operand makes the result 0; the
       invalid exception is raised only when one of the operands is a
       signaling NaN. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    aNeg = extractFloat64Sign( a );
    bNeg = extractFloat64Sign( b );
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aNeg != bNeg ) {
        /* Opposite signs: true when `a' is the negative operand, or when
           both operands are zeros. */
        if ( aNeg ) return 1;
        return ( (bits64) ( ( aBits | bBits )<<1 ) == 0 );
    }
    if ( aBits == bBits ) return 1;
    /* Same sign: compare the raw encodings, inverting the sense when both
       operands are negative. */
    return ( ( aNeg ^ ( aBits < bBits ) ) != 0 );

}
3124 158142c2 bellard
3125 158142c2 bellard
/*----------------------------------------------------------------------------
3126 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3127 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3128 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3129 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3130 158142c2 bellard
*----------------------------------------------------------------------------*/
3131 158142c2 bellard
3132 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag aNeg, bNeg;
    bits64 aBits, bBits;
    int hasNaN;

    /* Strict less-than test.  A NaN operand makes the result 0; the
       invalid exception is raised only when one of the operands is a
       signaling NaN. */
    hasNaN = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    if ( ! hasNaN ) {
        hasNaN = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    }
    if ( hasNaN ) {
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
        }
        return 0;
    }
    aNeg = extractFloat64Sign( a );
    bNeg = extractFloat64Sign( b );
    aBits = float64_val(a);
    bBits = float64_val(b);
    if ( aNeg != bNeg ) {
        /* Opposite signs: true only when `a' is the negative operand and
           the operands are not both zeros. */
        if ( ! aNeg ) return 0;
        return ( (bits64) ( ( aBits | bBits )<<1 ) != 0 );
    }
    if ( aBits == bBits ) return 0;
    /* Same sign: compare the raw encodings, inverting the sense when both
       operands are negative. */
    return ( ( aNeg ^ ( aBits < bBits ) ) != 0 );

}
3153 158142c2 bellard
3154 158142c2 bellard
#ifdef FLOATX80
3155 158142c2 bellard
3156 158142c2 bellard
/*----------------------------------------------------------------------------
3157 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3158 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3159 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3160 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3161 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3162 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3163 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3164 158142c2 bellard
*----------------------------------------------------------------------------*/
3165 158142c2 bellard
3166 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent, rightShift;
    bits64 significand;

    /* Converts `a' to a 32-bit integer; rounding and range checking are
       delegated to roundAndPackInt32. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    /* A NaN (maximum exponent, nonzero bits below the top significand bit)
       is handled as a positive value. */
    if ( exponent == 0x7FFF ) {
        if ( (bits64) ( significand<<1 ) ) negative = 0;
    }
    /* The right shift is never allowed to drop below 1. */
    rightShift = 0x4037 - exponent;
    if ( rightShift < 1 ) rightShift = 1;
    shift64RightJamming( significand, rightShift, &significand );
    return roundAndPackInt32( negative, significand STATUS_VAR );

}
3182 158142c2 bellard
3183 158142c2 bellard
/*----------------------------------------------------------------------------
3184 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3185 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3186 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3187 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3188 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3189 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3190 158142c2 bellard
| sign as `a' is returned.
3191 158142c2 bellard
*----------------------------------------------------------------------------*/
3192 158142c2 bellard
3193 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent, rightShift;
    bits64 significand, wholePart;
    int32 z;

    /* Converts `a' to a 32-bit integer by truncation.  Out-of-range values
       and NaNs raise the invalid exception; discarded fraction bits set the
       inexact flag. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    if ( 0x401E < exponent ) {
        /* Too large for 32 bits; a NaN is reported as a positive overflow. */
        if ( ( exponent == 0x7FFF ) && (bits64) ( significand<<1 ) ) {
            negative = 0;
        }
        goto invalid;
    }
    if ( exponent < 0x3FFF ) {
        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */
        if ( exponent || significand ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    rightShift = 0x403E - exponent;
    wholePart = significand>>rightShift;
    z = wholePart;
    if ( negative ) z = - z;
    /* A result whose sign disagrees with the operand's sign overflowed. */
    if ( ( z < 0 ) ^ negative ) goto invalid;
    /* Shifting back recovers the operand only if no bits were discarded. */
    if ( ( wholePart<<rightShift ) != significand ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

 invalid:
    float_raise( float_flag_invalid STATUS_VAR);
    return negative ? (sbits32) 0x80000000 : 0x7FFFFFFF;

}
3227 158142c2 bellard
3228 158142c2 bellard
/*----------------------------------------------------------------------------
3229 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3230 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3231 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3232 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3233 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3234 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3235 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3236 158142c2 bellard
*----------------------------------------------------------------------------*/
3237 158142c2 bellard
3238 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent, rightShift;
    bits64 significand, roundBits;

    /* Converts `a' to a 64-bit integer; rounding is delegated to
       roundAndPackInt64. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    rightShift = 0x403E - exponent;
    if ( rightShift < 0 ) {
        /* Exponent too large for 64 bits: invalid.  Positive values and
           NaNs saturate to the largest positive integer; everything else
           saturates to the most negative one. */
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! negative ) return LIT64( 0x7FFFFFFFFFFFFFFF );
        if (    ( exponent == 0x7FFF )
             && ( significand != LIT64( 0x8000000000000000 ) ) ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (sbits64) LIT64( 0x8000000000000000 );
    }
    if ( rightShift == 0 ) {
        /* No shift needed, so there are no extra rounding bits. */
        roundBits = 0;
    }
    else {
        shift64ExtraRightJamming(
            significand, 0, rightShift, &significand, &roundBits );
    }
    return roundAndPackInt64( negative, significand, roundBits STATUS_VAR );

}
3267 158142c2 bellard
3268 158142c2 bellard
/*----------------------------------------------------------------------------
3269 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3270 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3271 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3272 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3273 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3274 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3275 158142c2 bellard
| sign as `a' is returned.
3276 158142c2 bellard
*----------------------------------------------------------------------------*/
3277 158142c2 bellard
3278 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent, expExcess;
    bits64 significand;
    int64 z;

    /* Converts `a' to a 64-bit integer by truncation.  Out-of-range values
       and NaNs raise the invalid exception; discarded fraction bits set the
       inexact flag. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    expExcess = exponent - 0x403E;
    if ( 0 <= expExcess ) {
        significand &= LIT64( 0x7FFFFFFFFFFFFFFF );
        /* With this exponent the only value that is not an overflow has
           sign/exponent word 0xC03E and no bits set below the top
           significand bit; it maps to the most negative integer. */
        if ( ( a.high == 0xC03E ) && ! significand ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! negative || ( ( exponent == 0x7FFF ) && significand ) ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (sbits64) LIT64( 0x8000000000000000 );
    }
    if ( exponent < 0x3FFF ) {
        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */
        if ( exponent | significand ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    z = significand>>( - expExcess );
    /* Any bit shifted out on the right means the conversion is inexact. */
    if ( (bits64) ( significand<<( expExcess & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if ( negative ) z = - z;
    return z;

}
3311 158142c2 bellard
3312 158142c2 bellard
/*----------------------------------------------------------------------------
3313 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3314 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3315 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3316 158142c2 bellard
| Floating-Point Arithmetic.
3317 158142c2 bellard
*----------------------------------------------------------------------------*/
3318 158142c2 bellard
3319 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent;
    bits64 significand;

    /* Narrows `a' to single precision; rounding is delegated to
       roundAndPackFloat32. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    if ( exponent == 0x7FFF ) {
        /* Maximum exponent: an infinity when no bits are set below the top
           significand bit, otherwise a NaN that is converted via the
           common NaN form. */
        if ( ! (bits64) ( significand<<1 ) ) {
            return packFloat32( negative, 0xFF, 0 );
        }
        return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
    }
    shift64RightJamming( significand, 33, &significand );
    /* The exponent is re-biased unless both it and the shifted significand
       are zero. */
    if ( exponent || significand ) exponent -= 0x3F81;
    return roundAndPackFloat32( negative, exponent, significand STATUS_VAR );

}
3339 158142c2 bellard
3340 158142c2 bellard
/*----------------------------------------------------------------------------
3341 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3342 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3343 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3344 158142c2 bellard
| Floating-Point Arithmetic.
3345 158142c2 bellard
*----------------------------------------------------------------------------*/
3346 158142c2 bellard
3347 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    flag negative;
    int32 exponent;
    bits64 significand, shifted;

    /* Narrows `a' to double precision; rounding is delegated to
       roundAndPackFloat64. */
    significand = extractFloatx80Frac( a );
    exponent = extractFloatx80Exp( a );
    negative = extractFloatx80Sign( a );
    if ( exponent == 0x7FFF ) {
        /* Maximum exponent: an infinity when no bits are set below the top
           significand bit, otherwise a NaN that is converted via the
           common NaN form. */
        if ( ! (bits64) ( significand<<1 ) ) {
            return packFloat64( negative, 0x7FF, 0 );
        }
        return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
    }
    shift64RightJamming( significand, 1, &shifted );
    /* The exponent is re-biased unless both it and the original
       significand are zero. */
    if ( exponent || significand ) exponent -= 0x3C01;
    return roundAndPackFloat64( negative, exponent, shifted STATUS_VAR );

}
3367 158142c2 bellard
3368 158142c2 bellard
#ifdef FLOAT128
3369 158142c2 bellard
3370 158142c2 bellard
/*----------------------------------------------------------------------------
3371 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3372 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3373 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3374 158142c2 bellard
| Floating-Point Arithmetic.
3375 158142c2 bellard
*----------------------------------------------------------------------------*/
3376 158142c2 bellard
3377 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
3378 158142c2 bellard
{
3379 158142c2 bellard
    flag aSign;
3380 158142c2 bellard
    int16 aExp;
3381 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
3382 158142c2 bellard
3383 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3384 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3385 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3386 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3387 158142c2 bellard
        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
3388 158142c2 bellard
    }
3389 158142c2 bellard
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3390 158142c2 bellard
    return packFloat128( aSign, aExp, zSig0, zSig1 );
3391 158142c2 bellard
3392 158142c2 bellard
}
3393 158142c2 bellard
3394 158142c2 bellard
#endif
3395 158142c2 bellard
3396 158142c2 bellard
/*----------------------------------------------------------------------------
3397 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
3398 158142c2 bellard
| and returns the result as an extended quadruple-precision floating-point
3399 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
3400 158142c2 bellard
| Binary Floating-Point Arithmetic.
3401 158142c2 bellard
*----------------------------------------------------------------------------*/
3402 158142c2 bellard
3403 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
3404 158142c2 bellard
{
3405 158142c2 bellard
    flag aSign;
3406 158142c2 bellard
    int32 aExp;
3407 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
3408 158142c2 bellard
    int8 roundingMode;
3409 158142c2 bellard
    floatx80 z;
3410 158142c2 bellard
3411 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3412 158142c2 bellard
    if ( 0x403E <= aExp ) {
3413 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3414 158142c2 bellard
            return propagateFloatx80NaN( a, a STATUS_VAR );
3415 158142c2 bellard
        }
3416 158142c2 bellard
        return a;
3417 158142c2 bellard
    }
3418 158142c2 bellard
    if ( aExp < 0x3FFF ) {
3419 158142c2 bellard
        if (    ( aExp == 0 )
3420 158142c2 bellard
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3421 158142c2 bellard
            return a;
3422 158142c2 bellard
        }
3423 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3424 158142c2 bellard
        aSign = extractFloatx80Sign( a );
3425 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3426 158142c2 bellard
         case float_round_nearest_even:
3427 158142c2 bellard
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3428 158142c2 bellard
               ) {
3429 158142c2 bellard
                return
3430 158142c2 bellard
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3431 158142c2 bellard
            }
3432 158142c2 bellard
            break;
3433 158142c2 bellard
         case float_round_down:
3434 158142c2 bellard
            return
3435 158142c2 bellard
                  aSign ?
3436 158142c2 bellard
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3437 158142c2 bellard
                : packFloatx80( 0, 0, 0 );
3438 158142c2 bellard
         case float_round_up:
3439 158142c2 bellard
            return
3440 158142c2 bellard
                  aSign ? packFloatx80( 1, 0, 0 )
3441 158142c2 bellard
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3442 158142c2 bellard
        }
3443 158142c2 bellard
        return packFloatx80( aSign, 0, 0 );
3444 158142c2 bellard
    }
3445 158142c2 bellard
    lastBitMask = 1;
3446 158142c2 bellard
    lastBitMask <<= 0x403E - aExp;
3447 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3448 158142c2 bellard
    z = a;
3449 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3450 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3451 158142c2 bellard
        z.low += lastBitMask>>1;
3452 158142c2 bellard
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3453 158142c2 bellard
    }
3454 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3455 158142c2 bellard
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3456 158142c2 bellard
            z.low += roundBitsMask;
3457 158142c2 bellard
        }
3458 158142c2 bellard
    }
3459 158142c2 bellard
    z.low &= ~ roundBitsMask;
3460 158142c2 bellard
    if ( z.low == 0 ) {
3461 158142c2 bellard
        ++z.high;
3462 158142c2 bellard
        z.low = LIT64( 0x8000000000000000 );
3463 158142c2 bellard
    }
3464 158142c2 bellard
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
3465 158142c2 bellard
    return z;
3466 158142c2 bellard
3467 158142c2 bellard
}
3468 158142c2 bellard
3469 158142c2 bellard
/*----------------------------------------------------------------------------
3470 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
3471 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3472 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
3473 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3474 158142c2 bellard
| Floating-Point Arithmetic.
3475 158142c2 bellard
*----------------------------------------------------------------------------*/
3476 158142c2 bellard
3477 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
3478 158142c2 bellard
{
3479 158142c2 bellard
    int32 aExp, bExp, zExp;
3480 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3481 158142c2 bellard
    int32 expDiff;
3482 158142c2 bellard
3483 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3484 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3485 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3486 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3487 158142c2 bellard
    expDiff = aExp - bExp;
3488 158142c2 bellard
    if ( 0 < expDiff ) {
3489 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3490 158142c2 bellard
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3491 158142c2 bellard
            return a;
3492 158142c2 bellard
        }
3493 158142c2 bellard
        if ( bExp == 0 ) --expDiff;
3494 158142c2 bellard
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3495 158142c2 bellard
        zExp = aExp;
3496 158142c2 bellard
    }
3497 158142c2 bellard
    else if ( expDiff < 0 ) {
3498 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3499 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3500 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3501 158142c2 bellard
        }
3502 158142c2 bellard
        if ( aExp == 0 ) ++expDiff;
3503 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3504 158142c2 bellard
        zExp = bExp;
3505 158142c2 bellard
    }
3506 158142c2 bellard
    else {
3507 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3508 158142c2 bellard
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3509 158142c2 bellard
                return propagateFloatx80NaN( a, b STATUS_VAR );
3510 158142c2 bellard
            }
3511 158142c2 bellard
            return a;
3512 158142c2 bellard
        }
3513 158142c2 bellard
        zSig1 = 0;
3514 158142c2 bellard
        zSig0 = aSig + bSig;
3515 158142c2 bellard
        if ( aExp == 0 ) {
3516 158142c2 bellard
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3517 158142c2 bellard
            goto roundAndPack;
3518 158142c2 bellard
        }
3519 158142c2 bellard
        zExp = aExp;
3520 158142c2 bellard
        goto shiftRight1;
3521 158142c2 bellard
    }
3522 158142c2 bellard
    zSig0 = aSig + bSig;
3523 158142c2 bellard
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3524 158142c2 bellard
 shiftRight1:
3525 158142c2 bellard
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3526 158142c2 bellard
    zSig0 |= LIT64( 0x8000000000000000 );
3527 158142c2 bellard
    ++zExp;
3528 158142c2 bellard
 roundAndPack:
3529 158142c2 bellard
    return
3530 158142c2 bellard
        roundAndPackFloatx80(
3531 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3532 158142c2 bellard
3533 158142c2 bellard
}
3534 158142c2 bellard
3535 158142c2 bellard
/*----------------------------------------------------------------------------
3536 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
3537 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3538 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3539 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3540 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3541 158142c2 bellard
*----------------------------------------------------------------------------*/
3542 158142c2 bellard
3543 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
3544 158142c2 bellard
{
3545 158142c2 bellard
    int32 aExp, bExp, zExp;
3546 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3547 158142c2 bellard
    int32 expDiff;
3548 158142c2 bellard
    floatx80 z;
3549 158142c2 bellard
3550 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3551 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3552 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3553 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3554 158142c2 bellard
    expDiff = aExp - bExp;
3555 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3556 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3557 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3558 158142c2 bellard
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3559 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3560 158142c2 bellard
        }
3561 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3562 158142c2 bellard
        z.low = floatx80_default_nan_low;
3563 158142c2 bellard
        z.high = floatx80_default_nan_high;
3564 158142c2 bellard
        return z;
3565 158142c2 bellard
    }
3566 158142c2 bellard
    if ( aExp == 0 ) {
3567 158142c2 bellard
        aExp = 1;
3568 158142c2 bellard
        bExp = 1;
3569 158142c2 bellard
    }
3570 158142c2 bellard
    zSig1 = 0;
3571 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3572 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3573 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3574 158142c2 bellard
 bExpBigger:
3575 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3576 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3577 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3578 158142c2 bellard
    }
3579 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
3580 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3581 158142c2 bellard
 bBigger:
3582 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3583 158142c2 bellard
    zExp = bExp;
3584 158142c2 bellard
    zSign ^= 1;
3585 158142c2 bellard
    goto normalizeRoundAndPack;
3586 158142c2 bellard
 aExpBigger:
3587 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3588 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3589 158142c2 bellard
        return a;
3590 158142c2 bellard
    }
3591 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
3592 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3593 158142c2 bellard
 aBigger:
3594 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3595 158142c2 bellard
    zExp = aExp;
3596 158142c2 bellard
 normalizeRoundAndPack:
3597 158142c2 bellard
    return
3598 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3599 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3600 158142c2 bellard
3601 158142c2 bellard
}
3602 158142c2 bellard
3603 158142c2 bellard
/*----------------------------------------------------------------------------
3604 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
3605 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
3606 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3607 158142c2 bellard
*----------------------------------------------------------------------------*/
3608 158142c2 bellard
3609 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
3610 158142c2 bellard
{
3611 158142c2 bellard
    flag aSign, bSign;
3612 158142c2 bellard
3613 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3614 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3615 158142c2 bellard
    if ( aSign == bSign ) {
3616 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3617 158142c2 bellard
    }
3618 158142c2 bellard
    else {
3619 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3620 158142c2 bellard
    }
3621 158142c2 bellard
3622 158142c2 bellard
}
3623 158142c2 bellard
3624 158142c2 bellard
/*----------------------------------------------------------------------------
3625 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
3626 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3627 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3628 158142c2 bellard
*----------------------------------------------------------------------------*/
3629 158142c2 bellard
3630 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
3631 158142c2 bellard
{
3632 158142c2 bellard
    flag aSign, bSign;
3633 158142c2 bellard
3634 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3635 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3636 158142c2 bellard
    if ( aSign == bSign ) {
3637 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3638 158142c2 bellard
    }
3639 158142c2 bellard
    else {
3640 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3641 158142c2 bellard
    }
3642 158142c2 bellard
3643 158142c2 bellard
}
3644 158142c2 bellard
3645 158142c2 bellard
/*----------------------------------------------------------------------------
3646 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
3647 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3648 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3649 158142c2 bellard
*----------------------------------------------------------------------------*/
3650 158142c2 bellard
3651 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
3652 158142c2 bellard
{
3653 158142c2 bellard
    flag aSign, bSign, zSign;
3654 158142c2 bellard
    int32 aExp, bExp, zExp;
3655 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3656 158142c2 bellard
    floatx80 z;
3657 158142c2 bellard
3658 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3659 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3660 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3661 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3662 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3663 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3664 158142c2 bellard
    zSign = aSign ^ bSign;
3665 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3666 158142c2 bellard
        if (    (bits64) ( aSig<<1 )
3667 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3668 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3669 158142c2 bellard
        }
3670 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) goto invalid;
3671 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3672 158142c2 bellard
    }
3673 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3674 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3675 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3676 158142c2 bellard
 invalid:
3677 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3678 158142c2 bellard
            z.low = floatx80_default_nan_low;
3679 158142c2 bellard
            z.high = floatx80_default_nan_high;
3680 158142c2 bellard
            return z;
3681 158142c2 bellard
        }
3682 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3683 158142c2 bellard
    }
3684 158142c2 bellard
    if ( aExp == 0 ) {
3685 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3686 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3687 158142c2 bellard
    }
3688 158142c2 bellard
    if ( bExp == 0 ) {
3689 158142c2 bellard
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3690 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3691 158142c2 bellard
    }
3692 158142c2 bellard
    zExp = aExp + bExp - 0x3FFE;
3693 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3694 158142c2 bellard
    if ( 0 < (sbits64) zSig0 ) {
3695 158142c2 bellard
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3696 158142c2 bellard
        --zExp;
3697 158142c2 bellard
    }
3698 158142c2 bellard
    return
3699 158142c2 bellard
        roundAndPackFloatx80(
3700 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3701 158142c2 bellard
3702 158142c2 bellard
}
3703 158142c2 bellard
3704 158142c2 bellard
/*----------------------------------------------------------------------------
3705 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
3706 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
3707 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3708 158142c2 bellard
*----------------------------------------------------------------------------*/
3709 158142c2 bellard
3710 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3711 158142c2 bellard
{
3712 158142c2 bellard
    flag aSign, bSign, zSign;
3713 158142c2 bellard
    int32 aExp, bExp, zExp;
3714 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3715 158142c2 bellard
    bits64 rem0, rem1, rem2, term0, term1, term2;
3716 158142c2 bellard
    floatx80 z;
3717 158142c2 bellard
3718 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3719 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3720 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3721 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3722 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3723 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3724 158142c2 bellard
    zSign = aSign ^ bSign;
3725 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3726 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3727 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3728 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3729 158142c2 bellard
            goto invalid;
3730 158142c2 bellard
        }
3731 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3732 158142c2 bellard
    }
3733 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3734 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3735 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
3736 158142c2 bellard
    }
3737 158142c2 bellard
    if ( bExp == 0 ) {
3738 158142c2 bellard
        if ( bSig == 0 ) {
3739 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3740 158142c2 bellard
 invalid:
3741 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3742 158142c2 bellard
                z.low = floatx80_default_nan_low;
3743 158142c2 bellard
                z.high = floatx80_default_nan_high;
3744 158142c2 bellard
                return z;
3745 158142c2 bellard
            }
3746 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3747 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3748 158142c2 bellard
        }
3749 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3750 158142c2 bellard
    }
3751 158142c2 bellard
    if ( aExp == 0 ) {
3752 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3753 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3754 158142c2 bellard
    }
3755 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
3756 158142c2 bellard
    rem1 = 0;
3757 158142c2 bellard
    if ( bSig <= aSig ) {
3758 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3759 158142c2 bellard
        ++zExp;
3760 158142c2 bellard
    }
3761 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3762 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
3763 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3764 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
3765 158142c2 bellard
        --zSig0;
3766 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3767 158142c2 bellard
    }
3768 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3769 158142c2 bellard
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3770 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
3771 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3772 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
3773 158142c2 bellard
            --zSig1;
3774 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3775 158142c2 bellard
        }
3776 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3777 158142c2 bellard
    }
3778 158142c2 bellard
    return
3779 158142c2 bellard
        roundAndPackFloatx80(
3780 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3781 158142c2 bellard
3782 158142c2 bellard
}
3783 158142c2 bellard
3784 158142c2 bellard
/*----------------------------------------------------------------------------
3785 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
3786 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
3787 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3788 158142c2 bellard
*----------------------------------------------------------------------------*/
3789 158142c2 bellard
3790 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
3791 158142c2 bellard
{
3792 158142c2 bellard
    flag aSign, bSign, zSign;
3793 158142c2 bellard
    int32 aExp, bExp, expDiff;
3794 158142c2 bellard
    bits64 aSig0, aSig1, bSig;
3795 158142c2 bellard
    bits64 q, term0, term1, alternateASig0, alternateASig1;
3796 158142c2 bellard
    floatx80 z;
3797 158142c2 bellard
3798 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
3799 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3800 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3801 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3802 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3803 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3804 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3805 158142c2 bellard
        if (    (bits64) ( aSig0<<1 )
3806 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3807 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3808 158142c2 bellard
        }
3809 158142c2 bellard
        goto invalid;
3810 158142c2 bellard
    }
3811 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3812 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3813 158142c2 bellard
        return a;
3814 158142c2 bellard
    }
3815 158142c2 bellard
    if ( bExp == 0 ) {
3816 158142c2 bellard
        if ( bSig == 0 ) {
3817 158142c2 bellard
 invalid:
3818 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3819 158142c2 bellard
            z.low = floatx80_default_nan_low;
3820 158142c2 bellard
            z.high = floatx80_default_nan_high;
3821 158142c2 bellard
            return z;
3822 158142c2 bellard
        }
3823 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3824 158142c2 bellard
    }
3825 158142c2 bellard
    if ( aExp == 0 ) {
3826 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3827 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3828 158142c2 bellard
    }
3829 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
3830 158142c2 bellard
    zSign = aSign;
3831 158142c2 bellard
    expDiff = aExp - bExp;
3832 158142c2 bellard
    aSig1 = 0;
3833 158142c2 bellard
    if ( expDiff < 0 ) {
3834 158142c2 bellard
        if ( expDiff < -1 ) return a;
3835 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3836 158142c2 bellard
        expDiff = 0;
3837 158142c2 bellard
    }
3838 158142c2 bellard
    q = ( bSig <= aSig0 );
3839 158142c2 bellard
    if ( q ) aSig0 -= bSig;
3840 158142c2 bellard
    expDiff -= 64;
3841 158142c2 bellard
    while ( 0 < expDiff ) {
3842 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3843 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3844 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
3845 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3846 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3847 158142c2 bellard
        expDiff -= 62;
3848 158142c2 bellard
    }
3849 158142c2 bellard
    expDiff += 64;
3850 158142c2 bellard
    if ( 0 < expDiff ) {
3851 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3852 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3853 158142c2 bellard
        q >>= 64 - expDiff;
3854 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3855 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3856 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3857 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
3858 158142c2 bellard
            ++q;
3859 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3860 158142c2 bellard
        }
3861 158142c2 bellard
    }
3862 158142c2 bellard
    else {
3863 158142c2 bellard
        term1 = 0;
3864 158142c2 bellard
        term0 = bSig;
3865 158142c2 bellard
    }
3866 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3867 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3868 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3869 158142c2 bellard
              && ( q & 1 ) )
3870 158142c2 bellard
       ) {
3871 158142c2 bellard
        aSig0 = alternateASig0;
3872 158142c2 bellard
        aSig1 = alternateASig1;
3873 158142c2 bellard
        zSign = ! zSign;
3874 158142c2 bellard
    }
3875 158142c2 bellard
    return
3876 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3877 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
3878 158142c2 bellard
3879 158142c2 bellard
}
3880 158142c2 bellard
3881 158142c2 bellard
/*----------------------------------------------------------------------------
3882 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
3883 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
3884 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3885 158142c2 bellard
*----------------------------------------------------------------------------*/
3886 158142c2 bellard
3887 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
3888 158142c2 bellard
{
3889 158142c2 bellard
    flag aSign;
3890 158142c2 bellard
    int32 aExp, zExp;
3891 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3892 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3893 158142c2 bellard
    floatx80 z;
3894 158142c2 bellard
3895 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
3896 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3897 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3898 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3899 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
3900 158142c2 bellard
        if ( ! aSign ) return a;
3901 158142c2 bellard
        goto invalid;
3902 158142c2 bellard
    }
3903 158142c2 bellard
    if ( aSign ) {
3904 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
3905 158142c2 bellard
 invalid:
3906 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3907 158142c2 bellard
        z.low = floatx80_default_nan_low;
3908 158142c2 bellard
        z.high = floatx80_default_nan_high;
3909 158142c2 bellard
        return z;
3910 158142c2 bellard
    }
3911 158142c2 bellard
    if ( aExp == 0 ) {
3912 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3913 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3914 158142c2 bellard
    }
3915 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3916 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3917 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
3918 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
3919 158142c2 bellard
    doubleZSig0 = zSig0<<1;
3920 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
3921 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3922 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
3923 158142c2 bellard
        --zSig0;
3924 158142c2 bellard
        doubleZSig0 -= 2;
3925 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
3926 158142c2 bellard
    }
3927 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
3928 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
3929 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
3930 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
3931 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3932 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
3933 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3934 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
3935 158142c2 bellard
            --zSig1;
3936 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
3937 158142c2 bellard
            term3 |= 1;
3938 158142c2 bellard
            term2 |= doubleZSig0;
3939 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
3940 158142c2 bellard
        }
3941 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3942 158142c2 bellard
    }
3943 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
3944 158142c2 bellard
    zSig0 |= doubleZSig0;
3945 158142c2 bellard
    return
3946 158142c2 bellard
        roundAndPackFloatx80(
3947 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
3948 158142c2 bellard
3949 158142c2 bellard
}
3950 158142c2 bellard
3951 158142c2 bellard
/*----------------------------------------------------------------------------
3952 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
3953 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3954 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3955 158142c2 bellard
| Arithmetic.
3956 158142c2 bellard
*----------------------------------------------------------------------------*/
3957 158142c2 bellard
3958 750afe93 bellard
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
3959 158142c2 bellard
{
3960 158142c2 bellard
3961 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3962 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3963 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3964 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3965 158142c2 bellard
       ) {
3966 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
3967 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
3968 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3969 158142c2 bellard
        }
3970 158142c2 bellard
        return 0;
3971 158142c2 bellard
    }
3972 158142c2 bellard
    return
3973 158142c2 bellard
           ( a.low == b.low )
3974 158142c2 bellard
        && (    ( a.high == b.high )
3975 158142c2 bellard
             || (    ( a.low == 0 )
3976 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3977 158142c2 bellard
           );
3978 158142c2 bellard
3979 158142c2 bellard
}
3980 158142c2 bellard
3981 158142c2 bellard
/*----------------------------------------------------------------------------
3982 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
3983 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
3984 158142c2 bellard
| comparison is performed according to the IEC/IEEE Standard for Binary
3985 158142c2 bellard
| Floating-Point Arithmetic.
3986 158142c2 bellard
*----------------------------------------------------------------------------*/
3987 158142c2 bellard
3988 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
3989 158142c2 bellard
{
3990 158142c2 bellard
    flag aSign, bSign;
3991 158142c2 bellard
3992 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3993 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3994 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3995 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3996 158142c2 bellard
       ) {
3997 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3998 158142c2 bellard
        return 0;
3999 158142c2 bellard
    }
4000 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4001 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4002 158142c2 bellard
    if ( aSign != bSign ) {
4003 158142c2 bellard
        return
4004 158142c2 bellard
               aSign
4005 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4006 158142c2 bellard
                 == 0 );
4007 158142c2 bellard
    }
4008 158142c2 bellard
    return
4009 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4010 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4011 158142c2 bellard
4012 158142c2 bellard
}
4013 158142c2 bellard
4014 158142c2 bellard
/*----------------------------------------------------------------------------
4015 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4016 158142c2 bellard
| less than the corresponding value `b', and 0 otherwise.  The comparison
4017 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4018 158142c2 bellard
| Arithmetic.
4019 158142c2 bellard
*----------------------------------------------------------------------------*/
4020 158142c2 bellard
4021 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4022 158142c2 bellard
{
4023 158142c2 bellard
    flag aSign, bSign;
4024 158142c2 bellard
4025 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4026 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4027 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4028 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4029 158142c2 bellard
       ) {
4030 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4031 158142c2 bellard
        return 0;
4032 158142c2 bellard
    }
4033 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4034 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4035 158142c2 bellard
    if ( aSign != bSign ) {
4036 158142c2 bellard
        return
4037 158142c2 bellard
               aSign
4038 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4039 158142c2 bellard
                 != 0 );
4040 158142c2 bellard
    }
4041 158142c2 bellard
    return
4042 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4043 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4044 158142c2 bellard
4045 158142c2 bellard
}
4046 158142c2 bellard
4047 158142c2 bellard
/*----------------------------------------------------------------------------
4048 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is equal
4049 158142c2 bellard
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4050 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4051 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4052 158142c2 bellard
*----------------------------------------------------------------------------*/
4053 158142c2 bellard
4054 750afe93 bellard
int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
4055 158142c2 bellard
{
4056 158142c2 bellard
4057 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4058 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4059 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4060 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4061 158142c2 bellard
       ) {
4062 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4063 158142c2 bellard
        return 0;
4064 158142c2 bellard
    }
4065 158142c2 bellard
    return
4066 158142c2 bellard
           ( a.low == b.low )
4067 158142c2 bellard
        && (    ( a.high == b.high )
4068 158142c2 bellard
             || (    ( a.low == 0 )
4069 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4070 158142c2 bellard
           );
4071 158142c2 bellard
4072 158142c2 bellard
}
4073 158142c2 bellard
4074 158142c2 bellard
/*----------------------------------------------------------------------------
4075 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4076 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4077 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4078 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4079 158142c2 bellard
*----------------------------------------------------------------------------*/
4080 158142c2 bellard
4081 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4082 158142c2 bellard
{
4083 158142c2 bellard
    flag aSign, bSign;
4084 158142c2 bellard
4085 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4086 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4087 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4088 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4089 158142c2 bellard
       ) {
4090 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4091 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4092 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4093 158142c2 bellard
        }
4094 158142c2 bellard
        return 0;
4095 158142c2 bellard
    }
4096 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4097 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4098 158142c2 bellard
    if ( aSign != bSign ) {
4099 158142c2 bellard
        return
4100 158142c2 bellard
               aSign
4101 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4102 158142c2 bellard
                 == 0 );
4103 158142c2 bellard
    }
4104 158142c2 bellard
    return
4105 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4106 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4107 158142c2 bellard
4108 158142c2 bellard
}
4109 158142c2 bellard
4110 158142c2 bellard
/*----------------------------------------------------------------------------
4111 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4112 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4113 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4114 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4115 158142c2 bellard
*----------------------------------------------------------------------------*/
4116 158142c2 bellard
4117 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4118 158142c2 bellard
{
4119 158142c2 bellard
    flag aSign, bSign;
4120 158142c2 bellard
4121 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4122 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4123 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4124 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4125 158142c2 bellard
       ) {
4126 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4127 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4128 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4129 158142c2 bellard
        }
4130 158142c2 bellard
        return 0;
4131 158142c2 bellard
    }
4132 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4133 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4134 158142c2 bellard
    if ( aSign != bSign ) {
4135 158142c2 bellard
        return
4136 158142c2 bellard
               aSign
4137 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4138 158142c2 bellard
                 != 0 );
4139 158142c2 bellard
    }
4140 158142c2 bellard
    return
4141 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4142 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4143 158142c2 bellard
4144 158142c2 bellard
}
4145 158142c2 bellard
4146 158142c2 bellard
#endif
4147 158142c2 bellard
4148 158142c2 bellard
#ifdef FLOAT128
4149 158142c2 bellard
4150 158142c2 bellard
/*----------------------------------------------------------------------------
4151 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4152 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4153 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4154 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4155 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4156 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4157 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4158 158142c2 bellard
*----------------------------------------------------------------------------*/
4159 158142c2 bellard
4160 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4161 158142c2 bellard
{
4162 158142c2 bellard
    flag aSign;
4163 158142c2 bellard
    int32 aExp, shiftCount;
4164 158142c2 bellard
    bits64 aSig0, aSig1;
4165 158142c2 bellard
4166 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4167 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4168 158142c2 bellard
    aExp = extractFloat128Exp( a );
4169 158142c2 bellard
    aSign = extractFloat128Sign( a );
4170 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4171 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4172 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4173 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4174 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4175 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4176 158142c2 bellard
4177 158142c2 bellard
}
4178 158142c2 bellard
4179 158142c2 bellard
/*----------------------------------------------------------------------------
4180 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4181 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4182 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4183 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4184 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4185 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4186 158142c2 bellard
| returned.
4187 158142c2 bellard
*----------------------------------------------------------------------------*/
4188 158142c2 bellard
4189 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4190 158142c2 bellard
{
4191 158142c2 bellard
    flag aSign;
4192 158142c2 bellard
    int32 aExp, shiftCount;
4193 158142c2 bellard
    bits64 aSig0, aSig1, savedASig;
4194 158142c2 bellard
    int32 z;
4195 158142c2 bellard
4196 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4197 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4198 158142c2 bellard
    aExp = extractFloat128Exp( a );
4199 158142c2 bellard
    aSign = extractFloat128Sign( a );
4200 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4201 158142c2 bellard
    if ( 0x401E < aExp ) {
4202 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4203 158142c2 bellard
        goto invalid;
4204 158142c2 bellard
    }
4205 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4206 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4207 158142c2 bellard
        return 0;
4208 158142c2 bellard
    }
4209 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4210 158142c2 bellard
    shiftCount = 0x402F - aExp;
4211 158142c2 bellard
    savedASig = aSig0;
4212 158142c2 bellard
    aSig0 >>= shiftCount;
4213 158142c2 bellard
    z = aSig0;
4214 158142c2 bellard
    if ( aSign ) z = - z;
4215 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4216 158142c2 bellard
 invalid:
4217 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4218 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4219 158142c2 bellard
    }
4220 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4221 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4222 158142c2 bellard
    }
4223 158142c2 bellard
    return z;
4224 158142c2 bellard
4225 158142c2 bellard
}
4226 158142c2 bellard
4227 158142c2 bellard
/*----------------------------------------------------------------------------
4228 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4229 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4230 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4231 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4232 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4233 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4234 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4235 158142c2 bellard
*----------------------------------------------------------------------------*/
4236 158142c2 bellard
4237 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4238 158142c2 bellard
{
4239 158142c2 bellard
    flag aSign;
4240 158142c2 bellard
    int32 aExp, shiftCount;
4241 158142c2 bellard
    bits64 aSig0, aSig1;
4242 158142c2 bellard
4243 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4244 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4245 158142c2 bellard
    aExp = extractFloat128Exp( a );
4246 158142c2 bellard
    aSign = extractFloat128Sign( a );
4247 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4248 158142c2 bellard
    shiftCount = 0x402F - aExp;
4249 158142c2 bellard
    if ( shiftCount <= 0 ) {
4250 158142c2 bellard
        if ( 0x403E < aExp ) {
4251 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4252 158142c2 bellard
            if (    ! aSign
4253 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4254 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4255 158142c2 bellard
                    )
4256 158142c2 bellard
               ) {
4257 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4258 158142c2 bellard
            }
4259 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4260 158142c2 bellard
        }
4261 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4262 158142c2 bellard
    }
4263 158142c2 bellard
    else {
4264 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4265 158142c2 bellard
    }
4266 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4267 158142c2 bellard
4268 158142c2 bellard
}
4269 158142c2 bellard
4270 158142c2 bellard
/*----------------------------------------------------------------------------
4271 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4272 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4273 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4274 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4275 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4276 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4277 158142c2 bellard
| returned.
4278 158142c2 bellard
*----------------------------------------------------------------------------*/
4279 158142c2 bellard
4280 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4281 158142c2 bellard
{
4282 158142c2 bellard
    flag aSign;
4283 158142c2 bellard
    int32 aExp, shiftCount;
4284 158142c2 bellard
    bits64 aSig0, aSig1;
4285 158142c2 bellard
    int64 z;
4286 158142c2 bellard
4287 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4288 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4289 158142c2 bellard
    aExp = extractFloat128Exp( a );
4290 158142c2 bellard
    aSign = extractFloat128Sign( a );
4291 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4292 158142c2 bellard
    shiftCount = aExp - 0x402F;
4293 158142c2 bellard
    if ( 0 < shiftCount ) {
4294 158142c2 bellard
        if ( 0x403E <= aExp ) {
4295 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4296 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4297 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4298 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4299 158142c2 bellard
            }
4300 158142c2 bellard
            else {
4301 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4302 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4303 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4304 158142c2 bellard
                }
4305 158142c2 bellard
            }
4306 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4307 158142c2 bellard
        }
4308 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4309 158142c2 bellard
        if ( (bits64) ( aSig1<<shiftCount ) ) {
4310 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4311 158142c2 bellard
        }
4312 158142c2 bellard
    }
4313 158142c2 bellard
    else {
4314 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4315 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4316 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4317 158142c2 bellard
            }
4318 158142c2 bellard
            return 0;
4319 158142c2 bellard
        }
4320 158142c2 bellard
        z = aSig0>>( - shiftCount );
4321 158142c2 bellard
        if (    aSig1
4322 158142c2 bellard
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4323 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4324 158142c2 bellard
        }
4325 158142c2 bellard
    }
4326 158142c2 bellard
    if ( aSign ) z = - z;
4327 158142c2 bellard
    return z;
4328 158142c2 bellard
4329 158142c2 bellard
}
4330 158142c2 bellard
4331 158142c2 bellard
/*----------------------------------------------------------------------------
4332 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4333 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4334 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4335 158142c2 bellard
| Arithmetic.
4336 158142c2 bellard
*----------------------------------------------------------------------------*/
4337 158142c2 bellard
4338 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4339 158142c2 bellard
{
4340 158142c2 bellard
    flag aSign;
4341 158142c2 bellard
    int32 aExp;
4342 158142c2 bellard
    bits64 aSig0, aSig1;
4343 158142c2 bellard
    bits32 zSig;
4344 158142c2 bellard
4345 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4346 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4347 158142c2 bellard
    aExp = extractFloat128Exp( a );
4348 158142c2 bellard
    aSign = extractFloat128Sign( a );
4349 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4350 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4351 158142c2 bellard
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4352 158142c2 bellard
        }
4353 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
4354 158142c2 bellard
    }
4355 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4356 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
4357 158142c2 bellard
    zSig = aSig0;
4358 158142c2 bellard
    if ( aExp || zSig ) {
4359 158142c2 bellard
        zSig |= 0x40000000;
4360 158142c2 bellard
        aExp -= 0x3F81;
4361 158142c2 bellard
    }
4362 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4363 158142c2 bellard
4364 158142c2 bellard
}
4365 158142c2 bellard
4366 158142c2 bellard
/*----------------------------------------------------------------------------
4367 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4368 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
4369 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4370 158142c2 bellard
| Arithmetic.
4371 158142c2 bellard
*----------------------------------------------------------------------------*/
4372 158142c2 bellard
4373 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
4374 158142c2 bellard
{
4375 158142c2 bellard
    flag aSign;
4376 158142c2 bellard
    int32 aExp;
4377 158142c2 bellard
    bits64 aSig0, aSig1;
4378 158142c2 bellard
4379 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4380 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4381 158142c2 bellard
    aExp = extractFloat128Exp( a );
4382 158142c2 bellard
    aSign = extractFloat128Sign( a );
4383 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4384 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4385 158142c2 bellard
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4386 158142c2 bellard
        }
4387 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
4388 158142c2 bellard
    }
4389 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4390 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4391 158142c2 bellard
    if ( aExp || aSig0 ) {
4392 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4393 158142c2 bellard
        aExp -= 0x3C01;
4394 158142c2 bellard
    }
4395 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4396 158142c2 bellard
4397 158142c2 bellard
}
4398 158142c2 bellard
4399 158142c2 bellard
#ifdef FLOATX80
4400 158142c2 bellard
4401 158142c2 bellard
/*----------------------------------------------------------------------------
4402 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4403 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
4404 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4405 158142c2 bellard
| Floating-Point Arithmetic.
4406 158142c2 bellard
*----------------------------------------------------------------------------*/
4407 158142c2 bellard
4408 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4409 158142c2 bellard
{
4410 158142c2 bellard
    flag aSign;
4411 158142c2 bellard
    int32 aExp;
4412 158142c2 bellard
    bits64 aSig0, aSig1;
4413 158142c2 bellard
4414 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4415 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4416 158142c2 bellard
    aExp = extractFloat128Exp( a );
4417 158142c2 bellard
    aSign = extractFloat128Sign( a );
4418 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4419 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4420 158142c2 bellard
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4421 158142c2 bellard
        }
4422 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4423 158142c2 bellard
    }
4424 158142c2 bellard
    if ( aExp == 0 ) {
4425 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4426 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4427 158142c2 bellard
    }
4428 158142c2 bellard
    else {
4429 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
4430 158142c2 bellard
    }
4431 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4432 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4433 158142c2 bellard
4434 158142c2 bellard
}
4435 158142c2 bellard
4436 158142c2 bellard
#endif
4437 158142c2 bellard
4438 158142c2 bellard
/*----------------------------------------------------------------------------
4439 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
4440 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
4441 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
4442 158142c2 bellard
| Floating-Point Arithmetic.
4443 158142c2 bellard
*----------------------------------------------------------------------------*/
4444 158142c2 bellard
4445 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
4446 158142c2 bellard
{
4447 158142c2 bellard
    flag aSign;
4448 158142c2 bellard
    int32 aExp;
4449 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
4450 158142c2 bellard
    int8 roundingMode;
4451 158142c2 bellard
    float128 z;
4452 158142c2 bellard
4453 158142c2 bellard
    aExp = extractFloat128Exp( a );
4454 158142c2 bellard
    if ( 0x402F <= aExp ) {
4455 158142c2 bellard
        if ( 0x406F <= aExp ) {
4456 158142c2 bellard
            if (    ( aExp == 0x7FFF )
4457 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4458 158142c2 bellard
               ) {
4459 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
4460 158142c2 bellard
            }
4461 158142c2 bellard
            return a;
4462 158142c2 bellard
        }
4463 158142c2 bellard
        lastBitMask = 1;
4464 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4465 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4466 158142c2 bellard
        z = a;
4467 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4468 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4469 158142c2 bellard
            if ( lastBitMask ) {
4470 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4471 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4472 158142c2 bellard
            }
4473 158142c2 bellard
            else {
4474 158142c2 bellard
                if ( (sbits64) z.low < 0 ) {
4475 158142c2 bellard
                    ++z.high;
4476 158142c2 bellard
                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4477 158142c2 bellard
                }
4478 158142c2 bellard
            }
4479 158142c2 bellard
        }
4480 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4481 158142c2 bellard
            if (   extractFloat128Sign( z )
4482 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4483 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4484 158142c2 bellard
            }
4485 158142c2 bellard
        }
4486 158142c2 bellard
        z.low &= ~ roundBitsMask;
4487 158142c2 bellard
    }
4488 158142c2 bellard
    else {
4489 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4490 158142c2 bellard
            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4491 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4492 158142c2 bellard
            aSign = extractFloat128Sign( a );
4493 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
4494 158142c2 bellard
             case float_round_nearest_even:
4495 158142c2 bellard
                if (    ( aExp == 0x3FFE )
4496 158142c2 bellard
                     && (   extractFloat128Frac0( a )
4497 158142c2 bellard
                          | extractFloat128Frac1( a ) )
4498 158142c2 bellard
                   ) {
4499 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4500 158142c2 bellard
                }
4501 158142c2 bellard
                break;
4502 158142c2 bellard
             case float_round_down:
4503 158142c2 bellard
                return
4504 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4505 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
4506 158142c2 bellard
             case float_round_up:
4507 158142c2 bellard
                return
4508 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
4509 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
4510 158142c2 bellard
            }
4511 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
4512 158142c2 bellard
        }
4513 158142c2 bellard
        lastBitMask = 1;
4514 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
4515 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4516 158142c2 bellard
        z.low = 0;
4517 158142c2 bellard
        z.high = a.high;
4518 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4519 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4520 158142c2 bellard
            z.high += lastBitMask>>1;
4521 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4522 158142c2 bellard
                z.high &= ~ lastBitMask;
4523 158142c2 bellard
            }
4524 158142c2 bellard
        }
4525 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4526 158142c2 bellard
            if (   extractFloat128Sign( z )
4527 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4528 158142c2 bellard
                z.high |= ( a.low != 0 );
4529 158142c2 bellard
                z.high += roundBitsMask;
4530 158142c2 bellard
            }
4531 158142c2 bellard
        }
4532 158142c2 bellard
        z.high &= ~ roundBitsMask;
4533 158142c2 bellard
    }
4534 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4535 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4536 158142c2 bellard
    }
4537 158142c2 bellard
    return z;
4538 158142c2 bellard
4539 158142c2 bellard
}
4540 158142c2 bellard
4541 158142c2 bellard
/*----------------------------------------------------------------------------
4542 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
4543 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4544 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
4545 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4546 158142c2 bellard
| Floating-Point Arithmetic.
4547 158142c2 bellard
*----------------------------------------------------------------------------*/
4548 158142c2 bellard
4549 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4550 158142c2 bellard
{
4551 158142c2 bellard
    int32 aExp, bExp, zExp;
4552 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4553 158142c2 bellard
    int32 expDiff;
4554 158142c2 bellard
4555 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4556 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4557 158142c2 bellard
    aExp = extractFloat128Exp( a );
4558 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4559 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4560 158142c2 bellard
    bExp = extractFloat128Exp( b );
4561 158142c2 bellard
    expDiff = aExp - bExp;
4562 158142c2 bellard
    if ( 0 < expDiff ) {
4563 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4564 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4565 158142c2 bellard
            return a;
4566 158142c2 bellard
        }
4567 158142c2 bellard
        if ( bExp == 0 ) {
4568 158142c2 bellard
            --expDiff;
4569 158142c2 bellard
        }
4570 158142c2 bellard
        else {
4571 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
4572 158142c2 bellard
        }
4573 158142c2 bellard
        shift128ExtraRightJamming(
4574 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4575 158142c2 bellard
        zExp = aExp;
4576 158142c2 bellard
    }
4577 158142c2 bellard
    else if ( expDiff < 0 ) {
4578 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4579 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4580 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4581 158142c2 bellard
        }
4582 158142c2 bellard
        if ( aExp == 0 ) {
4583 158142c2 bellard
            ++expDiff;
4584 158142c2 bellard
        }
4585 158142c2 bellard
        else {
4586 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
4587 158142c2 bellard
        }
4588 158142c2 bellard
        shift128ExtraRightJamming(
4589 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4590 158142c2 bellard
        zExp = bExp;
4591 158142c2 bellard
    }
4592 158142c2 bellard
    else {
4593 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4594 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4595 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
4596 158142c2 bellard
            }
4597 158142c2 bellard
            return a;
4598 158142c2 bellard
        }
4599 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4600 158142c2 bellard
        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4601 158142c2 bellard
        zSig2 = 0;
4602 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
4603 158142c2 bellard
        zExp = aExp;
4604 158142c2 bellard
        goto shiftRight1;
4605 158142c2 bellard
    }
4606 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4607 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4608 158142c2 bellard
    --zExp;
4609 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4610 158142c2 bellard
    ++zExp;
4611 158142c2 bellard
 shiftRight1:
4612 158142c2 bellard
    shift128ExtraRightJamming(
4613 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4614 158142c2 bellard
 roundAndPack:
4615 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4616 158142c2 bellard
4617 158142c2 bellard
}
4618 158142c2 bellard
4619 158142c2 bellard
/*----------------------------------------------------------------------------
4620 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
4621 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
4622 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4623 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4624 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4625 158142c2 bellard
*----------------------------------------------------------------------------*/
4626 158142c2 bellard
4627 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4628 158142c2 bellard
{
4629 158142c2 bellard
    int32 aExp, bExp, zExp;
4630 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4631 158142c2 bellard
    int32 expDiff;
4632 158142c2 bellard
    float128 z;
4633 158142c2 bellard
4634 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4635 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4636 158142c2 bellard
    aExp = extractFloat128Exp( a );
4637 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4638 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4639 158142c2 bellard
    bExp = extractFloat128Exp( b );
4640 158142c2 bellard
    expDiff = aExp - bExp;
4641 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4642 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4643 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4644 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
4645 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4646 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4647 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4648 158142c2 bellard
        }
4649 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4650 158142c2 bellard
        z.low = float128_default_nan_low;
4651 158142c2 bellard
        z.high = float128_default_nan_high;
4652 158142c2 bellard
        return z;
4653 158142c2 bellard
    }
4654 158142c2 bellard
    if ( aExp == 0 ) {
4655 158142c2 bellard
        aExp = 1;
4656 158142c2 bellard
        bExp = 1;
4657 158142c2 bellard
    }
4658 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
4659 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
4660 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
4661 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
4662 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4663 158142c2 bellard
 bExpBigger:
4664 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4665 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4666 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4667 158142c2 bellard
    }
4668 158142c2 bellard
    if ( aExp == 0 ) {
4669 158142c2 bellard
        ++expDiff;
4670 158142c2 bellard
    }
4671 158142c2 bellard
    else {
4672 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4673 158142c2 bellard
    }
4674 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4675 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
4676 158142c2 bellard
 bBigger:
4677 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4678 158142c2 bellard
    zExp = bExp;
4679 158142c2 bellard
    zSign ^= 1;
4680 158142c2 bellard
    goto normalizeRoundAndPack;
4681 158142c2 bellard
 aExpBigger:
4682 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4683 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4684 158142c2 bellard
        return a;
4685 158142c2 bellard
    }
4686 158142c2 bellard
    if ( bExp == 0 ) {
4687 158142c2 bellard
        --expDiff;
4688 158142c2 bellard
    }
4689 158142c2 bellard
    else {
4690 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
4691 158142c2 bellard
    }
4692 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4693 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
4694 158142c2 bellard
 aBigger:
4695 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4696 158142c2 bellard
    zExp = aExp;
4697 158142c2 bellard
 normalizeRoundAndPack:
4698 158142c2 bellard
    --zExp;
4699 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4700 158142c2 bellard
4701 158142c2 bellard
}
4702 158142c2 bellard
4703 158142c2 bellard
/*----------------------------------------------------------------------------
4704 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
4705 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4706 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4707 158142c2 bellard
*----------------------------------------------------------------------------*/
4708 158142c2 bellard
4709 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
4710 158142c2 bellard
{
4711 158142c2 bellard
    flag aSign, bSign;
4712 158142c2 bellard
4713 158142c2 bellard
    aSign = extractFloat128Sign( a );
4714 158142c2 bellard
    bSign = extractFloat128Sign( b );
4715 158142c2 bellard
    if ( aSign == bSign ) {
4716 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4717 158142c2 bellard
    }
4718 158142c2 bellard
    else {
4719 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4720 158142c2 bellard
    }
4721 158142c2 bellard
4722 158142c2 bellard
}
4723 158142c2 bellard
4724 158142c2 bellard
/*----------------------------------------------------------------------------
4725 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
4726 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4727 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4728 158142c2 bellard
*----------------------------------------------------------------------------*/
4729 158142c2 bellard
4730 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
4731 158142c2 bellard
{
4732 158142c2 bellard
    flag aSign, bSign;
4733 158142c2 bellard
4734 158142c2 bellard
    aSign = extractFloat128Sign( a );
4735 158142c2 bellard
    bSign = extractFloat128Sign( b );
4736 158142c2 bellard
    if ( aSign == bSign ) {
4737 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4738 158142c2 bellard
    }
4739 158142c2 bellard
    else {
4740 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4741 158142c2 bellard
    }
4742 158142c2 bellard
4743 158142c2 bellard
}
4744 158142c2 bellard
4745 158142c2 bellard
/*----------------------------------------------------------------------------
4746 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
4747 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4748 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4749 158142c2 bellard
*----------------------------------------------------------------------------*/
4750 158142c2 bellard
4751 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
4752 158142c2 bellard
{
4753 158142c2 bellard
    flag aSign, bSign, zSign;
4754 158142c2 bellard
    int32 aExp, bExp, zExp;
4755 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4756 158142c2 bellard
    float128 z;
4757 158142c2 bellard
4758 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4759 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4760 158142c2 bellard
    aExp = extractFloat128Exp( a );
4761 158142c2 bellard
    aSign = extractFloat128Sign( a );
4762 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4763 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4764 158142c2 bellard
    bExp = extractFloat128Exp( b );
4765 158142c2 bellard
    bSign = extractFloat128Sign( b );
4766 158142c2 bellard
    zSign = aSign ^ bSign;
4767 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4768 158142c2 bellard
        if (    ( aSig0 | aSig1 )
4769 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4770 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4771 158142c2 bellard
        }
4772 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4773 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4774 158142c2 bellard
    }
4775 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4776 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4777 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4778 158142c2 bellard
 invalid:
4779 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4780 158142c2 bellard
            z.low = float128_default_nan_low;
4781 158142c2 bellard
            z.high = float128_default_nan_high;
4782 158142c2 bellard
            return z;
4783 158142c2 bellard
        }
4784 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4785 158142c2 bellard
    }
4786 158142c2 bellard
    if ( aExp == 0 ) {
4787 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4788 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4789 158142c2 bellard
    }
4790 158142c2 bellard
    if ( bExp == 0 ) {
4791 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4792 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4793 158142c2 bellard
    }
4794 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
4795 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4796 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4797 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4798 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4799 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
4800 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4801 158142c2 bellard
        shift128ExtraRightJamming(
4802 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4803 158142c2 bellard
        ++zExp;
4804 158142c2 bellard
    }
4805 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4806 158142c2 bellard
4807 158142c2 bellard
}
4808 158142c2 bellard
4809 158142c2 bellard
/*----------------------------------------------------------------------------
4810 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
4811 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
4812 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4813 158142c2 bellard
*----------------------------------------------------------------------------*/
4814 158142c2 bellard
4815 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
4816 158142c2 bellard
{
4817 158142c2 bellard
    flag aSign, bSign, zSign;
4818 158142c2 bellard
    int32 aExp, bExp, zExp;
4819 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4820 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4821 158142c2 bellard
    float128 z;
4822 158142c2 bellard
4823 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4824 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4825 158142c2 bellard
    aExp = extractFloat128Exp( a );
4826 158142c2 bellard
    aSign = extractFloat128Sign( a );
4827 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4828 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4829 158142c2 bellard
    bExp = extractFloat128Exp( b );
4830 158142c2 bellard
    bSign = extractFloat128Sign( b );
4831 158142c2 bellard
    zSign = aSign ^ bSign;
4832 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4833 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4834 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4835 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4836 158142c2 bellard
            goto invalid;
4837 158142c2 bellard
        }
4838 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4839 158142c2 bellard
    }
4840 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4841 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4842 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
4843 158142c2 bellard
    }
4844 158142c2 bellard
    if ( bExp == 0 ) {
4845 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
4846 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4847 158142c2 bellard
 invalid:
4848 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4849 158142c2 bellard
                z.low = float128_default_nan_low;
4850 158142c2 bellard
                z.high = float128_default_nan_high;
4851 158142c2 bellard
                return z;
4852 158142c2 bellard
            }
4853 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4854 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4855 158142c2 bellard
        }
4856 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4857 158142c2 bellard
    }
4858 158142c2 bellard
    if ( aExp == 0 ) {
4859 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4860 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4861 158142c2 bellard
    }
4862 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
4863 158142c2 bellard
    shortShift128Left(
4864 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4865 158142c2 bellard
    shortShift128Left(
4866 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4867 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4868 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4869 158142c2 bellard
        ++zExp;
4870 158142c2 bellard
    }
4871 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4872 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4873 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
4874 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4875 158142c2 bellard
        --zSig0;
4876 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
4877 158142c2 bellard
    }
4878 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
4879 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
4880 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
4881 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
4882 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4883 158142c2 bellard
            --zSig1;
4884 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
4885 158142c2 bellard
        }
4886 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4887 158142c2 bellard
    }
4888 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
4889 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4890 158142c2 bellard
4891 158142c2 bellard
}
4892 158142c2 bellard
4893 158142c2 bellard
/*----------------------------------------------------------------------------
4894 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
4895 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
4896 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4897 158142c2 bellard
*----------------------------------------------------------------------------*/
4898 158142c2 bellard
4899 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
4900 158142c2 bellard
{
4901 158142c2 bellard
    flag aSign, bSign, zSign;
4902 158142c2 bellard
    int32 aExp, bExp, expDiff;
4903 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
4904 158142c2 bellard
    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
4905 158142c2 bellard
    sbits64 sigMean0;
4906 158142c2 bellard
    float128 z;
4907 158142c2 bellard
4908 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4909 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4910 158142c2 bellard
    aExp = extractFloat128Exp( a );
4911 158142c2 bellard
    aSign = extractFloat128Sign( a );
4912 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4913 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4914 158142c2 bellard
    bExp = extractFloat128Exp( b );
4915 158142c2 bellard
    bSign = extractFloat128Sign( b );
4916 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4917 158142c2 bellard
        if (    ( aSig0 | aSig1 )
4918 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4919 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4920 158142c2 bellard
        }
4921 158142c2 bellard
        goto invalid;
4922 158142c2 bellard
    }
4923 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4924 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4925 158142c2 bellard
        return a;
4926 158142c2 bellard
    }
4927 158142c2 bellard
    if ( bExp == 0 ) {
4928 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
4929 158142c2 bellard
 invalid:
4930 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4931 158142c2 bellard
            z.low = float128_default_nan_low;
4932 158142c2 bellard
            z.high = float128_default_nan_high;
4933 158142c2 bellard
            return z;
4934 158142c2 bellard
        }
4935 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4936 158142c2 bellard
    }
4937 158142c2 bellard
    if ( aExp == 0 ) {
4938 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
4939 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4940 158142c2 bellard
    }
4941 158142c2 bellard
    expDiff = aExp - bExp;
4942 158142c2 bellard
    if ( expDiff < -1 ) return a;
4943 158142c2 bellard
    shortShift128Left(
4944 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
4945 158142c2 bellard
        aSig1,
4946 158142c2 bellard
        15 - ( expDiff < 0 ),
4947 158142c2 bellard
        &aSig0,
4948 158142c2 bellard
        &aSig1
4949 158142c2 bellard
    );
4950 158142c2 bellard
    shortShift128Left(
4951 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4952 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
4953 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
4954 158142c2 bellard
    expDiff -= 64;
4955 158142c2 bellard
    while ( 0 < expDiff ) {
4956 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
4957 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
4958 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
4959 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
4960 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
4961 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
4962 158142c2 bellard
        expDiff -= 61;
4963 158142c2 bellard
    }
4964 158142c2 bellard
    if ( -64 < expDiff ) {
4965 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
4966 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
4967 158142c2 bellard
        q >>= - expDiff;
4968 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
4969 158142c2 bellard
        expDiff += 52;
4970 158142c2 bellard
        if ( expDiff < 0 ) {
4971 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4972 158142c2 bellard
        }
4973 158142c2 bellard
        else {
4974 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
4975 158142c2 bellard
        }
4976 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
4977 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
4978 158142c2 bellard
    }
4979 158142c2 bellard
    else {
4980 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
4981 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
4982 158142c2 bellard
    }
4983 158142c2 bellard
    do {
4984 158142c2 bellard
        alternateASig0 = aSig0;
4985 158142c2 bellard
        alternateASig1 = aSig1;
4986 158142c2 bellard
        ++q;
4987 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
4988 158142c2 bellard
    } while ( 0 <= (sbits64) aSig0 );
4989 158142c2 bellard
    add128(
4990 158142c2 bellard
        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
4991 158142c2 bellard
    if (    ( sigMean0 < 0 )
4992 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
4993 158142c2 bellard
        aSig0 = alternateASig0;
4994 158142c2 bellard
        aSig1 = alternateASig1;
4995 158142c2 bellard
    }
4996 158142c2 bellard
    zSign = ( (sbits64) aSig0 < 0 );
4997 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
4998 158142c2 bellard
    return
4999 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5000 158142c2 bellard
5001 158142c2 bellard
}
5002 158142c2 bellard
5003 158142c2 bellard
/*----------------------------------------------------------------------------
5004 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5005 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5006 158142c2 bellard
| Floating-Point Arithmetic.
5007 158142c2 bellard
*----------------------------------------------------------------------------*/
5008 158142c2 bellard
5009 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5010 158142c2 bellard
{
5011 158142c2 bellard
    flag aSign;
5012 158142c2 bellard
    int32 aExp, zExp;
5013 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5014 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5015 158142c2 bellard
    float128 z;
5016 158142c2 bellard
5017 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5018 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5019 158142c2 bellard
    aExp = extractFloat128Exp( a );
5020 158142c2 bellard
    aSign = extractFloat128Sign( a );
5021 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5022 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5023 158142c2 bellard
        if ( ! aSign ) return a;
5024 158142c2 bellard
        goto invalid;
5025 158142c2 bellard
    }
5026 158142c2 bellard
    if ( aSign ) {
5027 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5028 158142c2 bellard
 invalid:
5029 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5030 158142c2 bellard
        z.low = float128_default_nan_low;
5031 158142c2 bellard
        z.high = float128_default_nan_high;
5032 158142c2 bellard
        return z;
5033 158142c2 bellard
    }
5034 158142c2 bellard
    if ( aExp == 0 ) {
5035 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5036 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5037 158142c2 bellard
    }
5038 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5039 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5040 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5041 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5042 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5043 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5044 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5045 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5046 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
5047 158142c2 bellard
        --zSig0;
5048 158142c2 bellard
        doubleZSig0 -= 2;
5049 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5050 158142c2 bellard
    }
5051 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5052 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5053 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5054 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5055 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5056 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5057 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5058 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
5059 158142c2 bellard
            --zSig1;
5060 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5061 158142c2 bellard
            term3 |= 1;
5062 158142c2 bellard
            term2 |= doubleZSig0;
5063 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5064 158142c2 bellard
        }
5065 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5066 158142c2 bellard
    }
5067 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5068 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5069 158142c2 bellard
5070 158142c2 bellard
}
5071 158142c2 bellard
5072 158142c2 bellard
/*----------------------------------------------------------------------------
5073 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5074 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5075 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5076 158142c2 bellard
*----------------------------------------------------------------------------*/
5077 158142c2 bellard
5078 750afe93 bellard
int float128_eq( float128 a, float128 b STATUS_PARAM )
5079 158142c2 bellard
{
5080 158142c2 bellard
5081 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5082 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5083 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5084 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5085 158142c2 bellard
       ) {
5086 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5087 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5088 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5089 158142c2 bellard
        }
5090 158142c2 bellard
        return 0;
5091 158142c2 bellard
    }
5092 158142c2 bellard
    return
5093 158142c2 bellard
           ( a.low == b.low )
5094 158142c2 bellard
        && (    ( a.high == b.high )
5095 158142c2 bellard
             || (    ( a.low == 0 )
5096 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5097 158142c2 bellard
           );
5098 158142c2 bellard
5099 158142c2 bellard
}
5100 158142c2 bellard
5101 158142c2 bellard
/*----------------------------------------------------------------------------
5102 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5103 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
5104 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5105 158142c2 bellard
| Arithmetic.
5106 158142c2 bellard
*----------------------------------------------------------------------------*/
5107 158142c2 bellard
5108 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5109 158142c2 bellard
{
5110 158142c2 bellard
    flag aSign, bSign;
5111 158142c2 bellard
5112 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5113 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5114 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5115 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5116 158142c2 bellard
       ) {
5117 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5118 158142c2 bellard
        return 0;
5119 158142c2 bellard
    }
5120 158142c2 bellard
    aSign = extractFloat128Sign( a );
5121 158142c2 bellard
    bSign = extractFloat128Sign( b );
5122 158142c2 bellard
    if ( aSign != bSign ) {
5123 158142c2 bellard
        return
5124 158142c2 bellard
               aSign
5125 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5126 158142c2 bellard
                 == 0 );
5127 158142c2 bellard
    }
5128 158142c2 bellard
    return
5129 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5130 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5131 158142c2 bellard
5132 158142c2 bellard
}
5133 158142c2 bellard
5134 158142c2 bellard
/*----------------------------------------------------------------------------
5135 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5136 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5137 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5138 158142c2 bellard
*----------------------------------------------------------------------------*/
5139 158142c2 bellard
5140 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5141 158142c2 bellard
{
5142 158142c2 bellard
    flag aSign, bSign;
5143 158142c2 bellard
5144 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5145 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5146 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5147 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5148 158142c2 bellard
       ) {
5149 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5150 158142c2 bellard
        return 0;
5151 158142c2 bellard
    }
5152 158142c2 bellard
    aSign = extractFloat128Sign( a );
5153 158142c2 bellard
    bSign = extractFloat128Sign( b );
5154 158142c2 bellard
    if ( aSign != bSign ) {
5155 158142c2 bellard
        return
5156 158142c2 bellard
               aSign
5157 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5158 158142c2 bellard
                 != 0 );
5159 158142c2 bellard
    }
5160 158142c2 bellard
    return
5161 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5162 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5163 158142c2 bellard
5164 158142c2 bellard
}
5165 158142c2 bellard
5166 158142c2 bellard
/*----------------------------------------------------------------------------
5167 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5168 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5169 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5170 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5171 158142c2 bellard
*----------------------------------------------------------------------------*/
5172 158142c2 bellard
5173 750afe93 bellard
int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5174 158142c2 bellard
{
5175 158142c2 bellard
5176 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5177 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5178 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5179 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5180 158142c2 bellard
       ) {
5181 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5182 158142c2 bellard
        return 0;
5183 158142c2 bellard
    }
5184 158142c2 bellard
    return
5185 158142c2 bellard
           ( a.low == b.low )
5186 158142c2 bellard
        && (    ( a.high == b.high )
5187 158142c2 bellard
             || (    ( a.low == 0 )
5188 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5189 158142c2 bellard
           );
5190 158142c2 bellard
5191 158142c2 bellard
}
5192 158142c2 bellard
5193 158142c2 bellard
/*----------------------------------------------------------------------------
5194 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5195 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5196 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5197 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5198 158142c2 bellard
*----------------------------------------------------------------------------*/
5199 158142c2 bellard
5200 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5201 158142c2 bellard
{
5202 158142c2 bellard
    flag aSign, bSign;
5203 158142c2 bellard
5204 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5205 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5206 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5207 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5208 158142c2 bellard
       ) {
5209 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5210 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5211 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5212 158142c2 bellard
        }
5213 158142c2 bellard
        return 0;
5214 158142c2 bellard
    }
5215 158142c2 bellard
    aSign = extractFloat128Sign( a );
5216 158142c2 bellard
    bSign = extractFloat128Sign( b );
5217 158142c2 bellard
    if ( aSign != bSign ) {
5218 158142c2 bellard
        return
5219 158142c2 bellard
               aSign
5220 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5221 158142c2 bellard
                 == 0 );
5222 158142c2 bellard
    }
5223 158142c2 bellard
    return
5224 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5225 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5226 158142c2 bellard
5227 158142c2 bellard
}
5228 158142c2 bellard
5229 158142c2 bellard
/*----------------------------------------------------------------------------
5230 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5231 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5232 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5233 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5234 158142c2 bellard
*----------------------------------------------------------------------------*/
5235 158142c2 bellard
5236 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5237 158142c2 bellard
{
5238 158142c2 bellard
    flag aSign, bSign;
5239 158142c2 bellard
5240 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5241 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5242 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5243 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5244 158142c2 bellard
       ) {
5245 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5246 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5247 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5248 158142c2 bellard
        }
5249 158142c2 bellard
        return 0;
5250 158142c2 bellard
    }
5251 158142c2 bellard
    aSign = extractFloat128Sign( a );
5252 158142c2 bellard
    bSign = extractFloat128Sign( b );
5253 158142c2 bellard
    if ( aSign != bSign ) {
5254 158142c2 bellard
        return
5255 158142c2 bellard
               aSign
5256 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5257 158142c2 bellard
                 != 0 );
5258 158142c2 bellard
    }
5259 158142c2 bellard
    return
5260 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5261 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5262 158142c2 bellard
5263 158142c2 bellard
}
5264 158142c2 bellard
5265 158142c2 bellard
#endif
5266 158142c2 bellard
5267 1d6bda35 bellard
/* misc functions */
5268 1d6bda35 bellard
/* Convert the unsigned integer `a' to single precision.  The value is first
 * widened to a signed 64-bit integer, which preserves it, and then handed
 * to the signed 64-bit conversion routine. */
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    int64_t wide;

    wide = a;
    return int64_to_float32( wide STATUS_VAR );
}
5272 1d6bda35 bellard
5273 1d6bda35 bellard
/* Convert the unsigned integer `a' to double precision.  The value is first
 * widened to a signed 64-bit integer, which preserves it, and then handed
 * to the signed 64-bit conversion routine. */
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    int64_t wide;

    wide = a;
    return int64_to_float64( wide STATUS_VAR );
}
5277 1d6bda35 bellard
5278 1d6bda35 bellard
/* Convert the single-precision value `a' to an unsigned 32-bit integer by
 * going through the signed 64-bit conversion.  A negative result is replaced
 * by 0 and a result above 0xffffffff by 0xffffffff; in both cases the
 * invalid exception is raised. */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide;

    wide = float32_to_int64( a STATUS_VAR );
    if ( wide < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( wide > 0xffffffff ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return (unsigned int) wide;
}
5295 1d6bda35 bellard
5296 1d6bda35 bellard
/* Convert the single-precision value `a' to an unsigned 32-bit integer by
 * going through the round-to-zero signed 64-bit conversion.  A negative
 * result is replaced by 0 and a result above 0xffffffff by 0xffffffff; in
 * both cases the invalid exception is raised. */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide;

    wide = float32_to_int64_round_to_zero( a STATUS_VAR );
    if ( wide < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( wide > 0xffffffff ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return (unsigned int) wide;
}
5313 1d6bda35 bellard
5314 1d6bda35 bellard
/* Convert the double-precision value `a' to an unsigned 32-bit integer by
 * going through the signed 64-bit conversion.  A negative result is replaced
 * by 0 and a result above 0xffffffff by 0xffffffff; in both cases the
 * invalid exception is raised. */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide;

    wide = float64_to_int64( a STATUS_VAR );
    if ( wide < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( wide > 0xffffffff ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return (unsigned int) wide;
}
5331 1d6bda35 bellard
5332 1d6bda35 bellard
/* Convert the double-precision value `a' to an unsigned 32-bit integer by
 * going through the round-to-zero signed 64-bit conversion.  A negative
 * result is replaced by 0 and a result above 0xffffffff by 0xffffffff; in
 * both cases the invalid exception is raised. */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide;

    wide = float64_to_int64_round_to_zero( a STATUS_VAR );
    if ( wide < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if ( wide > 0xffffffff ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return (unsigned int) wide;
}
5349 1d6bda35 bellard
5350 f090c9d4 pbrook
/* FIXME: This looks broken.  */
5351 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
5352 75d62a58 j_mayer
{
5353 75d62a58 j_mayer
    int64_t v;
5354 75d62a58 j_mayer
5355 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5356 f090c9d4 pbrook
    v += float64_val(a);
5357 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
5358 75d62a58 j_mayer
5359 75d62a58 j_mayer
    return v - INT64_MIN;
5360 75d62a58 j_mayer
}
5361 75d62a58 j_mayer
5362 75d62a58 j_mayer
/*
 * Convert the double-precision value `a' to an unsigned 64-bit integer,
 * always rounding toward zero.
 *
 * The previous implementation added the raw IEEE bit patterns of -2^63 and
 * `a' as if they were integers, which does not compute a floating-point sum,
 * and then un-biased with a signed subtraction that could overflow.  The
 * conversion is now done directly in three ranges:
 *
 *   |a| < 2^63        the value fits in an int64_t, so the signed
 *                     round-to-zero conversion is used; a negative result
 *                     yields 0 and raises invalid (same policy as
 *                     float64_to_uint32_round_to_zero in this file).
 *   2^63 <= a < 2^64  the value is an exact integer: the 53-bit significand
 *                     shifted left by 11.
 *   anything else     (|a| >= 2^64, a <= -2^63, infinity, NaN) raises invalid
 *                     and saturates: 0 for negative non-NaN inputs, all ones
 *                     otherwise.
 */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    int16 aExp;
    int64_t v;

    aExp = extractFloat64Exp( a );
    if ( aExp < 0x43E ) {
        /* Biased exponent below 1023 + 63, hence |a| < 2^63. */
        v = float64_to_int64_round_to_zero( a STATUS_VAR );
        if ( v < 0 ) {
            float_raise( float_flag_invalid STATUS_VAR);
            return 0;
        }
        return v;
    }
    if ( ( aExp == 0x43E ) && ! extractFloat64Sign( a ) ) {
        /* 1.f * 2^63: restore the implicit bit (bit 52) and move it up to
         * bit 63.  Nothing is shifted out, so the result is exact. */
        return ( extractFloat64Frac( a ) | ( (bits64) 1 << 52 ) ) << 11;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    if (    extractFloat64Sign( a )
         && ! ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) ) {
        /* Negative and not a NaN: clamp to the smallest unsigned value. */
        return 0;
    }
    return ~ (uint64_t) 0;
}
5372 75d62a58 j_mayer
5373 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
5374 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
5375 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
5376 1d6bda35 bellard
{                                                                            \
5377 1d6bda35 bellard
    flag aSign, bSign;                                                       \
5378 f090c9d4 pbrook
    bits ## s av, bv;                                                        \
5379 1d6bda35 bellard
                                                                             \
5380 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
5381 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
5382 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
5383 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
5384 1d6bda35 bellard
        if (!is_quiet ||                                                     \
5385 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
5386 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
5387 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
5388 1d6bda35 bellard
        }                                                                    \
5389 1d6bda35 bellard
        return float_relation_unordered;                                     \
5390 1d6bda35 bellard
    }                                                                        \
5391 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
5392 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
5393 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
5394 f090c9d4 pbrook
    bv = float ## s ## _val(a);                                              \
5395 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
5396 f090c9d4 pbrook
        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
5397 1d6bda35 bellard
            /* zero case */                                                  \
5398 1d6bda35 bellard
            return float_relation_equal;                                     \
5399 1d6bda35 bellard
        } else {                                                             \
5400 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
5401 1d6bda35 bellard
        }                                                                    \
5402 1d6bda35 bellard
    } else {                                                                 \
5403 f090c9d4 pbrook
        if (av == bv) {                                                      \
5404 1d6bda35 bellard
            return float_relation_equal;                                     \
5405 1d6bda35 bellard
        } else {                                                             \
5406 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
5407 1d6bda35 bellard
        }                                                                    \
5408 1d6bda35 bellard
    }                                                                        \
5409 1d6bda35 bellard
}                                                                            \
5410 1d6bda35 bellard
                                                                             \
5411 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
5412 1d6bda35 bellard
{                                                                            \
5413 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
5414 1d6bda35 bellard
}                                                                            \
5415 1d6bda35 bellard
                                                                             \
5416 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
5417 1d6bda35 bellard
{                                                                            \
5418 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
5419 1d6bda35 bellard
}
5420 1d6bda35 bellard
5421 1d6bda35 bellard
COMPARE(32, 0xff)
5422 1d6bda35 bellard
COMPARE(64, 0x7ff)
5423 9ee6e8bb pbrook
5424 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
5425 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
5426 9ee6e8bb pbrook
{
5427 9ee6e8bb pbrook
    flag aSign;
5428 9ee6e8bb pbrook
    int16 aExp;
5429 9ee6e8bb pbrook
    bits32 aSig;
5430 9ee6e8bb pbrook
5431 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
5432 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
5433 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
5434 9ee6e8bb pbrook
5435 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
5436 9ee6e8bb pbrook
        return a;
5437 9ee6e8bb pbrook
    }
5438 9ee6e8bb pbrook
    aExp += n;
5439 9ee6e8bb pbrook
    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
5440 9ee6e8bb pbrook
}
5441 9ee6e8bb pbrook
5442 9ee6e8bb pbrook
/*
 * Multiply the double-precision value `a' by 2 raised to the power `n'.
 *
 * Infinities and NaNs (exponent 0x7FF) and zeros are returned unchanged.
 * Otherwise the significand is put into the form expected by the
 * round-and-pack helpers -- implicit integer bit restored, shifted left by
 * 10, exponent one less than the true biased exponent -- and the helper
 * takes care of normalization, rounding, overflow and underflow.
 *
 * The previous code passed the bare fraction field and the unadjusted
 * exponent straight to roundAndPackFloat64, which does not follow that
 * convention.
 */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: restore the implicit leading one (bit 52). */
        aSig |= (bits64) 1 << 52;
    } else if ( aSig == 0 ) {
        /* Scaling a zero leaves it unchanged. */
        return a;
    } else {
        /* Subnormal: no implicit bit, effective biased exponent is 1. */
        aExp++;
    }

    /* Keep aExp + n inside int16.  The bound exceeds the whole exponent
     * span of the format, so a clamped n still overflows or underflows. */
    if ( n > 0x1000 ) {
        n = 0x1000;
    } else if ( n < -0x1000 ) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
5458 9ee6e8bb pbrook
5459 9ee6e8bb pbrook
#ifdef FLOATX80
5460 9ee6e8bb pbrook
/*
 * Multiply the extended double-precision value `a' by 2 raised to the
 * power `n'.
 *
 * Infinities and NaNs and zeros are returned unchanged.  The all-ones
 * exponent of this format is 0x7FFF (15 bits); the previous test against
 * 0x7FF let infinities and NaNs through to the rounding code and returned
 * ordinary numbers with exponent 0x7FF unscaled.  The significand carries
 * an explicit integer bit, so it is passed as is and the normalizing helper
 * handles denormal inputs.
 */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;     /* wide enough for exponent + clamped n */
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Scaling a zero leaves it unchanged. */
            return a;
        }
        /* Denormal: effective biased exponent is 1. */
        aExp++;
    }

    /* The bound exceeds the whole exponent span of the format, so a
     * clamped n still overflows or underflows. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
5477 9ee6e8bb pbrook
#endif
5478 9ee6e8bb pbrook
5479 9ee6e8bb pbrook
#ifdef FLOAT128
5480 9ee6e8bb pbrook
/*
 * Multiply the quadruple-precision value `a' by 2 raised to the power `n'.
 *
 * Infinities and NaNs (exponent 0x7FFF) and zeros are returned unchanged.
 * Otherwise the significand is put into the form expected by the
 * normalizing round-and-pack helper -- implicit integer bit restored at bit
 * 48 of the high word, exponent one less than the true biased exponent --
 * and the helper takes care of normalization, rounding, overflow and
 * underflow.
 *
 * The previous code passed the bare fraction words and the unadjusted
 * exponent straight to roundAndPackFloat128, which does not follow that
 * convention.
 */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    bits64 aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: restore the implicit leading one. */
        aSig0 |= (bits64) 1 << 48;
    } else if ( ( aSig0 | aSig1 ) == 0 ) {
        /* Scaling a zero leaves it unchanged. */
        return a;
    } else {
        /* Subnormal: no implicit bit, effective biased exponent is 1. */
        aExp++;
    }

    /* The bound exceeds the whole exponent span of the format, so a
     * clamped n still overflows or underflows. */
    if ( n > 0x10000 ) {
        n = 0x10000;
    } else if ( n < -0x10000 ) {
        n = -0x10000;
    }

    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}
5497 9ee6e8bb pbrook
#endif