Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ 83f64091

History | View | Annotate | Download (188.2 kB)

1 158142c2 bellard
2 158142c2 bellard
/*============================================================================
3 158142c2 bellard

4 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 158142c2 bellard
Package, Release 2b.
6 158142c2 bellard

7 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
8 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
9 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
10 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
11 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
12 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
13 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
14 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 158142c2 bellard
arithmetic/SoftFloat.html'.
16 158142c2 bellard

17 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
18 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
25 158142c2 bellard

26 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
27 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
28 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
29 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
30 158142c2 bellard

31 158142c2 bellard
=============================================================================*/
32 158142c2 bellard
33 158142c2 bellard
#include "softfloat.h"
34 158142c2 bellard
35 158142c2 bellard
/*----------------------------------------------------------------------------
36 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
37 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
38 158142c2 bellard
| desired.)
39 158142c2 bellard
*----------------------------------------------------------------------------*/
40 158142c2 bellard
#include "softfloat-macros.h"
41 158142c2 bellard
42 158142c2 bellard
/*----------------------------------------------------------------------------
43 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
44 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
45 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
46 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
47 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
48 158142c2 bellard
| specific.
49 158142c2 bellard
*----------------------------------------------------------------------------*/
50 158142c2 bellard
#include "softfloat-specialize.h"
51 158142c2 bellard
52 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `float_rounding_mode' status field.  That field is the
| one read by the rounding routines in this file to select how results are
| rounded.
*----------------------------------------------------------------------------*/
void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
56 158142c2 bellard
57 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Replaces the `float_exception_flags' status field with `val'.  The previous
| contents are discarded rather than OR-ed with the new value.
*----------------------------------------------------------------------------*/
void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
61 1d6bda35 bellard
62 158142c2 bellard
#ifdef FLOATX80
63 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `floatx80_rounding_precision' status field.  Only
| compiled when FLOATX80 is defined.
*----------------------------------------------------------------------------*/
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
67 158142c2 bellard
#endif
68 158142c2 bellard
69 158142c2 bellard
/*----------------------------------------------------------------------------
70 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
71 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
72 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
73 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
74 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
75 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
76 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
77 158142c2 bellard
| positive or negative integer is returned.
78 158142c2 bellard
*----------------------------------------------------------------------------*/
79 158142c2 bellard
80 158142c2 bellard
static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
81 158142c2 bellard
{
82 158142c2 bellard
    int8 roundingMode;
83 158142c2 bellard
    flag roundNearestEven;
84 158142c2 bellard
    int8 roundIncrement, roundBits;
85 158142c2 bellard
    int32 z;
86 158142c2 bellard
87 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
88 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
89 158142c2 bellard
    roundIncrement = 0x40;
90 158142c2 bellard
    if ( ! roundNearestEven ) {
91 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
92 158142c2 bellard
            roundIncrement = 0;
93 158142c2 bellard
        }
94 158142c2 bellard
        else {
95 158142c2 bellard
            roundIncrement = 0x7F;
96 158142c2 bellard
            if ( zSign ) {
97 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
98 158142c2 bellard
            }
99 158142c2 bellard
            else {
100 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
101 158142c2 bellard
            }
102 158142c2 bellard
        }
103 158142c2 bellard
    }
104 158142c2 bellard
    roundBits = absZ & 0x7F;
105 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
106 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
107 158142c2 bellard
    z = absZ;
108 158142c2 bellard
    if ( zSign ) z = - z;
109 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
110 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
111 158142c2 bellard
        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
112 158142c2 bellard
    }
113 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
114 158142c2 bellard
    return z;
115 158142c2 bellard
116 158142c2 bellard
}
117 158142c2 bellard
118 158142c2 bellard
/*----------------------------------------------------------------------------
119 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
120 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
121 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
122 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
123 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
124 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
125 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
126 158142c2 bellard
| exception is raised and the largest positive or negative integer is
127 158142c2 bellard
| returned.
128 158142c2 bellard
*----------------------------------------------------------------------------*/
129 158142c2 bellard
130 158142c2 bellard
static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
131 158142c2 bellard
{
132 158142c2 bellard
    int8 roundingMode;
133 158142c2 bellard
    flag roundNearestEven, increment;
134 158142c2 bellard
    int64 z;
135 158142c2 bellard
136 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
137 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
138 158142c2 bellard
    increment = ( (sbits64) absZ1 < 0 );
139 158142c2 bellard
    if ( ! roundNearestEven ) {
140 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
141 158142c2 bellard
            increment = 0;
142 158142c2 bellard
        }
143 158142c2 bellard
        else {
144 158142c2 bellard
            if ( zSign ) {
145 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
146 158142c2 bellard
            }
147 158142c2 bellard
            else {
148 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
149 158142c2 bellard
            }
150 158142c2 bellard
        }
151 158142c2 bellard
    }
152 158142c2 bellard
    if ( increment ) {
153 158142c2 bellard
        ++absZ0;
154 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
155 158142c2 bellard
        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
156 158142c2 bellard
    }
157 158142c2 bellard
    z = absZ0;
158 158142c2 bellard
    if ( zSign ) z = - z;
159 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
160 158142c2 bellard
 overflow:
161 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
162 158142c2 bellard
        return
163 158142c2 bellard
              zSign ? (sbits64) LIT64( 0x8000000000000000 )
164 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
165 158142c2 bellard
    }
166 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
167 158142c2 bellard
    return z;
168 158142c2 bellard
169 158142c2 bellard
}
170 158142c2 bellard
171 158142c2 bellard
/*----------------------------------------------------------------------------
172 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
173 158142c2 bellard
*----------------------------------------------------------------------------*/
174 158142c2 bellard
175 158142c2 bellard
INLINE bits32 extractFloat32Frac( float32 a )
176 158142c2 bellard
{
177 158142c2 bellard
178 158142c2 bellard
    return a & 0x007FFFFF;
179 158142c2 bellard
180 158142c2 bellard
}
181 158142c2 bellard
182 158142c2 bellard
/*----------------------------------------------------------------------------
183 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
184 158142c2 bellard
*----------------------------------------------------------------------------*/
185 158142c2 bellard
186 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
187 158142c2 bellard
{
188 158142c2 bellard
189 158142c2 bellard
    return ( a>>23 ) & 0xFF;
190 158142c2 bellard
191 158142c2 bellard
}
192 158142c2 bellard
193 158142c2 bellard
/*----------------------------------------------------------------------------
194 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
195 158142c2 bellard
*----------------------------------------------------------------------------*/
196 158142c2 bellard
197 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
198 158142c2 bellard
{
199 158142c2 bellard
200 158142c2 bellard
    return a>>31;
201 158142c2 bellard
202 158142c2 bellard
}
203 158142c2 bellard
204 158142c2 bellard
/*----------------------------------------------------------------------------
205 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
206 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
207 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
208 158142c2 bellard
| `zSigPtr', respectively.
209 158142c2 bellard
*----------------------------------------------------------------------------*/
210 158142c2 bellard
211 158142c2 bellard
static void
212 158142c2 bellard
 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
213 158142c2 bellard
{
214 158142c2 bellard
    int8 shiftCount;
215 158142c2 bellard
216 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
217 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
218 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
219 158142c2 bellard
220 158142c2 bellard
}
221 158142c2 bellard
222 158142c2 bellard
/*----------------------------------------------------------------------------
223 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
224 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
225 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
226 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
227 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
228 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
229 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
230 158142c2 bellard
| significand.
231 158142c2 bellard
*----------------------------------------------------------------------------*/
232 158142c2 bellard
233 158142c2 bellard
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
234 158142c2 bellard
{
235 158142c2 bellard
236 158142c2 bellard
    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
237 158142c2 bellard
238 158142c2 bellard
}
239 158142c2 bellard
240 158142c2 bellard
/*----------------------------------------------------------------------------
241 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
242 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
243 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
244 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
245 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
246 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
247 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
248 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
249 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
250 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
251 158142c2 bellard
| precision floating-point number.
252 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
253 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
254 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
255 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
256 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
257 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
258 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
259 158142c2 bellard
| Binary Floating-Point Arithmetic.
260 158142c2 bellard
*----------------------------------------------------------------------------*/
261 158142c2 bellard
262 158142c2 bellard
static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
263 158142c2 bellard
{
264 158142c2 bellard
    int8 roundingMode;
265 158142c2 bellard
    flag roundNearestEven;
266 158142c2 bellard
    int8 roundIncrement, roundBits;
267 158142c2 bellard
    flag isTiny;
268 158142c2 bellard
269 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
270 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
271 158142c2 bellard
    roundIncrement = 0x40;
272 158142c2 bellard
    if ( ! roundNearestEven ) {
273 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
274 158142c2 bellard
            roundIncrement = 0;
275 158142c2 bellard
        }
276 158142c2 bellard
        else {
277 158142c2 bellard
            roundIncrement = 0x7F;
278 158142c2 bellard
            if ( zSign ) {
279 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
280 158142c2 bellard
            }
281 158142c2 bellard
            else {
282 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
283 158142c2 bellard
            }
284 158142c2 bellard
        }
285 158142c2 bellard
    }
286 158142c2 bellard
    roundBits = zSig & 0x7F;
287 158142c2 bellard
    if ( 0xFD <= (bits16) zExp ) {
288 158142c2 bellard
        if (    ( 0xFD < zExp )
289 158142c2 bellard
             || (    ( zExp == 0xFD )
290 158142c2 bellard
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
291 158142c2 bellard
           ) {
292 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
293 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
294 158142c2 bellard
        }
295 158142c2 bellard
        if ( zExp < 0 ) {
296 158142c2 bellard
            isTiny =
297 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
298 158142c2 bellard
                || ( zExp < -1 )
299 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
300 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
301 158142c2 bellard
            zExp = 0;
302 158142c2 bellard
            roundBits = zSig & 0x7F;
303 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
304 158142c2 bellard
        }
305 158142c2 bellard
    }
306 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
307 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
308 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
309 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
310 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
311 158142c2 bellard
312 158142c2 bellard
}
313 158142c2 bellard
314 158142c2 bellard
/*----------------------------------------------------------------------------
315 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
316 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
317 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
318 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
319 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
320 158142c2 bellard
| floating-point exponent.
321 158142c2 bellard
*----------------------------------------------------------------------------*/
322 158142c2 bellard
323 158142c2 bellard
static float32
324 158142c2 bellard
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
325 158142c2 bellard
{
326 158142c2 bellard
    int8 shiftCount;
327 158142c2 bellard
328 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
329 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
330 158142c2 bellard
331 158142c2 bellard
}
332 158142c2 bellard
333 158142c2 bellard
/*----------------------------------------------------------------------------
334 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
335 158142c2 bellard
*----------------------------------------------------------------------------*/
336 158142c2 bellard
337 158142c2 bellard
INLINE bits64 extractFloat64Frac( float64 a )
338 158142c2 bellard
{
339 158142c2 bellard
340 158142c2 bellard
    return a & LIT64( 0x000FFFFFFFFFFFFF );
341 158142c2 bellard
342 158142c2 bellard
}
343 158142c2 bellard
344 158142c2 bellard
/*----------------------------------------------------------------------------
345 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
346 158142c2 bellard
*----------------------------------------------------------------------------*/
347 158142c2 bellard
348 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
349 158142c2 bellard
{
350 158142c2 bellard
351 158142c2 bellard
    return ( a>>52 ) & 0x7FF;
352 158142c2 bellard
353 158142c2 bellard
}
354 158142c2 bellard
355 158142c2 bellard
/*----------------------------------------------------------------------------
356 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
357 158142c2 bellard
*----------------------------------------------------------------------------*/
358 158142c2 bellard
359 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
360 158142c2 bellard
{
361 158142c2 bellard
362 158142c2 bellard
    return a>>63;
363 158142c2 bellard
364 158142c2 bellard
}
365 158142c2 bellard
366 158142c2 bellard
/*----------------------------------------------------------------------------
367 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
368 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
369 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
370 158142c2 bellard
| `zSigPtr', respectively.
371 158142c2 bellard
*----------------------------------------------------------------------------*/
372 158142c2 bellard
373 158142c2 bellard
static void
374 158142c2 bellard
 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
375 158142c2 bellard
{
376 158142c2 bellard
    int8 shiftCount;
377 158142c2 bellard
378 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
379 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
380 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
381 158142c2 bellard
382 158142c2 bellard
}
383 158142c2 bellard
384 158142c2 bellard
/*----------------------------------------------------------------------------
385 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
386 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
387 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
388 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
389 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
390 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
391 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
392 158142c2 bellard
| significand.
393 158142c2 bellard
*----------------------------------------------------------------------------*/
394 158142c2 bellard
395 158142c2 bellard
INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
396 158142c2 bellard
{
397 158142c2 bellard
398 158142c2 bellard
    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
399 158142c2 bellard
400 158142c2 bellard
}
401 158142c2 bellard
402 158142c2 bellard
/*----------------------------------------------------------------------------
403 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
404 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
405 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
406 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
407 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
408 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
409 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
410 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
411 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
412 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
413 158142c2 bellard
| precision floating-point number.
414 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
415 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
416 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
417 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
418 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
419 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
420 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
421 158142c2 bellard
| Binary Floating-Point Arithmetic.
422 158142c2 bellard
*----------------------------------------------------------------------------*/
423 158142c2 bellard
424 158142c2 bellard
static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
425 158142c2 bellard
{
426 158142c2 bellard
    int8 roundingMode;
427 158142c2 bellard
    flag roundNearestEven;
428 158142c2 bellard
    int16 roundIncrement, roundBits;
429 158142c2 bellard
    flag isTiny;
430 158142c2 bellard
431 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
432 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
433 158142c2 bellard
    roundIncrement = 0x200;
434 158142c2 bellard
    if ( ! roundNearestEven ) {
435 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
436 158142c2 bellard
            roundIncrement = 0;
437 158142c2 bellard
        }
438 158142c2 bellard
        else {
439 158142c2 bellard
            roundIncrement = 0x3FF;
440 158142c2 bellard
            if ( zSign ) {
441 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
442 158142c2 bellard
            }
443 158142c2 bellard
            else {
444 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
445 158142c2 bellard
            }
446 158142c2 bellard
        }
447 158142c2 bellard
    }
448 158142c2 bellard
    roundBits = zSig & 0x3FF;
449 158142c2 bellard
    if ( 0x7FD <= (bits16) zExp ) {
450 158142c2 bellard
        if (    ( 0x7FD < zExp )
451 158142c2 bellard
             || (    ( zExp == 0x7FD )
452 158142c2 bellard
                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
453 158142c2 bellard
           ) {
454 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
455 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
456 158142c2 bellard
        }
457 158142c2 bellard
        if ( zExp < 0 ) {
458 158142c2 bellard
            isTiny =
459 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
460 158142c2 bellard
                || ( zExp < -1 )
461 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
462 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
463 158142c2 bellard
            zExp = 0;
464 158142c2 bellard
            roundBits = zSig & 0x3FF;
465 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
466 158142c2 bellard
        }
467 158142c2 bellard
    }
468 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
469 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
470 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
471 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
472 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
473 158142c2 bellard
474 158142c2 bellard
}
475 158142c2 bellard
476 158142c2 bellard
/*----------------------------------------------------------------------------
477 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
478 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
479 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
480 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
481 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
482 158142c2 bellard
| floating-point exponent.
483 158142c2 bellard
*----------------------------------------------------------------------------*/
484 158142c2 bellard
485 158142c2 bellard
static float64
486 158142c2 bellard
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
487 158142c2 bellard
{
488 158142c2 bellard
    int8 shiftCount;
489 158142c2 bellard
490 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
491 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
492 158142c2 bellard
493 158142c2 bellard
}
494 158142c2 bellard
495 158142c2 bellard
#ifdef FLOATX80
496 158142c2 bellard
497 158142c2 bellard
/*----------------------------------------------------------------------------
498 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
499 158142c2 bellard
| value `a'.
500 158142c2 bellard
*----------------------------------------------------------------------------*/
501 158142c2 bellard
502 158142c2 bellard
INLINE bits64 extractFloatx80Frac( floatx80 a )
503 158142c2 bellard
{
504 158142c2 bellard
505 158142c2 bellard
    return a.low;
506 158142c2 bellard
507 158142c2 bellard
}
508 158142c2 bellard
509 158142c2 bellard
/*----------------------------------------------------------------------------
510 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
511 158142c2 bellard
| value `a'.
512 158142c2 bellard
*----------------------------------------------------------------------------*/
513 158142c2 bellard
514 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
515 158142c2 bellard
{
516 158142c2 bellard
517 158142c2 bellard
    return a.high & 0x7FFF;
518 158142c2 bellard
519 158142c2 bellard
}
520 158142c2 bellard
521 158142c2 bellard
/*----------------------------------------------------------------------------
522 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
523 158142c2 bellard
| `a'.
524 158142c2 bellard
*----------------------------------------------------------------------------*/
525 158142c2 bellard
526 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
527 158142c2 bellard
{
528 158142c2 bellard
529 158142c2 bellard
    return a.high>>15;
530 158142c2 bellard
531 158142c2 bellard
}
532 158142c2 bellard
533 158142c2 bellard
/*----------------------------------------------------------------------------
534 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
535 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
536 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
537 158142c2 bellard
| `zSigPtr', respectively.
538 158142c2 bellard
*----------------------------------------------------------------------------*/
539 158142c2 bellard
540 158142c2 bellard
static void
541 158142c2 bellard
 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
542 158142c2 bellard
{
543 158142c2 bellard
    int8 shiftCount;
544 158142c2 bellard
545 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
546 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
547 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
548 158142c2 bellard
549 158142c2 bellard
}
550 158142c2 bellard
551 158142c2 bellard
/*----------------------------------------------------------------------------
552 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
553 158142c2 bellard
| extended double-precision floating-point value, returning the result.
554 158142c2 bellard
*----------------------------------------------------------------------------*/
555 158142c2 bellard
556 158142c2 bellard
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
557 158142c2 bellard
{
558 158142c2 bellard
    floatx80 z;
559 158142c2 bellard
560 158142c2 bellard
    z.low = zSig;
561 158142c2 bellard
    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
562 158142c2 bellard
    return z;
563 158142c2 bellard
564 158142c2 bellard
}
565 158142c2 bellard
566 158142c2 bellard
/*----------------------------------------------------------------------------
567 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
568 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
569 158142c2 bellard
| and returns the proper extended double-precision floating-point value
570 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
571 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
572 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
573 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
574 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
575 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
576 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
577 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
578 158142c2 bellard
| double-precision floating-point number.
579 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
580 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
581 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
582 158142c2 bellard
| format.
583 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
584 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
585 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
586 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
587 158142c2 bellard
| Floating-Point Arithmetic.
588 158142c2 bellard
*----------------------------------------------------------------------------*/
589 158142c2 bellard
590 158142c2 bellard
static floatx80
591 158142c2 bellard
 roundAndPackFloatx80(
592 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
593 158142c2 bellard
 STATUS_PARAM)
594 158142c2 bellard
{
595 158142c2 bellard
    int8 roundingMode;
596 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
597 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
598 158142c2 bellard
599 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
600 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
601 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
602 158142c2 bellard
    if ( roundingPrecision == 64 ) {
603 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
604 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
605 158142c2 bellard
    }
606 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
607 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
608 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
609 158142c2 bellard
    }
610 158142c2 bellard
    else {
611 158142c2 bellard
        goto precision80;
612 158142c2 bellard
    }
613 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
614 158142c2 bellard
    if ( ! roundNearestEven ) {
615 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
616 158142c2 bellard
            roundIncrement = 0;
617 158142c2 bellard
        }
618 158142c2 bellard
        else {
619 158142c2 bellard
            roundIncrement = roundMask;
620 158142c2 bellard
            if ( zSign ) {
621 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
622 158142c2 bellard
            }
623 158142c2 bellard
            else {
624 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
625 158142c2 bellard
            }
626 158142c2 bellard
        }
627 158142c2 bellard
    }
628 158142c2 bellard
    roundBits = zSig0 & roundMask;
629 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
630 158142c2 bellard
        if (    ( 0x7FFE < zExp )
631 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
632 158142c2 bellard
           ) {
633 158142c2 bellard
            goto overflow;
634 158142c2 bellard
        }
635 158142c2 bellard
        if ( zExp <= 0 ) {
636 158142c2 bellard
            isTiny =
637 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
638 158142c2 bellard
                || ( zExp < 0 )
639 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
640 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
641 158142c2 bellard
            zExp = 0;
642 158142c2 bellard
            roundBits = zSig0 & roundMask;
643 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
644 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
645 158142c2 bellard
            zSig0 += roundIncrement;
646 158142c2 bellard
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
647 158142c2 bellard
            roundIncrement = roundMask + 1;
648 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
649 158142c2 bellard
                roundMask |= roundIncrement;
650 158142c2 bellard
            }
651 158142c2 bellard
            zSig0 &= ~ roundMask;
652 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
653 158142c2 bellard
        }
654 158142c2 bellard
    }
655 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
656 158142c2 bellard
    zSig0 += roundIncrement;
657 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
658 158142c2 bellard
        ++zExp;
659 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
660 158142c2 bellard
    }
661 158142c2 bellard
    roundIncrement = roundMask + 1;
662 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
663 158142c2 bellard
        roundMask |= roundIncrement;
664 158142c2 bellard
    }
665 158142c2 bellard
    zSig0 &= ~ roundMask;
666 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
667 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
668 158142c2 bellard
 precision80:
669 158142c2 bellard
    increment = ( (sbits64) zSig1 < 0 );
670 158142c2 bellard
    if ( ! roundNearestEven ) {
671 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
672 158142c2 bellard
            increment = 0;
673 158142c2 bellard
        }
674 158142c2 bellard
        else {
675 158142c2 bellard
            if ( zSign ) {
676 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
677 158142c2 bellard
            }
678 158142c2 bellard
            else {
679 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
680 158142c2 bellard
            }
681 158142c2 bellard
        }
682 158142c2 bellard
    }
683 158142c2 bellard
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
684 158142c2 bellard
        if (    ( 0x7FFE < zExp )
685 158142c2 bellard
             || (    ( zExp == 0x7FFE )
686 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
687 158142c2 bellard
                  && increment
688 158142c2 bellard
                )
689 158142c2 bellard
           ) {
690 158142c2 bellard
            roundMask = 0;
691 158142c2 bellard
 overflow:
692 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
693 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
694 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
695 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
696 158142c2 bellard
               ) {
697 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
698 158142c2 bellard
            }
699 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
700 158142c2 bellard
        }
701 158142c2 bellard
        if ( zExp <= 0 ) {
702 158142c2 bellard
            isTiny =
703 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
704 158142c2 bellard
                || ( zExp < 0 )
705 158142c2 bellard
                || ! increment
706 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
707 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
708 158142c2 bellard
            zExp = 0;
709 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
710 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
711 158142c2 bellard
            if ( roundNearestEven ) {
712 158142c2 bellard
                increment = ( (sbits64) zSig1 < 0 );
713 158142c2 bellard
            }
714 158142c2 bellard
            else {
715 158142c2 bellard
                if ( zSign ) {
716 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
717 158142c2 bellard
                }
718 158142c2 bellard
                else {
719 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
720 158142c2 bellard
                }
721 158142c2 bellard
            }
722 158142c2 bellard
            if ( increment ) {
723 158142c2 bellard
                ++zSig0;
724 158142c2 bellard
                zSig0 &=
725 158142c2 bellard
                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
726 158142c2 bellard
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
727 158142c2 bellard
            }
728 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
729 158142c2 bellard
        }
730 158142c2 bellard
    }
731 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
732 158142c2 bellard
    if ( increment ) {
733 158142c2 bellard
        ++zSig0;
734 158142c2 bellard
        if ( zSig0 == 0 ) {
735 158142c2 bellard
            ++zExp;
736 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
737 158142c2 bellard
        }
738 158142c2 bellard
        else {
739 158142c2 bellard
            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
740 158142c2 bellard
        }
741 158142c2 bellard
    }
742 158142c2 bellard
    else {
743 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
744 158142c2 bellard
    }
745 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
746 158142c2 bellard
747 158142c2 bellard
}
748 158142c2 bellard
749 158142c2 bellard
/*----------------------------------------------------------------------------
750 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
751 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
752 158142c2 bellard
| and returns the proper extended double-precision floating-point value
753 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
754 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
755 158142c2 bellard
| normalized.
756 158142c2 bellard
*----------------------------------------------------------------------------*/
757 158142c2 bellard
758 158142c2 bellard
static floatx80
759 158142c2 bellard
 normalizeRoundAndPackFloatx80(
760 158142c2 bellard
     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
761 158142c2 bellard
 STATUS_PARAM)
762 158142c2 bellard
{
763 158142c2 bellard
    int8 shiftCount;
764 158142c2 bellard
765 158142c2 bellard
    if ( zSig0 == 0 ) {
766 158142c2 bellard
        zSig0 = zSig1;
767 158142c2 bellard
        zSig1 = 0;
768 158142c2 bellard
        zExp -= 64;
769 158142c2 bellard
    }
770 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
771 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
772 158142c2 bellard
    zExp -= shiftCount;
773 158142c2 bellard
    return
774 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
775 158142c2 bellard
776 158142c2 bellard
}
777 158142c2 bellard
778 158142c2 bellard
#endif
779 158142c2 bellard
780 158142c2 bellard
#ifdef FLOAT128
781 158142c2 bellard
782 158142c2 bellard
/*----------------------------------------------------------------------------
783 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
784 158142c2 bellard
| floating-point value `a'.
785 158142c2 bellard
*----------------------------------------------------------------------------*/
786 158142c2 bellard
787 158142c2 bellard
INLINE bits64 extractFloat128Frac1( float128 a )
788 158142c2 bellard
{
789 158142c2 bellard
790 158142c2 bellard
    return a.low;
791 158142c2 bellard
792 158142c2 bellard
}
793 158142c2 bellard
794 158142c2 bellard
/*----------------------------------------------------------------------------
795 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
796 158142c2 bellard
| floating-point value `a'.
797 158142c2 bellard
*----------------------------------------------------------------------------*/
798 158142c2 bellard
799 158142c2 bellard
INLINE bits64 extractFloat128Frac0( float128 a )
800 158142c2 bellard
{
801 158142c2 bellard
802 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
803 158142c2 bellard
804 158142c2 bellard
}
805 158142c2 bellard
806 158142c2 bellard
/*----------------------------------------------------------------------------
807 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
808 158142c2 bellard
| `a'.
809 158142c2 bellard
*----------------------------------------------------------------------------*/
810 158142c2 bellard
811 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
812 158142c2 bellard
{
813 158142c2 bellard
814 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
815 158142c2 bellard
816 158142c2 bellard
}
817 158142c2 bellard
818 158142c2 bellard
/*----------------------------------------------------------------------------
819 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
820 158142c2 bellard
*----------------------------------------------------------------------------*/
821 158142c2 bellard
822 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
823 158142c2 bellard
{
824 158142c2 bellard
825 158142c2 bellard
    return a.high>>63;
826 158142c2 bellard
827 158142c2 bellard
}
828 158142c2 bellard
829 158142c2 bellard
/*----------------------------------------------------------------------------
830 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
831 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
832 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
833 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
834 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
835 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
836 158142c2 bellard
| location pointed to by `zSig1Ptr'.
837 158142c2 bellard
*----------------------------------------------------------------------------*/
838 158142c2 bellard
839 158142c2 bellard
static void
840 158142c2 bellard
 normalizeFloat128Subnormal(
841 158142c2 bellard
     bits64 aSig0,
842 158142c2 bellard
     bits64 aSig1,
843 158142c2 bellard
     int32 *zExpPtr,
844 158142c2 bellard
     bits64 *zSig0Ptr,
845 158142c2 bellard
     bits64 *zSig1Ptr
846 158142c2 bellard
 )
847 158142c2 bellard
{
848 158142c2 bellard
    int8 shiftCount;
849 158142c2 bellard
850 158142c2 bellard
    if ( aSig0 == 0 ) {
851 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
852 158142c2 bellard
        if ( shiftCount < 0 ) {
853 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
854 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
855 158142c2 bellard
        }
856 158142c2 bellard
        else {
857 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
858 158142c2 bellard
            *zSig1Ptr = 0;
859 158142c2 bellard
        }
860 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
861 158142c2 bellard
    }
862 158142c2 bellard
    else {
863 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
864 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
865 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
866 158142c2 bellard
    }
867 158142c2 bellard
868 158142c2 bellard
}
869 158142c2 bellard
870 158142c2 bellard
/*----------------------------------------------------------------------------
871 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
872 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
873 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
874 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
875 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
876 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
877 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
878 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
879 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
880 158142c2 bellard
| significand.
881 158142c2 bellard
*----------------------------------------------------------------------------*/
882 158142c2 bellard
883 158142c2 bellard
INLINE float128
884 158142c2 bellard
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
885 158142c2 bellard
{
886 158142c2 bellard
    float128 z;
887 158142c2 bellard
888 158142c2 bellard
    z.low = zSig1;
889 158142c2 bellard
    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
890 158142c2 bellard
    return z;
891 158142c2 bellard
892 158142c2 bellard
}
893 158142c2 bellard
894 158142c2 bellard
/*----------------------------------------------------------------------------
895 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
896 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
897 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
898 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
899 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
900 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
901 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
902 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
903 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
904 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
905 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
906 158142c2 bellard
| precision floating-point number.
907 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
908 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
909 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
910 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
911 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
912 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
913 158142c2 bellard
*----------------------------------------------------------------------------*/
914 158142c2 bellard
915 158142c2 bellard
static float128
916 158142c2 bellard
 roundAndPackFloat128(
917 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
918 158142c2 bellard
{
919 158142c2 bellard
    int8 roundingMode;
920 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
921 158142c2 bellard
922 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
923 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
924 158142c2 bellard
    increment = ( (sbits64) zSig2 < 0 );
925 158142c2 bellard
    if ( ! roundNearestEven ) {
926 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
927 158142c2 bellard
            increment = 0;
928 158142c2 bellard
        }
929 158142c2 bellard
        else {
930 158142c2 bellard
            if ( zSign ) {
931 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
932 158142c2 bellard
            }
933 158142c2 bellard
            else {
934 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
935 158142c2 bellard
            }
936 158142c2 bellard
        }
937 158142c2 bellard
    }
938 158142c2 bellard
    if ( 0x7FFD <= (bits32) zExp ) {
939 158142c2 bellard
        if (    ( 0x7FFD < zExp )
940 158142c2 bellard
             || (    ( zExp == 0x7FFD )
941 158142c2 bellard
                  && eq128(
942 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
943 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
944 158142c2 bellard
                         zSig0,
945 158142c2 bellard
                         zSig1
946 158142c2 bellard
                     )
947 158142c2 bellard
                  && increment
948 158142c2 bellard
                )
949 158142c2 bellard
           ) {
950 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
951 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
952 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
953 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
954 158142c2 bellard
               ) {
955 158142c2 bellard
                return
956 158142c2 bellard
                    packFloat128(
957 158142c2 bellard
                        zSign,
958 158142c2 bellard
                        0x7FFE,
959 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
960 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
961 158142c2 bellard
                    );
962 158142c2 bellard
            }
963 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
964 158142c2 bellard
        }
965 158142c2 bellard
        if ( zExp < 0 ) {
966 158142c2 bellard
            isTiny =
967 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
968 158142c2 bellard
                || ( zExp < -1 )
969 158142c2 bellard
                || ! increment
970 158142c2 bellard
                || lt128(
971 158142c2 bellard
                       zSig0,
972 158142c2 bellard
                       zSig1,
973 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
974 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
975 158142c2 bellard
                   );
976 158142c2 bellard
            shift128ExtraRightJamming(
977 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
978 158142c2 bellard
            zExp = 0;
979 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
980 158142c2 bellard
            if ( roundNearestEven ) {
981 158142c2 bellard
                increment = ( (sbits64) zSig2 < 0 );
982 158142c2 bellard
            }
983 158142c2 bellard
            else {
984 158142c2 bellard
                if ( zSign ) {
985 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
986 158142c2 bellard
                }
987 158142c2 bellard
                else {
988 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
989 158142c2 bellard
                }
990 158142c2 bellard
            }
991 158142c2 bellard
        }
992 158142c2 bellard
    }
993 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
994 158142c2 bellard
    if ( increment ) {
995 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
996 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
997 158142c2 bellard
    }
998 158142c2 bellard
    else {
999 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1000 158142c2 bellard
    }
1001 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1002 158142c2 bellard
1003 158142c2 bellard
}
1004 158142c2 bellard
1005 158142c2 bellard
/*----------------------------------------------------------------------------
1006 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1007 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1008 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1009 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1010 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1011 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1012 158142c2 bellard
| point exponent.
1013 158142c2 bellard
*----------------------------------------------------------------------------*/
1014 158142c2 bellard
1015 158142c2 bellard
static float128
1016 158142c2 bellard
 normalizeRoundAndPackFloat128(
1017 158142c2 bellard
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1018 158142c2 bellard
{
1019 158142c2 bellard
    int8 shiftCount;
1020 158142c2 bellard
    bits64 zSig2;
1021 158142c2 bellard
1022 158142c2 bellard
    if ( zSig0 == 0 ) {
1023 158142c2 bellard
        zSig0 = zSig1;
1024 158142c2 bellard
        zSig1 = 0;
1025 158142c2 bellard
        zExp -= 64;
1026 158142c2 bellard
    }
1027 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1028 158142c2 bellard
    if ( 0 <= shiftCount ) {
1029 158142c2 bellard
        zSig2 = 0;
1030 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1031 158142c2 bellard
    }
1032 158142c2 bellard
    else {
1033 158142c2 bellard
        shift128ExtraRightJamming(
1034 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1035 158142c2 bellard
    }
1036 158142c2 bellard
    zExp -= shiftCount;
1037 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1038 158142c2 bellard
1039 158142c2 bellard
}
1040 158142c2 bellard
1041 158142c2 bellard
#endif
1042 158142c2 bellard
1043 158142c2 bellard
/*----------------------------------------------------------------------------
1044 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1045 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1046 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1047 158142c2 bellard
*----------------------------------------------------------------------------*/
1048 158142c2 bellard
1049 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
1050 158142c2 bellard
{
1051 158142c2 bellard
    flag zSign;
1052 158142c2 bellard
1053 158142c2 bellard
    if ( a == 0 ) return 0;
1054 158142c2 bellard
    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1055 158142c2 bellard
    zSign = ( a < 0 );
1056 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1057 158142c2 bellard
1058 158142c2 bellard
}
1059 158142c2 bellard
1060 158142c2 bellard
/*----------------------------------------------------------------------------
1061 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1062 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1063 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1064 158142c2 bellard
*----------------------------------------------------------------------------*/
1065 158142c2 bellard
1066 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
1067 158142c2 bellard
{
1068 158142c2 bellard
    flag zSign;
1069 158142c2 bellard
    uint32 absA;
1070 158142c2 bellard
    int8 shiftCount;
1071 158142c2 bellard
    bits64 zSig;
1072 158142c2 bellard
1073 158142c2 bellard
    if ( a == 0 ) return 0;
1074 158142c2 bellard
    zSign = ( a < 0 );
1075 158142c2 bellard
    absA = zSign ? - a : a;
1076 158142c2 bellard
    shiftCount = countLeadingZeros32( absA ) + 21;
1077 158142c2 bellard
    zSig = absA;
1078 158142c2 bellard
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1079 158142c2 bellard
1080 158142c2 bellard
}
1081 158142c2 bellard
1082 158142c2 bellard
#ifdef FLOATX80
1083 158142c2 bellard
1084 158142c2 bellard
/*----------------------------------------------------------------------------
1085 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1086 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1087 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1088 158142c2 bellard
| Arithmetic.
1089 158142c2 bellard
*----------------------------------------------------------------------------*/
1090 158142c2 bellard
1091 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Compute the magnitude in unsigned arithmetic.  Negating the most
       negative `int32' as a signed value overflows, which is undefined
       behaviour in C; the unsigned subtraction wraps and yields the
       intended magnitude for every input. */
    absA = (uint32) a;
    if ( zSign ) absA = 0 - absA;
    /* Shift count that moves the leading 1 bit of the 32-bit magnitude up
       to bit 63 of the 64-bit significand. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1106 158142c2 bellard
1107 158142c2 bellard
#endif
1108 158142c2 bellard
1109 158142c2 bellard
#ifdef FLOAT128
1110 158142c2 bellard
1111 158142c2 bellard
/*----------------------------------------------------------------------------
1112 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1113 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1114 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1115 158142c2 bellard
*----------------------------------------------------------------------------*/
1116 158142c2 bellard
1117 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Compute the magnitude in unsigned arithmetic.  Negating the most
       negative `int32' as a signed value overflows, which is undefined
       behaviour in C; the unsigned subtraction wraps and yields the
       intended magnitude for every input. */
    absA = (uint32) a;
    if ( zSign ) absA = 0 - absA;
    /* Shift count that moves the leading 1 bit of the 32-bit magnitude up
       to bit 48 of the high significand word; the low word is zero. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1132 158142c2 bellard
1133 158142c2 bellard
#endif
1134 158142c2 bellard
1135 158142c2 bellard
/*----------------------------------------------------------------------------
1136 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1137 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1138 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1139 158142c2 bellard
*----------------------------------------------------------------------------*/
1140 158142c2 bellard
1141 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    /* Zero is returned as the all-zero bit pattern. */
    if ( a == 0 ) return 0;
    zSign = ( a < 0 );
    /* Compute the magnitude in unsigned arithmetic.  Negating the most
       negative `int64' as a signed value overflows, which is undefined
       behaviour in C; the unsigned subtraction wraps and yields the
       intended magnitude for every input. */
    absA = (uint64) a;
    if ( zSign ) absA = 0 - absA;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At least 40 leading zeros: the magnitude fits in 24 bits, so it
           is shifted into place and packed without a rounding step. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* Wider magnitudes are aligned for roundAndPackFloat32.  A right
           shift goes through the jamming helper rather than a plain
           shift. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1166 158142c2 bellard
1167 158142c2 bellard
/*----------------------------------------------------------------------------
1168 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1169 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1170 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1171 158142c2 bellard
*----------------------------------------------------------------------------*/
1172 158142c2 bellard
1173 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    flag negative;

    /* Zero is returned as the all-zero bit pattern. */
    if ( a == 0 ) {
        return 0;
    }
    /* The most negative value has no positive counterpart, so it is packed
       directly (sign 1, biased exponent 0x43E, zero fraction) instead of
       being negated below. */
    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
        return packFloat64( 1, 0x43E, 0 );
    }
    negative = ( a < 0 );
    /* Hand the magnitude to the normalize/round/pack helper. */
    if ( negative ) {
        return normalizeRoundAndPackFloat64( negative, 0x43C, - a STATUS_VAR );
    }
    return normalizeRoundAndPackFloat64( negative, 0x43C, a STATUS_VAR );

}
1185 158142c2 bellard
1186 158142c2 bellard
#ifdef FLOATX80
1187 158142c2 bellard
1188 158142c2 bellard
/*----------------------------------------------------------------------------
1189 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1190 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1191 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1192 158142c2 bellard
| Arithmetic.
1193 158142c2 bellard
*----------------------------------------------------------------------------*/
1194 158142c2 bellard
1195 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Compute the magnitude in unsigned arithmetic.  Negating the most
       negative `int64' as a signed value overflows, which is undefined
       behaviour in C; the unsigned subtraction wraps and yields the
       intended magnitude for every input. */
    absA = (uint64) a;
    if ( zSign ) absA = 0 - absA;
    /* Normalize so the leading 1 bit of the magnitude sits at bit 63. */
    shiftCount = countLeadingZeros64( absA );
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1208 158142c2 bellard
1209 158142c2 bellard
#endif
1210 158142c2 bellard
1211 158142c2 bellard
#ifdef FLOAT128
1212 158142c2 bellard
1213 158142c2 bellard
/*----------------------------------------------------------------------------
1214 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1215 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1216 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1217 158142c2 bellard
*----------------------------------------------------------------------------*/
1218 158142c2 bellard
1219 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    bits64 zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Compute the magnitude in unsigned arithmetic.  Negating the most
       negative `int64' as a signed value overflows, which is undefined
       behaviour in C; the unsigned subtraction wraps and yields the
       intended magnitude for every input. */
    absA = (uint64) a;
    if ( zSign ) absA = 0 - absA;
    /* Total left shift needed within the 128-bit significand pair. */
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A shift of 64 or more is done by starting with the magnitude in
           the high word and shifting by the remainder. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1245 158142c2 bellard
1246 158142c2 bellard
#endif
1247 158142c2 bellard
1248 158142c2 bellard
/*----------------------------------------------------------------------------
1249 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1250 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1251 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1252 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1253 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1254 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1255 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1256 158142c2 bellard
*----------------------------------------------------------------------------*/
1257 158142c2 bellard
1258 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp, rightShift;
    bits32 frac;
    bits64 wideSig;

    /* Split the input into its sign, exponent and fraction fields. */
    sign = extractFloat32Sign( a );
    exp = extractFloat32Exp( a );
    frac = extractFloat32Frac( a );
    /* A NaN is treated as positive before rounding/packing. */
    if ( ( exp == 0xFF ) && frac ) {
        sign = 0;
    }
    /* Any nonzero exponent field gets the implicit leading 1 bit. */
    if ( exp ) {
        frac |= 0x00800000;
    }
    rightShift = 0xAF - exp;
    /* Place the significand in the upper half of a 64-bit word, then align
       it with a jamming right shift when the shift count is positive. */
    wideSig = frac;
    wideSig <<= 32;
    if ( 0 < rightShift ) {
        shift64RightJamming( wideSig, rightShift, &wideSig );
    }
    return roundAndPackInt32( sign, wideSig STATUS_VAR );

}
1277 158142c2 bellard
1278 158142c2 bellard
/*----------------------------------------------------------------------------
1279 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1280 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1281 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1282 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1283 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1284 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1285 158142c2 bellard
| returned.
1286 158142c2 bellard
*----------------------------------------------------------------------------*/
1287 158142c2 bellard
1288 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp, shift;
    bits32 sig;
    int32 result;

    sig = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    shift = exp - 0x9E;

    /* Magnitude of at least 2^31 (or infinity/NaN): out of range, except
       for the bit pattern 0xCF000000, which is exactly -2^31. */
    if ( 0 <= shift ) {
        if ( a == 0xCF000000 ) {
            return (sbits32) 0x80000000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exp == 0xFF ) && sig ) ) {
            return (sbits32) 0x80000000;
        }
        return 0x7FFFFFFF;
    }

    /* Magnitude below 1: the result is 0, inexact unless the input is a
       zero. */
    if ( exp <= 0x7E ) {
        if ( ( exp != 0 ) || ( sig != 0 ) ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Left-justify the significand (implicit bit included), take the
       integer part with a right shift, and flag inexact if any of the
       shifted-out bits were set. */
    sig = ( sig | 0x00800000 )<<8;
    result = sig>>( - shift );
    if ( (bits32) ( sig<<( shift & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1319 158142c2 bellard
1320 158142c2 bellard
/*----------------------------------------------------------------------------
1321 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1322 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1323 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1324 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1325 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1326 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1327 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1328 158142c2 bellard
*----------------------------------------------------------------------------*/
1329 158142c2 bellard
1330 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp, rightShift;
    bits32 frac;
    bits64 sigHigh, sigExtra;

    frac = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    rightShift = 0xBE - exp;

    /* Exponent too large for a 64-bit result (this includes infinities and
       NaNs): raise invalid and saturate.  NaNs and positive values give
       the largest positive integer. */
    if ( rightShift < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exp == 0xFF ) && frac ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    /* Any nonzero exponent field gets the implicit leading 1 bit. */
    if ( exp ) {
        frac |= 0x00800000;
    }
    /* Left-justify the 24-bit significand in 64 bits, then shift right,
       collecting the shifted-out bits in sigExtra for rounding. */
    sigHigh = frac;
    sigHigh <<= 40;
    shift64ExtraRightJamming( sigHigh, 0, rightShift, &sigHigh, &sigExtra );
    return roundAndPackInt64( sign, sigHigh, sigExtra STATUS_VAR );

}
1355 158142c2 bellard
1356 158142c2 bellard
/*----------------------------------------------------------------------------
1357 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1358 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1359 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1360 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1361 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1362 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1363 158142c2 bellard
| returned.
1364 158142c2 bellard
*----------------------------------------------------------------------------*/
1365 158142c2 bellard
1366 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp, shift;
    bits32 frac;
    bits64 wideSig;
    int64 result;

    frac = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    shift = exp - 0xBE;

    /* Magnitude of at least 2^63 (or infinity/NaN): out of range, except
       for the bit pattern 0xDF000000, which is exactly -2^63. */
    if ( 0 <= shift ) {
        if ( a == 0xDF000000 ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exp == 0xFF ) && frac ) ) {
            return (sbits64) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    /* Magnitude below 1: the result is 0, inexact unless the input is a
       zero. */
    if ( exp <= 0x7E ) {
        if ( ( exp != 0 ) || ( frac != 0 ) ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Left-justify the significand (implicit bit included) in 64 bits,
       take the integer part with a right shift, and flag inexact if any
       of the shifted-out bits were set. */
    wideSig = frac | 0x00800000;
    wideSig <<= 40;
    result = wideSig>>( - shift );
    if ( (bits64) ( wideSig<<( shift & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1401 158142c2 bellard
1402 158142c2 bellard
/*----------------------------------------------------------------------------
1403 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1404 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1405 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1406 158142c2 bellard
| Arithmetic.
1407 158142c2 bellard
*----------------------------------------------------------------------------*/
1408 158142c2 bellard
1409 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    bits32 frac;

    frac = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );

    /* Maximum exponent: NaN (nonzero fraction) or infinity. */
    if ( exp == 0xFF ) {
        if ( frac != 0 ) {
            return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
        }
        return packFloat64( sign, 0x7FF, 0 );
    }

    /* Zero exponent: signed zero, or a subnormal that is normalized before
       being packed. */
    if ( exp == 0 ) {
        if ( frac == 0 ) {
            return packFloat64( sign, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &exp, &frac );
        exp -= 1;
    }

    /* Adjust the exponent by 0x380 and move the 23-bit fraction up to the
       top of the 52-bit fraction field. */
    return packFloat64( sign, exp + 0x380, ( (bits64) frac )<<29 );

}
1430 158142c2 bellard
1431 158142c2 bellard
#ifdef FLOATX80
1432 158142c2 bellard
1433 158142c2 bellard
/*----------------------------------------------------------------------------
1434 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1435 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1436 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1437 158142c2 bellard
| Arithmetic.
1438 158142c2 bellard
*----------------------------------------------------------------------------*/
1439 158142c2 bellard
1440 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    bits32 frac;

    frac = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );

    /* Maximum exponent: NaN (nonzero fraction) or infinity. */
    if ( exp == 0xFF ) {
        if ( frac != 0 ) {
            return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    /* Zero exponent: signed zero, or a subnormal that is normalized before
       being packed. */
    if ( exp == 0 ) {
        if ( frac == 0 ) {
            return packFloatx80( sign, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &exp, &frac );
    }

    /* The leading 1 bit is stored explicitly here: set it, then
       left-justify the 24-bit significand in the 64-bit field. */
    frac |= 0x00800000;
    return packFloatx80( sign, exp + 0x3F80, ( (bits64) frac )<<40 );

}
1461 158142c2 bellard
1462 158142c2 bellard
#endif
1463 158142c2 bellard
1464 158142c2 bellard
#ifdef FLOAT128
1465 158142c2 bellard
1466 158142c2 bellard
/*----------------------------------------------------------------------------
1467 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1468 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1469 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1470 158142c2 bellard
| Arithmetic.
1471 158142c2 bellard
*----------------------------------------------------------------------------*/
1472 158142c2 bellard
1473 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    bits32 frac;

    frac = extractFloat32Frac( a );
    exp = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );

    /* Maximum exponent: NaN (nonzero fraction) or infinity. */
    if ( exp == 0xFF ) {
        if ( frac != 0 ) {
            return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
        }
        return packFloat128( sign, 0x7FFF, 0, 0 );
    }

    /* Zero exponent: signed zero, or a subnormal that is normalized before
       being packed. */
    if ( exp == 0 ) {
        if ( frac == 0 ) {
            return packFloat128( sign, 0, 0, 0 );
        }
        normalizeFloat32Subnormal( frac, &exp, &frac );
        exp -= 1;
    }

    /* Adjust the exponent by 0x3F80 and move the 23-bit fraction up to the
       top of the 48-bit high fraction word; the low word is zero. */
    return packFloat128( sign, exp + 0x3F80, ( (bits64) frac )<<25, 0 );

}
1494 158142c2 bellard
1495 158142c2 bellard
#endif
1496 158142c2 bellard
1497 158142c2 bellard
/*----------------------------------------------------------------------------
1498 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1499 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1500 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1501 158142c2 bellard
| Floating-Point Arithmetic.
1502 158142c2 bellard
*----------------------------------------------------------------------------*/
1503 158142c2 bellard
1504 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag sign;
    int16 exp;
    bits32 unitBit, belowUnitMask;
    int8 mode;
    float32 rounded;

    exp = extractFloat32Exp( a );

    /* Exponent 0x96 and above: no fraction bits lie below the units
       position, so the value is already integral.  NaNs go through the
       usual propagation routine. */
    if ( 0x96 <= exp ) {
        if ( ( exp == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a STATUS_VAR );
        }
        return a;
    }

    mode = STATUS(float_rounding_mode);

    /* Magnitude below 1: the result is a zero or +/-1, chosen by the
       rounding mode. */
    if ( exp <= 0x7E ) {
        /* +0 and -0 are returned unchanged and raise nothing. */
        if ( (bits32) ( a<<1 ) == 0 ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat32Sign( a );
        if ( mode == float_round_nearest_even ) {
            /* Strictly more than one half rounds away from zero to 1. */
            if ( ( exp == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( sign, 0x7F, 0 );
            }
        }
        else if ( mode == float_round_down ) {
            if ( sign ) {
                return 0xBF800000;
            }
            return 0;
        }
        else if ( mode == float_round_up ) {
            if ( sign ) {
                return 0x80000000;
            }
            return 0x3F800000;
        }
        return packFloat32( sign, 0, 0 );
    }

    /* General case: round directly on the encoded value.  unitBit is the
       bit with weight 1 and belowUnitMask covers the bits beneath it. */
    unitBit = 1;
    unitBit <<= 0x96 - exp;
    belowUnitMask = unitBit - 1;
    rounded = a;
    if ( mode == float_round_nearest_even ) {
        /* Add one half; on an exact tie, clear the units bit so the result
           is even. */
        rounded = rounded + ( unitBit>>1 );
        if ( ( rounded & belowUnitMask ) == 0 ) {
            rounded &= ~ unitBit;
        }
    }
    else if ( mode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude only when rounding away
           from zero for this sign. */
        if ( extractFloat32Sign( rounded ) ^ ( mode == float_round_up ) ) {
            rounded = rounded + belowUnitMask;
        }
    }
    rounded &= ~ belowUnitMask;
    if ( rounded != a ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return rounded;

}
1555 158142c2 bellard
1556 158142c2 bellard
/*----------------------------------------------------------------------------
1557 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1558 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1559 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1560 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1561 158142c2 bellard
| Floating-Point Arithmetic.
1562 158142c2 bellard
*----------------------------------------------------------------------------*/
1563 158142c2 bellard
1564 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 expA, expB, expZ;
    bits32 sigA, sigB, sigZ, sum;
    int16 delta;

    sigA = extractFloat32Frac( a );
    expA = extractFloat32Exp( a );
    sigB = extractFloat32Frac( b );
    expB = extractFloat32Exp( b );
    delta = expA - expB;
    /* Keep six low-order bits of headroom below the fractions. */
    sigA <<= 6;
    sigB <<= 6;

    /* Equal exponents: no alignment shift is needed. */
    if ( delta == 0 ) {
        if ( expA == 0xFF ) {
            if ( sigA | sigB ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        /* Two zero-exponent operands: the fractions are added and packed
           with a zero exponent field. */
        if ( expA == 0 ) return packFloat32( zSign, 0, ( sigA + sigB )>>6 );
        /* Both implicit bits (0x20000000 each) are added at once. */
        sigZ = 0x40000000 + sigA + sigB;
        return roundAndPackFloat32( zSign, expA, sigZ STATUS_VAR );
    }

    if ( 0 < delta ) {
        /* `a' has the larger exponent; align `b' to it. */
        if ( expA == 0xFF ) {
            if ( sigA ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        if ( expB == 0 ) {
            --delta;
        }
        else {
            sigB |= 0x20000000;
        }
        shift32RightJamming( sigB, delta, &sigB );
        expZ = expA;
    }
    else {
        /* `b' has the larger exponent; align `a' to it. */
        if ( expB == 0xFF ) {
            if ( sigB ) return propagateFloat32NaN( a, b STATUS_VAR );
            return packFloat32( zSign, 0xFF, 0 );
        }
        if ( expA == 0 ) {
            ++delta;
        }
        else {
            sigA |= 0x20000000;
        }
        shift32RightJamming( sigA, - delta, &sigA );
        expZ = expB;
    }

    /* Bit 29 is ORed into sigA on both paths, as in the original code.
       NOTE(review): this appears to supply the implicit bit of the
       larger-exponent operand, which was not set above -- confirm against
       shift32RightJamming. */
    sigA |= 0x20000000;
    sum = sigA + sigB;
    /* Use the sum shifted left by one with a decremented exponent, unless
       that would set the top bit; then keep the unshifted sum and
       exponent. */
    sigZ = sum<<1;
    if ( (sbits32) sigZ < 0 ) {
        sigZ = sum;
    }
    else {
        --expZ;
    }
    return roundAndPackFloat32( zSign, expZ, sigZ STATUS_VAR );

}
1626 158142c2 bellard
1627 158142c2 bellard
/*----------------------------------------------------------------------------
1628 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1629 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1630 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1631 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1632 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1633 158142c2 bellard
*----------------------------------------------------------------------------*/
1634 158142c2 bellard
1635 158142c2 bellard
/*----------------------------------------------------------------------------
| Implementation notes: both fractions are shifted left by 7 bits before the
| subtraction.  When the exponents differ, the operand with the smaller
| exponent is passed through shift32RightJamming by the exponent difference
| so that the two significands can be subtracted directly.  The difference is
| handed to normalizeRoundAndPackFloat32 together with the larger operand's
| exponent minus one.
*----------------------------------------------------------------------------*/
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1636 158142c2 bellard
{
1637 158142c2 bellard
    int16 aExp, bExp, zExp;
1638 158142c2 bellard
    bits32 aSig, bSig, zSig;
1639 158142c2 bellard
    int16 expDiff;
1640 158142c2 bellard
1641 158142c2 bellard
    aSig = extractFloat32Frac( a );
1642 158142c2 bellard
    aExp = extractFloat32Exp( a );
1643 158142c2 bellard
    bSig = extractFloat32Frac( b );
1644 158142c2 bellard
    bExp = extractFloat32Exp( b );
1645 158142c2 bellard
    expDiff = aExp - bExp;
1646 158142c2 bellard
    aSig <<= 7;
1647 158142c2 bellard
    bSig <<= 7;
1648 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1649 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1650 158142c2 bellard
    /* Equal exponents from here on.  Exponent 0xFF means both operands are
       infinities or NaNs: any nonzero fraction propagates a NaN, otherwise
       the invalid exception is raised and the default NaN returned. */
    if ( aExp == 0xFF ) {
1651 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1652 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1653 158142c2 bellard
        return float32_default_nan;
1654 158142c2 bellard
    }
1655 158142c2 bellard
    /* Both exponent fields are zero: use exponent 1 for both operands
       (presumably because zero/subnormal values carry no implicit bit --
       confirm against the format helpers). */
    if ( aExp == 0 ) {
1656 158142c2 bellard
        aExp = 1;
1657 158142c2 bellard
        bExp = 1;
1658 158142c2 bellard
    }
1659 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1660 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1661 158142c2 bellard
    /* Equal magnitudes: the result is a zero whose sign bit is set only when
       the rounding mode is float_round_down. */
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1662 158142c2 bellard
 bExpBigger:
1663 158142c2 bellard
    /* `b' has the larger exponent.  If that exponent is 0xFF, `b' is either
       a NaN (propagated) or an infinity, in which case the result is an
       infinity with the sign opposite to `zSign'. */
    if ( bExp == 0xFF ) {
1664 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1665 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1666 158142c2 bellard
    }
1667 158142c2 bellard
    /* expDiff is negative here.  A zero exponent field in `a' moves expDiff
       one step toward zero; otherwise bit 30 is set in aSig (presumably the
       implicit leading one of a normal number). */
    if ( aExp == 0 ) {
1668 158142c2 bellard
        ++expDiff;
1669 158142c2 bellard
    }
1670 158142c2 bellard
    else {
1671 158142c2 bellard
        aSig |= 0x40000000;
1672 158142c2 bellard
    }
1673 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1674 158142c2 bellard
    bSig |= 0x40000000;
1675 158142c2 bellard
 bBigger:
1676 158142c2 bellard
    /* |b| > |a|: subtract in the other order and flip the result sign. */
    zSig = bSig - aSig;
1677 158142c2 bellard
    zExp = bExp;
1678 158142c2 bellard
    zSign ^= 1;
1679 158142c2 bellard
    goto normalizeRoundAndPack;
1680 158142c2 bellard
 aExpBigger:
1681 158142c2 bellard
    /* `a' has the larger exponent.  If that exponent is 0xFF, `a' is either
       a NaN (propagated) or an infinity, which is returned unchanged. */
    if ( aExp == 0xFF ) {
1682 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1683 158142c2 bellard
        return a;
1684 158142c2 bellard
    }
1685 158142c2 bellard
    /* Mirror image of the bExpBigger alignment, with expDiff positive. */
    if ( bExp == 0 ) {
1686 158142c2 bellard
        --expDiff;
1687 158142c2 bellard
    }
1688 158142c2 bellard
    else {
1689 158142c2 bellard
        bSig |= 0x40000000;
1690 158142c2 bellard
    }
1691 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1692 158142c2 bellard
    aSig |= 0x40000000;
1693 158142c2 bellard
 aBigger:
1694 158142c2 bellard
    /* |a| > |b|: the result keeps `zSign'. */
    zSig = aSig - bSig;
1695 158142c2 bellard
    zExp = aExp;
1696 158142c2 bellard
 normalizeRoundAndPack:
1697 158142c2 bellard
    /* Both subtraction paths end here; the exponent is decremented once
       before normalization, rounding and packing. */
    --zExp;
1698 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1699 158142c2 bellard
1700 158142c2 bellard
}
1701 158142c2 bellard
1702 158142c2 bellard
/*----------------------------------------------------------------------------
1703 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1704 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1705 158142c2 bellard
| Binary Floating-Point Arithmetic.
1706 158142c2 bellard
*----------------------------------------------------------------------------*/
1707 158142c2 bellard
1708 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1709 158142c2 bellard
{
1710 158142c2 bellard
    flag aSign, bSign;
1711 158142c2 bellard
1712 158142c2 bellard
    aSign = extractFloat32Sign( a );
1713 158142c2 bellard
    bSign = extractFloat32Sign( b );
1714 158142c2 bellard
    if ( aSign == bSign ) {
1715 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1716 158142c2 bellard
    }
1717 158142c2 bellard
    else {
1718 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1719 158142c2 bellard
    }
1720 158142c2 bellard
1721 158142c2 bellard
}
1722 158142c2 bellard
1723 158142c2 bellard
/*----------------------------------------------------------------------------
1724 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1725 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1726 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1727 158142c2 bellard
*----------------------------------------------------------------------------*/
1728 158142c2 bellard
1729 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1730 158142c2 bellard
{
1731 158142c2 bellard
    flag aSign, bSign;
1732 158142c2 bellard
1733 158142c2 bellard
    aSign = extractFloat32Sign( a );
1734 158142c2 bellard
    bSign = extractFloat32Sign( b );
1735 158142c2 bellard
    if ( aSign == bSign ) {
1736 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1737 158142c2 bellard
    }
1738 158142c2 bellard
    else {
1739 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1740 158142c2 bellard
    }
1741 158142c2 bellard
1742 158142c2 bellard
}
1743 158142c2 bellard
1744 158142c2 bellard
/*----------------------------------------------------------------------------
1745 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1746 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1747 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1748 158142c2 bellard
*----------------------------------------------------------------------------*/
1749 158142c2 bellard
1750 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1751 158142c2 bellard
{
1752 158142c2 bellard
    flag aSign, bSign, zSign;
1753 158142c2 bellard
    int16 aExp, bExp, zExp;
1754 158142c2 bellard
    bits32 aSig, bSig;
1755 158142c2 bellard
    bits64 zSig64;
1756 158142c2 bellard
    bits32 zSig;
1757 158142c2 bellard
1758 158142c2 bellard
    aSig = extractFloat32Frac( a );
1759 158142c2 bellard
    aExp = extractFloat32Exp( a );
1760 158142c2 bellard
    aSign = extractFloat32Sign( a );
1761 158142c2 bellard
    bSig = extractFloat32Frac( b );
1762 158142c2 bellard
    bExp = extractFloat32Exp( b );
1763 158142c2 bellard
    bSign = extractFloat32Sign( b );
1764 158142c2 bellard
    zSign = aSign ^ bSign;
1765 158142c2 bellard
    if ( aExp == 0xFF ) {
1766 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1767 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1768 158142c2 bellard
        }
1769 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1770 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1771 158142c2 bellard
            return float32_default_nan;
1772 158142c2 bellard
        }
1773 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1774 158142c2 bellard
    }
1775 158142c2 bellard
    if ( bExp == 0xFF ) {
1776 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1777 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1778 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1779 158142c2 bellard
            return float32_default_nan;
1780 158142c2 bellard
        }
1781 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1782 158142c2 bellard
    }
1783 158142c2 bellard
    if ( aExp == 0 ) {
1784 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1785 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1786 158142c2 bellard
    }
1787 158142c2 bellard
    if ( bExp == 0 ) {
1788 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1789 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1790 158142c2 bellard
    }
1791 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1792 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1793 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1794 158142c2 bellard
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1795 158142c2 bellard
    zSig = zSig64;
1796 158142c2 bellard
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1797 158142c2 bellard
        zSig <<= 1;
1798 158142c2 bellard
        --zExp;
1799 158142c2 bellard
    }
1800 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1801 158142c2 bellard
1802 158142c2 bellard
}
1803 158142c2 bellard
1804 158142c2 bellard
/*----------------------------------------------------------------------------
1805 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1806 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1807 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1808 158142c2 bellard
*----------------------------------------------------------------------------*/
1809 158142c2 bellard
1810 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1811 158142c2 bellard
{
1812 158142c2 bellard
    flag aSign, bSign, zSign;
1813 158142c2 bellard
    int16 aExp, bExp, zExp;
1814 158142c2 bellard
    bits32 aSig, bSig, zSig;
1815 158142c2 bellard
1816 158142c2 bellard
    aSig = extractFloat32Frac( a );
1817 158142c2 bellard
    aExp = extractFloat32Exp( a );
1818 158142c2 bellard
    aSign = extractFloat32Sign( a );
1819 158142c2 bellard
    bSig = extractFloat32Frac( b );
1820 158142c2 bellard
    bExp = extractFloat32Exp( b );
1821 158142c2 bellard
    bSign = extractFloat32Sign( b );
1822 158142c2 bellard
    zSign = aSign ^ bSign;
1823 158142c2 bellard
    if ( aExp == 0xFF ) {
1824 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1825 158142c2 bellard
        if ( bExp == 0xFF ) {
1826 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1827 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1828 158142c2 bellard
            return float32_default_nan;
1829 158142c2 bellard
        }
1830 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1831 158142c2 bellard
    }
1832 158142c2 bellard
    if ( bExp == 0xFF ) {
1833 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1834 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
1835 158142c2 bellard
    }
1836 158142c2 bellard
    if ( bExp == 0 ) {
1837 158142c2 bellard
        if ( bSig == 0 ) {
1838 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
1839 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
1840 158142c2 bellard
                return float32_default_nan;
1841 158142c2 bellard
            }
1842 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
1843 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1844 158142c2 bellard
        }
1845 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1846 158142c2 bellard
    }
1847 158142c2 bellard
    if ( aExp == 0 ) {
1848 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1849 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1850 158142c2 bellard
    }
1851 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
1852 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1853 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1854 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
1855 158142c2 bellard
        aSig >>= 1;
1856 158142c2 bellard
        ++zExp;
1857 158142c2 bellard
    }
1858 158142c2 bellard
    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1859 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
1860 158142c2 bellard
        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1861 158142c2 bellard
    }
1862 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1863 158142c2 bellard
1864 158142c2 bellard
}
1865 158142c2 bellard
1866 158142c2 bellard
/*----------------------------------------------------------------------------
1867 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
1868 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
1869 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1870 158142c2 bellard
*----------------------------------------------------------------------------*/
1871 158142c2 bellard
1872 158142c2 bellard
/*----------------------------------------------------------------------------
| Implementation notes: special operands are handled first, then both
| significands get bit 23 set.  Exponent differences below 32 use a single
| 64-by-32-bit division; larger differences reduce `a' in a loop built on
| estimateDiv128To64.  The closing do/while keeps subtracting bSig until the
| remainder turns negative, then picks whichever of the last two candidates
| has the smaller magnitude.
*----------------------------------------------------------------------------*/
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
1873 158142c2 bellard
{
1874 158142c2 bellard
    flag aSign, bSign, zSign;
1875 158142c2 bellard
    int16 aExp, bExp, expDiff;
1876 158142c2 bellard
    bits32 aSig, bSig;
1877 158142c2 bellard
    bits32 q;
1878 158142c2 bellard
    bits64 aSig64, bSig64, q64;
1879 158142c2 bellard
    bits32 alternateASig;
1880 158142c2 bellard
    sbits32 sigMean;
1881 158142c2 bellard
1882 158142c2 bellard
    aSig = extractFloat32Frac( a );
1883 158142c2 bellard
    aExp = extractFloat32Exp( a );
1884 158142c2 bellard
    aSign = extractFloat32Sign( a );
1885 158142c2 bellard
    bSig = extractFloat32Frac( b );
1886 158142c2 bellard
    bExp = extractFloat32Exp( b );
1887 158142c2 bellard
    /* bSign is extracted here but not read again in this function. */
    bSign = extractFloat32Sign( b );
1888 158142c2 bellard
    /* `a' is an infinity or a NaN: propagate any NaN operand, otherwise
       raise the invalid exception and return the default NaN. */
    if ( aExp == 0xFF ) {
1889 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1890 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1891 158142c2 bellard
        }
1892 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1893 158142c2 bellard
        return float32_default_nan;
1894 158142c2 bellard
    }
1895 158142c2 bellard
    /* `b' is a NaN (propagated) or an infinity, in which case `a' is
       returned unchanged. */
    if ( bExp == 0xFF ) {
1896 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1897 158142c2 bellard
        return a;
1898 158142c2 bellard
    }
1899 158142c2 bellard
    /* A zero divisor is invalid; a subnormal divisor is normalized. */
    if ( bExp == 0 ) {
1900 158142c2 bellard
        if ( bSig == 0 ) {
1901 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1902 158142c2 bellard
            return float32_default_nan;
1903 158142c2 bellard
        }
1904 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1905 158142c2 bellard
    }
1906 158142c2 bellard
    /* A zero dividend is returned as is; a subnormal one is normalized. */
    if ( aExp == 0 ) {
1907 158142c2 bellard
        if ( aSig == 0 ) return a;
1908 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1909 158142c2 bellard
    }
1910 158142c2 bellard
    expDiff = aExp - bExp;
1911 158142c2 bellard
    aSig |= 0x00800000;
1912 158142c2 bellard
    bSig |= 0x00800000;
1913 158142c2 bellard
    if ( expDiff < 32 ) {
1914 158142c2 bellard
        aSig <<= 8;
1915 158142c2 bellard
        bSig <<= 8;
1916 158142c2 bellard
        if ( expDiff < 0 ) {
1917 158142c2 bellard
            /* `a' is returned unchanged when its exponent is more than one
               below that of `b'. */
            if ( expDiff < -1 ) return a;
1918 158142c2 bellard
            aSig >>= 1;
1919 158142c2 bellard
        }
1920 158142c2 bellard
        /* q starts as the first quotient bit (0 or 1). */
        q = ( bSig <= aSig );
1921 158142c2 bellard
        if ( q ) aSig -= bSig;
1922 158142c2 bellard
        if ( 0 < expDiff ) {
1923 158142c2 bellard
            q = ( ( (bits64) aSig )<<32 ) / bSig;
1924 158142c2 bellard
            /* expDiff is in 1..31 here, so the shift count is in 1..31. */
            q >>= 32 - expDiff;
1925 158142c2 bellard
            bSig >>= 2;
1926 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1927 158142c2 bellard
        }
1928 158142c2 bellard
        else {
1929 158142c2 bellard
            aSig >>= 2;
1930 158142c2 bellard
            bSig >>= 2;
1931 158142c2 bellard
        }
1932 158142c2 bellard
    }
1933 158142c2 bellard
    else {
1934 158142c2 bellard
        /* Large exponent difference: reduce in 64-bit arithmetic, lowering
           expDiff by 62 on each pass of the loop. */
        if ( bSig <= aSig ) aSig -= bSig;
1935 158142c2 bellard
        aSig64 = ( (bits64) aSig )<<40;
1936 158142c2 bellard
        bSig64 = ( (bits64) bSig )<<40;
1937 158142c2 bellard
        expDiff -= 64;
1938 158142c2 bellard
        while ( 0 < expDiff ) {
1939 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1940 158142c2 bellard
            /* Lower the estimate by 2, clamping at zero (presumably so the
               estimate can never exceed the true quotient -- confirm against
               estimateDiv128To64). */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1941 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
1942 158142c2 bellard
            expDiff -= 62;
1943 158142c2 bellard
        }
1944 158142c2 bellard
        expDiff += 64;
1945 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1946 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1947 158142c2 bellard
        q = q64>>( 64 - expDiff );
1948 158142c2 bellard
        bSig <<= 6;
1949 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1950 158142c2 bellard
    }
1951 158142c2 bellard
    /* Subtract bSig until the remainder, read as signed, goes negative.
       alternateASig holds the value from just before the final subtraction,
       and q is incremented once per subtraction. */
    do {
1952 158142c2 bellard
        alternateASig = aSig;
1953 158142c2 bellard
        ++q;
1954 158142c2 bellard
        aSig -= bSig;
1955 158142c2 bellard
    } while ( 0 <= (sbits32) aSig );
1956 158142c2 bellard
    /* The sign of the sum shows which candidate is smaller in magnitude.
       On an exact tie the earlier candidate is kept when q is odd. */
    sigMean = aSig + alternateASig;
1957 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1958 158142c2 bellard
        aSig = alternateASig;
1959 158142c2 bellard
    }
1960 158142c2 bellard
    /* A negative remainder is negated, and its sign is folded into the
       sign of `a' for the packed result. */
    zSign = ( (sbits32) aSig < 0 );
1961 158142c2 bellard
    if ( zSign ) aSig = - aSig;
1962 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
1963 158142c2 bellard
1964 158142c2 bellard
}
1965 158142c2 bellard
1966 158142c2 bellard
/*----------------------------------------------------------------------------
1967 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
1968 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
1969 158142c2 bellard
| Floating-Point Arithmetic.
1970 158142c2 bellard
*----------------------------------------------------------------------------*/
1971 158142c2 bellard
1972 158142c2 bellard
/*----------------------------------------------------------------------------
| Implementation notes: NaN, infinite, negative and zero inputs are handled
| first.  For the remaining inputs an initial root comes from estimateSqrt32;
| when its low seven bits are 5 or less, the root is corrected against the
| exact remainder and its lowest bit is set if the remainder is nonzero.  The
| result then goes to roundAndPackFloat32.
*----------------------------------------------------------------------------*/
float32 float32_sqrt( float32 a STATUS_PARAM )
1973 158142c2 bellard
{
1974 158142c2 bellard
    flag aSign;
1975 158142c2 bellard
    int16 aExp, zExp;
1976 158142c2 bellard
    bits32 aSig, zSig;
1977 158142c2 bellard
    bits64 rem, term;
1978 158142c2 bellard
1979 158142c2 bellard
    aSig = extractFloat32Frac( a );
1980 158142c2 bellard
    aExp = extractFloat32Exp( a );
1981 158142c2 bellard
    aSign = extractFloat32Sign( a );
1982 158142c2 bellard
    /* NaN input is propagated; an infinity with a clear sign bit is
       returned as is; one with the sign bit set is invalid. */
    if ( aExp == 0xFF ) {
1983 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, 0 STATUS_VAR );
1984 158142c2 bellard
        if ( ! aSign ) return a;
1985 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1986 158142c2 bellard
        return float32_default_nan;
1987 158142c2 bellard
    }
1988 158142c2 bellard
    /* Sign bit set: a zero is returned unchanged, anything else is
       invalid. */
    if ( aSign ) {
1989 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
1990 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1991 158142c2 bellard
        return float32_default_nan;
1992 158142c2 bellard
    }
1993 158142c2 bellard
    /* A zero with the sign bit clear returns 0; a subnormal is normalized
       before the root is taken. */
    if ( aExp == 0 ) {
1994 158142c2 bellard
        if ( aSig == 0 ) return 0;
1995 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1996 158142c2 bellard
    }
1997 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1998 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
1999 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2000 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2001 158142c2 bellard
        /* zSig below 2 is replaced by 0x7FFFFFFF (presumably the unsigned
           `+ 2' above wrapped around -- confirm against estimateSqrt32). */
        if ( zSig < 2 ) {
2002 158142c2 bellard
            zSig = 0x7FFFFFFF;
2003 158142c2 bellard
            goto roundAndPack;
2004 158142c2 bellard
        }
2005 158142c2 bellard
        /* Shift the significand right by one when the exponent is odd. */
        aSig >>= aExp & 1;
2006 158142c2 bellard
        term = ( (bits64) zSig ) * zSig;
2007 158142c2 bellard
        rem = ( ( (bits64) aSig )<<32 ) - term;
2008 158142c2 bellard
        /* While the remainder is negative, step zSig down by one and add
           back 2*zSig + 1, the difference between consecutive squares. */
        while ( (sbits64) rem < 0 ) {
2009 158142c2 bellard
            --zSig;
2010 158142c2 bellard
            rem += ( ( (bits64) zSig )<<1 ) | 1;
2011 158142c2 bellard
        }
2012 158142c2 bellard
        /* Set the lowest bit when the root is not exact. */
        zSig |= ( rem != 0 );
2013 158142c2 bellard
    }
2014 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2015 158142c2 bellard
 roundAndPack:
2016 158142c2 bellard
    /* The result is always packed with a zero sign bit. */
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2017 158142c2 bellard
2018 158142c2 bellard
}
2019 158142c2 bellard
2020 158142c2 bellard
/*----------------------------------------------------------------------------
2021 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2022 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2023 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2024 158142c2 bellard
*----------------------------------------------------------------------------*/
2025 158142c2 bellard
2026 158142c2 bellard
flag float32_eq( float32 a, float32 b STATUS_PARAM )
2027 158142c2 bellard
{
2028 158142c2 bellard
2029 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2030 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2031 158142c2 bellard
       ) {
2032 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2033 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2034 158142c2 bellard
        }
2035 158142c2 bellard
        return 0;
2036 158142c2 bellard
    }
2037 158142c2 bellard
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2038 158142c2 bellard
2039 158142c2 bellard
}
2040 158142c2 bellard
2041 158142c2 bellard
/*----------------------------------------------------------------------------
2042 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2043 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
2044 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2045 158142c2 bellard
| Arithmetic.
2046 158142c2 bellard
*----------------------------------------------------------------------------*/
2047 158142c2 bellard
2048 158142c2 bellard
flag float32_le( float32 a, float32 b STATUS_PARAM )
2049 158142c2 bellard
{
2050 158142c2 bellard
    flag aSign, bSign;
2051 158142c2 bellard
2052 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2053 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2054 158142c2 bellard
       ) {
2055 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2056 158142c2 bellard
        return 0;
2057 158142c2 bellard
    }
2058 158142c2 bellard
    aSign = extractFloat32Sign( a );
2059 158142c2 bellard
    bSign = extractFloat32Sign( b );
2060 158142c2 bellard
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2061 158142c2 bellard
    return ( a == b ) || ( aSign ^ ( a < b ) );
2062 158142c2 bellard
2063 158142c2 bellard
}
2064 158142c2 bellard
2065 158142c2 bellard
/*----------------------------------------------------------------------------
2066 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2067 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2068 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2069 158142c2 bellard
*----------------------------------------------------------------------------*/
2070 158142c2 bellard
2071 158142c2 bellard
flag float32_lt( float32 a, float32 b STATUS_PARAM )
2072 158142c2 bellard
{
2073 158142c2 bellard
    flag aSign, bSign;
2074 158142c2 bellard
2075 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2076 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2077 158142c2 bellard
       ) {
2078 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2079 158142c2 bellard
        return 0;
2080 158142c2 bellard
    }
2081 158142c2 bellard
    aSign = extractFloat32Sign( a );
2082 158142c2 bellard
    bSign = extractFloat32Sign( b );
2083 158142c2 bellard
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2084 158142c2 bellard
    return ( a != b ) && ( aSign ^ ( a < b ) );
2085 158142c2 bellard
2086 158142c2 bellard
}
2087 158142c2 bellard
2088 158142c2 bellard
/*----------------------------------------------------------------------------
2089 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2090 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2091 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2092 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2093 158142c2 bellard
*----------------------------------------------------------------------------*/
2094 158142c2 bellard
2095 158142c2 bellard
flag float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
2096 158142c2 bellard
{
2097 158142c2 bellard
2098 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2099 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2100 158142c2 bellard
       ) {
2101 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2102 158142c2 bellard
        return 0;
2103 158142c2 bellard
    }
2104 158142c2 bellard
    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2105 158142c2 bellard
2106 158142c2 bellard
}
2107 158142c2 bellard
2108 158142c2 bellard
/*----------------------------------------------------------------------------
2109 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2110 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2111 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2112 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2113 158142c2 bellard
*----------------------------------------------------------------------------*/
2114 158142c2 bellard
2115 158142c2 bellard
flag float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2116 158142c2 bellard
{
2117 158142c2 bellard
    flag aSign, bSign;
2118 158142c2 bellard
2119 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2120 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2121 158142c2 bellard
       ) {
2122 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2123 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2124 158142c2 bellard
        }
2125 158142c2 bellard
        return 0;
2126 158142c2 bellard
    }
2127 158142c2 bellard
    aSign = extractFloat32Sign( a );
2128 158142c2 bellard
    bSign = extractFloat32Sign( b );
2129 158142c2 bellard
    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2130 158142c2 bellard
    return ( a == b ) || ( aSign ^ ( a < b ) );
2131 158142c2 bellard
2132 158142c2 bellard
}
2133 158142c2 bellard
2134 158142c2 bellard
/*----------------------------------------------------------------------------
2135 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2136 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2137 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2138 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2139 158142c2 bellard
*----------------------------------------------------------------------------*/
2140 158142c2 bellard
2141 158142c2 bellard
flag float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2142 158142c2 bellard
{
2143 158142c2 bellard
    flag aSign, bSign;
2144 158142c2 bellard
2145 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2146 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2147 158142c2 bellard
       ) {
2148 158142c2 bellard
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2149 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2150 158142c2 bellard
        }
2151 158142c2 bellard
        return 0;
2152 158142c2 bellard
    }
2153 158142c2 bellard
    aSign = extractFloat32Sign( a );
2154 158142c2 bellard
    bSign = extractFloat32Sign( b );
2155 158142c2 bellard
    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2156 158142c2 bellard
    return ( a != b ) && ( aSign ^ ( a < b ) );
2157 158142c2 bellard
2158 158142c2 bellard
}
2159 158142c2 bellard
2160 158142c2 bellard
/*----------------------------------------------------------------------------
2161 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2162 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2163 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2164 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2165 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2166 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2167 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2168 158142c2 bellard
*----------------------------------------------------------------------------*/
2169 158142c2 bellard
2170 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
2171 158142c2 bellard
{
2172 158142c2 bellard
    flag aSign;
2173 158142c2 bellard
    int16 aExp, shiftCount;
2174 158142c2 bellard
    bits64 aSig;
2175 158142c2 bellard
2176 158142c2 bellard
    aSig = extractFloat64Frac( a );
2177 158142c2 bellard
    aExp = extractFloat64Exp( a );
2178 158142c2 bellard
    aSign = extractFloat64Sign( a );
2179 158142c2 bellard
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2180 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2181 158142c2 bellard
    shiftCount = 0x42C - aExp;
2182 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2183 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
2184 158142c2 bellard
2185 158142c2 bellard
}
2186 158142c2 bellard
2187 158142c2 bellard
/*----------------------------------------------------------------------------
2188 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2189 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2190 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2191 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2192 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2193 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2194 158142c2 bellard
| returned.
2195 158142c2 bellard
*----------------------------------------------------------------------------*/
2196 158142c2 bellard
2197 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2198 158142c2 bellard
{
2199 158142c2 bellard
    flag aSign;
2200 158142c2 bellard
    int16 aExp, shiftCount;
2201 158142c2 bellard
    bits64 aSig, savedASig;
2202 158142c2 bellard
    int32 z;
2203 158142c2 bellard
2204 158142c2 bellard
    aSig = extractFloat64Frac( a );
2205 158142c2 bellard
    aExp = extractFloat64Exp( a );
2206 158142c2 bellard
    aSign = extractFloat64Sign( a );
2207 158142c2 bellard
    if ( 0x41E < aExp ) {
2208 158142c2 bellard
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2209 158142c2 bellard
        goto invalid;
2210 158142c2 bellard
    }
2211 158142c2 bellard
    else if ( aExp < 0x3FF ) {
2212 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2213 158142c2 bellard
        return 0;
2214 158142c2 bellard
    }
2215 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2216 158142c2 bellard
    shiftCount = 0x433 - aExp;
2217 158142c2 bellard
    savedASig = aSig;
2218 158142c2 bellard
    aSig >>= shiftCount;
2219 158142c2 bellard
    z = aSig;
2220 158142c2 bellard
    if ( aSign ) z = - z;
2221 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
2222 158142c2 bellard
 invalid:
2223 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2224 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2225 158142c2 bellard
    }
2226 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
2227 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2228 158142c2 bellard
    }
2229 158142c2 bellard
    return z;
2230 158142c2 bellard
2231 158142c2 bellard
}
2232 158142c2 bellard
2233 158142c2 bellard
/*----------------------------------------------------------------------------
2234 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2235 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2236 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2237 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2238 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2239 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2240 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2241 158142c2 bellard
*----------------------------------------------------------------------------*/
2242 158142c2 bellard
2243 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
2244 158142c2 bellard
{
2245 158142c2 bellard
    flag aSign;
2246 158142c2 bellard
    int16 aExp, shiftCount;
2247 158142c2 bellard
    bits64 aSig, aSigExtra;
2248 158142c2 bellard
2249 158142c2 bellard
    aSig = extractFloat64Frac( a );
2250 158142c2 bellard
    aExp = extractFloat64Exp( a );
2251 158142c2 bellard
    aSign = extractFloat64Sign( a );
2252 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2253 158142c2 bellard
    shiftCount = 0x433 - aExp;
2254 158142c2 bellard
    if ( shiftCount <= 0 ) {
2255 158142c2 bellard
        if ( 0x43E < aExp ) {
2256 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2257 158142c2 bellard
            if (    ! aSign
2258 158142c2 bellard
                 || (    ( aExp == 0x7FF )
2259 158142c2 bellard
                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2260 158142c2 bellard
               ) {
2261 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
2262 158142c2 bellard
            }
2263 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2264 158142c2 bellard
        }
2265 158142c2 bellard
        aSigExtra = 0;
2266 158142c2 bellard
        aSig <<= - shiftCount;
2267 158142c2 bellard
    }
2268 158142c2 bellard
    else {
2269 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2270 158142c2 bellard
    }
2271 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2272 158142c2 bellard
2273 158142c2 bellard
}
2274 158142c2 bellard
2275 158142c2 bellard
/*----------------------------------------------------------------------------
2276 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2277 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2278 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2279 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2280 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2281 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2282 158142c2 bellard
| returned.
2283 158142c2 bellard
*----------------------------------------------------------------------------*/
2284 158142c2 bellard
2285 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2286 158142c2 bellard
{
2287 158142c2 bellard
    flag aSign;
2288 158142c2 bellard
    int16 aExp, shiftCount;
2289 158142c2 bellard
    bits64 aSig;
2290 158142c2 bellard
    int64 z;
2291 158142c2 bellard
2292 158142c2 bellard
    aSig = extractFloat64Frac( a );
2293 158142c2 bellard
    aExp = extractFloat64Exp( a );
2294 158142c2 bellard
    aSign = extractFloat64Sign( a );
2295 158142c2 bellard
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2296 158142c2 bellard
    shiftCount = aExp - 0x433;
2297 158142c2 bellard
    if ( 0 <= shiftCount ) {
2298 158142c2 bellard
        if ( 0x43E <= aExp ) {
2299 158142c2 bellard
            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2300 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2301 158142c2 bellard
                if (    ! aSign
2302 158142c2 bellard
                     || (    ( aExp == 0x7FF )
2303 158142c2 bellard
                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2304 158142c2 bellard
                   ) {
2305 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2306 158142c2 bellard
                }
2307 158142c2 bellard
            }
2308 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
2309 158142c2 bellard
        }
2310 158142c2 bellard
        z = aSig<<shiftCount;
2311 158142c2 bellard
    }
2312 158142c2 bellard
    else {
2313 158142c2 bellard
        if ( aExp < 0x3FE ) {
2314 158142c2 bellard
            if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2315 158142c2 bellard
            return 0;
2316 158142c2 bellard
        }
2317 158142c2 bellard
        z = aSig>>( - shiftCount );
2318 158142c2 bellard
        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2319 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
2320 158142c2 bellard
        }
2321 158142c2 bellard
    }
2322 158142c2 bellard
    if ( aSign ) z = - z;
2323 158142c2 bellard
    return z;
2324 158142c2 bellard
2325 158142c2 bellard
}
2326 158142c2 bellard
2327 158142c2 bellard
/*----------------------------------------------------------------------------
2328 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2329 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2330 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2331 158142c2 bellard
| Arithmetic.
2332 158142c2 bellard
*----------------------------------------------------------------------------*/
2333 158142c2 bellard
2334 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
2335 158142c2 bellard
{
2336 158142c2 bellard
    flag aSign;
2337 158142c2 bellard
    int16 aExp;
2338 158142c2 bellard
    bits64 aSig;
2339 158142c2 bellard
    bits32 zSig;
2340 158142c2 bellard
2341 158142c2 bellard
    aSig = extractFloat64Frac( a );
2342 158142c2 bellard
    aExp = extractFloat64Exp( a );
2343 158142c2 bellard
    aSign = extractFloat64Sign( a );
2344 158142c2 bellard
    if ( aExp == 0x7FF ) {
2345 158142c2 bellard
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
2346 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
2347 158142c2 bellard
    }
2348 158142c2 bellard
    shift64RightJamming( aSig, 22, &aSig );
2349 158142c2 bellard
    zSig = aSig;
2350 158142c2 bellard
    if ( aExp || zSig ) {
2351 158142c2 bellard
        zSig |= 0x40000000;
2352 158142c2 bellard
        aExp -= 0x381;
2353 158142c2 bellard
    }
2354 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2355 158142c2 bellard
2356 158142c2 bellard
}
2357 158142c2 bellard
2358 158142c2 bellard
#ifdef FLOATX80
2359 158142c2 bellard
2360 158142c2 bellard
/*----------------------------------------------------------------------------
2361 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2362 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2363 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2364 158142c2 bellard
| Arithmetic.
2365 158142c2 bellard
*----------------------------------------------------------------------------*/
2366 158142c2 bellard
2367 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2368 158142c2 bellard
{
2369 158142c2 bellard
    flag aSign;
2370 158142c2 bellard
    int16 aExp;
2371 158142c2 bellard
    bits64 aSig;
2372 158142c2 bellard
2373 158142c2 bellard
    aSig = extractFloat64Frac( a );
2374 158142c2 bellard
    aExp = extractFloat64Exp( a );
2375 158142c2 bellard
    aSign = extractFloat64Sign( a );
2376 158142c2 bellard
    if ( aExp == 0x7FF ) {
2377 158142c2 bellard
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
2378 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2379 158142c2 bellard
    }
2380 158142c2 bellard
    if ( aExp == 0 ) {
2381 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2382 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2383 158142c2 bellard
    }
2384 158142c2 bellard
    return
2385 158142c2 bellard
        packFloatx80(
2386 158142c2 bellard
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2387 158142c2 bellard
2388 158142c2 bellard
}
2389 158142c2 bellard
2390 158142c2 bellard
#endif
2391 158142c2 bellard
2392 158142c2 bellard
#ifdef FLOAT128
2393 158142c2 bellard
2394 158142c2 bellard
/*----------------------------------------------------------------------------
2395 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2396 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2397 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2398 158142c2 bellard
| Arithmetic.
2399 158142c2 bellard
*----------------------------------------------------------------------------*/
2400 158142c2 bellard
2401 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
2402 158142c2 bellard
{
2403 158142c2 bellard
    flag aSign;
2404 158142c2 bellard
    int16 aExp;
2405 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
2406 158142c2 bellard
2407 158142c2 bellard
    aSig = extractFloat64Frac( a );
2408 158142c2 bellard
    aExp = extractFloat64Exp( a );
2409 158142c2 bellard
    aSign = extractFloat64Sign( a );
2410 158142c2 bellard
    if ( aExp == 0x7FF ) {
2411 158142c2 bellard
        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
2412 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
2413 158142c2 bellard
    }
2414 158142c2 bellard
    if ( aExp == 0 ) {
2415 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2416 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2417 158142c2 bellard
        --aExp;
2418 158142c2 bellard
    }
2419 158142c2 bellard
    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2420 158142c2 bellard
    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2421 158142c2 bellard
2422 158142c2 bellard
}
2423 158142c2 bellard
2424 158142c2 bellard
#endif
2425 158142c2 bellard
2426 158142c2 bellard
/*----------------------------------------------------------------------------
2427 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
2428 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
2429 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
2430 158142c2 bellard
| Floating-Point Arithmetic.
2431 158142c2 bellard
*----------------------------------------------------------------------------*/
2432 158142c2 bellard
2433 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
2434 158142c2 bellard
{
2435 158142c2 bellard
    flag aSign;
2436 158142c2 bellard
    int16 aExp;
2437 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
2438 158142c2 bellard
    int8 roundingMode;
2439 158142c2 bellard
    float64 z;
2440 158142c2 bellard
2441 158142c2 bellard
    aExp = extractFloat64Exp( a );
2442 158142c2 bellard
    if ( 0x433 <= aExp ) {
2443 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2444 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
2445 158142c2 bellard
        }
2446 158142c2 bellard
        return a;
2447 158142c2 bellard
    }
2448 158142c2 bellard
    if ( aExp < 0x3FF ) {
2449 158142c2 bellard
        if ( (bits64) ( a<<1 ) == 0 ) return a;
2450 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
2451 158142c2 bellard
        aSign = extractFloat64Sign( a );
2452 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
2453 158142c2 bellard
         case float_round_nearest_even:
2454 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2455 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
2456 158142c2 bellard
            }
2457 158142c2 bellard
            break;
2458 158142c2 bellard
         case float_round_down:
2459 158142c2 bellard
            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2460 158142c2 bellard
         case float_round_up:
2461 158142c2 bellard
            return
2462 158142c2 bellard
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2463 158142c2 bellard
        }
2464 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
2465 158142c2 bellard
    }
2466 158142c2 bellard
    lastBitMask = 1;
2467 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
2468 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
2469 158142c2 bellard
    z = a;
2470 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
2471 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
2472 158142c2 bellard
        z += lastBitMask>>1;
2473 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2474 158142c2 bellard
    }
2475 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
2476 158142c2 bellard
        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2477 158142c2 bellard
            z += roundBitsMask;
2478 158142c2 bellard
        }
2479 158142c2 bellard
    }
2480 158142c2 bellard
    z &= ~ roundBitsMask;
2481 158142c2 bellard
    if ( z != a ) STATUS(float_exception_flags) |= float_flag_inexact;
2482 158142c2 bellard
    return z;
2483 158142c2 bellard
2484 158142c2 bellard
}
2485 158142c2 bellard
2486 158142c2 bellard
/*----------------------------------------------------------------------------
2487 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
2488 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2489 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
2490 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
2491 158142c2 bellard
| Floating-Point Arithmetic.
2492 158142c2 bellard
*----------------------------------------------------------------------------*/
2493 158142c2 bellard
2494 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2495 158142c2 bellard
{
2496 158142c2 bellard
    int16 aExp, bExp, zExp;
2497 158142c2 bellard
    bits64 aSig, bSig, zSig;
2498 158142c2 bellard
    int16 expDiff;
2499 158142c2 bellard
2500 158142c2 bellard
    aSig = extractFloat64Frac( a );
2501 158142c2 bellard
    aExp = extractFloat64Exp( a );
2502 158142c2 bellard
    bSig = extractFloat64Frac( b );
2503 158142c2 bellard
    bExp = extractFloat64Exp( b );
2504 158142c2 bellard
    expDiff = aExp - bExp;
2505 158142c2 bellard
    aSig <<= 9;
2506 158142c2 bellard
    bSig <<= 9;
2507 158142c2 bellard
    if ( 0 < expDiff ) {
2508 158142c2 bellard
        if ( aExp == 0x7FF ) {
2509 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2510 158142c2 bellard
            return a;
2511 158142c2 bellard
        }
2512 158142c2 bellard
        if ( bExp == 0 ) {
2513 158142c2 bellard
            --expDiff;
2514 158142c2 bellard
        }
2515 158142c2 bellard
        else {
2516 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
2517 158142c2 bellard
        }
2518 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
2519 158142c2 bellard
        zExp = aExp;
2520 158142c2 bellard
    }
2521 158142c2 bellard
    else if ( expDiff < 0 ) {
2522 158142c2 bellard
        if ( bExp == 0x7FF ) {
2523 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2524 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2525 158142c2 bellard
        }
2526 158142c2 bellard
        if ( aExp == 0 ) {
2527 158142c2 bellard
            ++expDiff;
2528 158142c2 bellard
        }
2529 158142c2 bellard
        else {
2530 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
2531 158142c2 bellard
        }
2532 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
2533 158142c2 bellard
        zExp = bExp;
2534 158142c2 bellard
    }
2535 158142c2 bellard
    else {
2536 158142c2 bellard
        if ( aExp == 0x7FF ) {
2537 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2538 158142c2 bellard
            return a;
2539 158142c2 bellard
        }
2540 158142c2 bellard
        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2541 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2542 158142c2 bellard
        zExp = aExp;
2543 158142c2 bellard
        goto roundAndPack;
2544 158142c2 bellard
    }
2545 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
2546 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
2547 158142c2 bellard
    --zExp;
2548 158142c2 bellard
    if ( (sbits64) zSig < 0 ) {
2549 158142c2 bellard
        zSig = aSig + bSig;
2550 158142c2 bellard
        ++zExp;
2551 158142c2 bellard
    }
2552 158142c2 bellard
 roundAndPack:
2553 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2554 158142c2 bellard
2555 158142c2 bellard
}
2556 158142c2 bellard
2557 158142c2 bellard
/*----------------------------------------------------------------------------
2558 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
2559 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2560 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
2561 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2562 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2563 158142c2 bellard
*----------------------------------------------------------------------------*/
2564 158142c2 bellard
2565 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2566 158142c2 bellard
{
2567 158142c2 bellard
    int16 aExp, bExp, zExp;
2568 158142c2 bellard
    bits64 aSig, bSig, zSig;
2569 158142c2 bellard
    int16 expDiff;
2570 158142c2 bellard
2571 158142c2 bellard
    aSig = extractFloat64Frac( a );
2572 158142c2 bellard
    aExp = extractFloat64Exp( a );
2573 158142c2 bellard
    bSig = extractFloat64Frac( b );
2574 158142c2 bellard
    bExp = extractFloat64Exp( b );
2575 158142c2 bellard
    expDiff = aExp - bExp;
2576 158142c2 bellard
    aSig <<= 10;
2577 158142c2 bellard
    bSig <<= 10;
2578 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
2579 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
2580 158142c2 bellard
    if ( aExp == 0x7FF ) {
2581 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2582 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2583 158142c2 bellard
        return float64_default_nan;
2584 158142c2 bellard
    }
2585 158142c2 bellard
    if ( aExp == 0 ) {
2586 158142c2 bellard
        aExp = 1;
2587 158142c2 bellard
        bExp = 1;
2588 158142c2 bellard
    }
2589 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
2590 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
2591 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2592 158142c2 bellard
 bExpBigger:
2593 158142c2 bellard
    if ( bExp == 0x7FF ) {
2594 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2595 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2596 158142c2 bellard
    }
2597 158142c2 bellard
    if ( aExp == 0 ) {
2598 158142c2 bellard
        ++expDiff;
2599 158142c2 bellard
    }
2600 158142c2 bellard
    else {
2601 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
2602 158142c2 bellard
    }
2603 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
2604 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
2605 158142c2 bellard
 bBigger:
2606 158142c2 bellard
    zSig = bSig - aSig;
2607 158142c2 bellard
    zExp = bExp;
2608 158142c2 bellard
    zSign ^= 1;
2609 158142c2 bellard
    goto normalizeRoundAndPack;
2610 158142c2 bellard
 aExpBigger:
2611 158142c2 bellard
    if ( aExp == 0x7FF ) {
2612 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2613 158142c2 bellard
        return a;
2614 158142c2 bellard
    }
2615 158142c2 bellard
    if ( bExp == 0 ) {
2616 158142c2 bellard
        --expDiff;
2617 158142c2 bellard
    }
2618 158142c2 bellard
    else {
2619 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
2620 158142c2 bellard
    }
2621 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
2622 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
2623 158142c2 bellard
 aBigger:
2624 158142c2 bellard
    zSig = aSig - bSig;
2625 158142c2 bellard
    zExp = aExp;
2626 158142c2 bellard
 normalizeRoundAndPack:
2627 158142c2 bellard
    --zExp;
2628 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2629 158142c2 bellard
2630 158142c2 bellard
}
2631 158142c2 bellard
2632 158142c2 bellard
/*----------------------------------------------------------------------------
2633 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
2634 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
2635 158142c2 bellard
| Binary Floating-Point Arithmetic.
2636 158142c2 bellard
*----------------------------------------------------------------------------*/
2637 158142c2 bellard
2638 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
2639 158142c2 bellard
{
2640 158142c2 bellard
    flag aSign, bSign;
2641 158142c2 bellard
2642 158142c2 bellard
    aSign = extractFloat64Sign( a );
2643 158142c2 bellard
    bSign = extractFloat64Sign( b );
2644 158142c2 bellard
    if ( aSign == bSign ) {
2645 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2646 158142c2 bellard
    }
2647 158142c2 bellard
    else {
2648 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2649 158142c2 bellard
    }
2650 158142c2 bellard
2651 158142c2 bellard
}
2652 158142c2 bellard
2653 158142c2 bellard
/*----------------------------------------------------------------------------
2654 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
2655 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2656 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2657 158142c2 bellard
*----------------------------------------------------------------------------*/
2658 158142c2 bellard
2659 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2660 158142c2 bellard
{
2661 158142c2 bellard
    flag aSign, bSign;
2662 158142c2 bellard
2663 158142c2 bellard
    aSign = extractFloat64Sign( a );
2664 158142c2 bellard
    bSign = extractFloat64Sign( b );
2665 158142c2 bellard
    if ( aSign == bSign ) {
2666 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
2667 158142c2 bellard
    }
2668 158142c2 bellard
    else {
2669 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
2670 158142c2 bellard
    }
2671 158142c2 bellard
2672 158142c2 bellard
}
2673 158142c2 bellard
2674 158142c2 bellard
/*----------------------------------------------------------------------------
2675 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
2676 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2677 158142c2 bellard
| for Binary Floating-Point Arithmetic.
2678 158142c2 bellard
*----------------------------------------------------------------------------*/
2679 158142c2 bellard
2680 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
2681 158142c2 bellard
{
2682 158142c2 bellard
    flag aSign, bSign, zSign;
2683 158142c2 bellard
    int16 aExp, bExp, zExp;
2684 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
2685 158142c2 bellard
2686 158142c2 bellard
    aSig = extractFloat64Frac( a );
2687 158142c2 bellard
    aExp = extractFloat64Exp( a );
2688 158142c2 bellard
    aSign = extractFloat64Sign( a );
2689 158142c2 bellard
    bSig = extractFloat64Frac( b );
2690 158142c2 bellard
    bExp = extractFloat64Exp( b );
2691 158142c2 bellard
    bSign = extractFloat64Sign( b );
2692 158142c2 bellard
    zSign = aSign ^ bSign;
2693 158142c2 bellard
    if ( aExp == 0x7FF ) {
2694 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2695 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2696 158142c2 bellard
        }
2697 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
2698 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2699 158142c2 bellard
            return float64_default_nan;
2700 158142c2 bellard
        }
2701 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2702 158142c2 bellard
    }
2703 158142c2 bellard
    if ( bExp == 0x7FF ) {
2704 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2705 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
2706 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2707 158142c2 bellard
            return float64_default_nan;
2708 158142c2 bellard
        }
2709 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2710 158142c2 bellard
    }
2711 158142c2 bellard
    if ( aExp == 0 ) {
2712 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2713 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2714 158142c2 bellard
    }
2715 158142c2 bellard
    if ( bExp == 0 ) {
2716 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2717 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2718 158142c2 bellard
    }
2719 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
2720 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2721 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2722 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2723 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
2724 158142c2 bellard
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2725 158142c2 bellard
        zSig0 <<= 1;
2726 158142c2 bellard
        --zExp;
2727 158142c2 bellard
    }
2728 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
2729 158142c2 bellard
2730 158142c2 bellard
}
2731 158142c2 bellard
2732 158142c2 bellard
/*----------------------------------------------------------------------------
2733 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
2734 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
2735 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2736 158142c2 bellard
*----------------------------------------------------------------------------*/
2737 158142c2 bellard
2738 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
2739 158142c2 bellard
{
2740 158142c2 bellard
    flag aSign, bSign, zSign;
2741 158142c2 bellard
    int16 aExp, bExp, zExp;
2742 158142c2 bellard
    bits64 aSig, bSig, zSig;
2743 158142c2 bellard
    bits64 rem0, rem1;
2744 158142c2 bellard
    bits64 term0, term1;
2745 158142c2 bellard
2746 158142c2 bellard
    aSig = extractFloat64Frac( a );
2747 158142c2 bellard
    aExp = extractFloat64Exp( a );
2748 158142c2 bellard
    aSign = extractFloat64Sign( a );
2749 158142c2 bellard
    bSig = extractFloat64Frac( b );
2750 158142c2 bellard
    bExp = extractFloat64Exp( b );
2751 158142c2 bellard
    bSign = extractFloat64Sign( b );
2752 158142c2 bellard
    zSign = aSign ^ bSign;
2753 158142c2 bellard
    if ( aExp == 0x7FF ) {
2754 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2755 158142c2 bellard
        if ( bExp == 0x7FF ) {
2756 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2757 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2758 158142c2 bellard
            return float64_default_nan;
2759 158142c2 bellard
        }
2760 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
2761 158142c2 bellard
    }
2762 158142c2 bellard
    if ( bExp == 0x7FF ) {
2763 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2764 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
2765 158142c2 bellard
    }
2766 158142c2 bellard
    if ( bExp == 0 ) {
2767 158142c2 bellard
        if ( bSig == 0 ) {
2768 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
2769 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2770 158142c2 bellard
                return float64_default_nan;
2771 158142c2 bellard
            }
2772 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
2773 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
2774 158142c2 bellard
        }
2775 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2776 158142c2 bellard
    }
2777 158142c2 bellard
    if ( aExp == 0 ) {
2778 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2779 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2780 158142c2 bellard
    }
2781 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
2782 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2783 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2784 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
2785 158142c2 bellard
        aSig >>= 1;
2786 158142c2 bellard
        ++zExp;
2787 158142c2 bellard
    }
2788 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
2789 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
2790 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
2791 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2792 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
2793 158142c2 bellard
            --zSig;
2794 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2795 158142c2 bellard
        }
2796 158142c2 bellard
        zSig |= ( rem1 != 0 );
2797 158142c2 bellard
    }
2798 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2799 158142c2 bellard
2800 158142c2 bellard
}
2801 158142c2 bellard
2802 158142c2 bellard
/*----------------------------------------------------------------------------
2803 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
2804 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2805 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2806 158142c2 bellard
*----------------------------------------------------------------------------*/
2807 158142c2 bellard
2808 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
2809 158142c2 bellard
{
2810 158142c2 bellard
    flag aSign, bSign, zSign;
2811 158142c2 bellard
    int16 aExp, bExp, expDiff;
2812 158142c2 bellard
    bits64 aSig, bSig;
2813 158142c2 bellard
    bits64 q, alternateASig;
2814 158142c2 bellard
    sbits64 sigMean;
2815 158142c2 bellard
2816 158142c2 bellard
    aSig = extractFloat64Frac( a );
2817 158142c2 bellard
    aExp = extractFloat64Exp( a );
2818 158142c2 bellard
    aSign = extractFloat64Sign( a );
2819 158142c2 bellard
    bSig = extractFloat64Frac( b );
2820 158142c2 bellard
    bExp = extractFloat64Exp( b );
2821 158142c2 bellard
    bSign = extractFloat64Sign( b );
2822 158142c2 bellard
    if ( aExp == 0x7FF ) {
2823 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2824 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
2825 158142c2 bellard
        }
2826 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2827 158142c2 bellard
        return float64_default_nan;
2828 158142c2 bellard
    }
2829 158142c2 bellard
    if ( bExp == 0x7FF ) {
2830 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2831 158142c2 bellard
        return a;
2832 158142c2 bellard
    }
2833 158142c2 bellard
    if ( bExp == 0 ) {
2834 158142c2 bellard
        if ( bSig == 0 ) {
2835 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2836 158142c2 bellard
            return float64_default_nan;
2837 158142c2 bellard
        }
2838 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2839 158142c2 bellard
    }
2840 158142c2 bellard
    if ( aExp == 0 ) {
2841 158142c2 bellard
        if ( aSig == 0 ) return a;
2842 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2843 158142c2 bellard
    }
2844 158142c2 bellard
    expDiff = aExp - bExp;
2845 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2846 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2847 158142c2 bellard
    if ( expDiff < 0 ) {
2848 158142c2 bellard
        if ( expDiff < -1 ) return a;
2849 158142c2 bellard
        aSig >>= 1;
2850 158142c2 bellard
    }
2851 158142c2 bellard
    q = ( bSig <= aSig );
2852 158142c2 bellard
    if ( q ) aSig -= bSig;
2853 158142c2 bellard
    expDiff -= 64;
2854 158142c2 bellard
    while ( 0 < expDiff ) {
2855 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2856 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
2857 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
2858 158142c2 bellard
        expDiff -= 62;
2859 158142c2 bellard
    }
2860 158142c2 bellard
    expDiff += 64;
2861 158142c2 bellard
    if ( 0 < expDiff ) {
2862 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
2863 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
2864 158142c2 bellard
        q >>= 64 - expDiff;
2865 158142c2 bellard
        bSig >>= 2;
2866 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2867 158142c2 bellard
    }
2868 158142c2 bellard
    else {
2869 158142c2 bellard
        aSig >>= 2;
2870 158142c2 bellard
        bSig >>= 2;
2871 158142c2 bellard
    }
2872 158142c2 bellard
    do {
2873 158142c2 bellard
        alternateASig = aSig;
2874 158142c2 bellard
        ++q;
2875 158142c2 bellard
        aSig -= bSig;
2876 158142c2 bellard
    } while ( 0 <= (sbits64) aSig );
2877 158142c2 bellard
    sigMean = aSig + alternateASig;
2878 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2879 158142c2 bellard
        aSig = alternateASig;
2880 158142c2 bellard
    }
2881 158142c2 bellard
    zSign = ( (sbits64) aSig < 0 );
2882 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2883 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
2884 158142c2 bellard
2885 158142c2 bellard
}
2886 158142c2 bellard
2887 158142c2 bellard
/*----------------------------------------------------------------------------
2888 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
2889 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2890 158142c2 bellard
| Floating-Point Arithmetic.
2891 158142c2 bellard
*----------------------------------------------------------------------------*/
2892 158142c2 bellard
2893 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
2894 158142c2 bellard
{
2895 158142c2 bellard
    flag aSign;
2896 158142c2 bellard
    int16 aExp, zExp;
2897 158142c2 bellard
    bits64 aSig, zSig, doubleZSig;
2898 158142c2 bellard
    bits64 rem0, rem1, term0, term1;
2899 158142c2 bellard
2900 158142c2 bellard
    aSig = extractFloat64Frac( a );
2901 158142c2 bellard
    aExp = extractFloat64Exp( a );
2902 158142c2 bellard
    aSign = extractFloat64Sign( a );
2903 158142c2 bellard
    if ( aExp == 0x7FF ) {
2904 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
2905 158142c2 bellard
        if ( ! aSign ) return a;
2906 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2907 158142c2 bellard
        return float64_default_nan;
2908 158142c2 bellard
    }
2909 158142c2 bellard
    if ( aSign ) {
2910 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2911 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2912 158142c2 bellard
        return float64_default_nan;
2913 158142c2 bellard
    }
2914 158142c2 bellard
    if ( aExp == 0 ) {
2915 158142c2 bellard
        if ( aSig == 0 ) return 0;
2916 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2917 158142c2 bellard
    }
2918 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2919 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
2920 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
2921 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
2922 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
2923 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
2924 158142c2 bellard
        doubleZSig = zSig<<1;
2925 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
2926 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2927 158142c2 bellard
        while ( (sbits64) rem0 < 0 ) {
2928 158142c2 bellard
            --zSig;
2929 158142c2 bellard
            doubleZSig -= 2;
2930 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
2931 158142c2 bellard
        }
2932 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
2933 158142c2 bellard
    }
2934 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
2935 158142c2 bellard
2936 158142c2 bellard
}
2937 158142c2 bellard
2938 158142c2 bellard
/*----------------------------------------------------------------------------
2939 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
2940 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The comparison is performed
2941 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2942 158142c2 bellard
*----------------------------------------------------------------------------*/
2943 158142c2 bellard
2944 158142c2 bellard
flag float64_eq( float64 a, float64 b STATUS_PARAM )
2945 158142c2 bellard
{
2946 158142c2 bellard
2947 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2948 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2949 158142c2 bellard
       ) {
2950 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2951 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2952 158142c2 bellard
        }
2953 158142c2 bellard
        return 0;
2954 158142c2 bellard
    }
2955 158142c2 bellard
    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2956 158142c2 bellard
2957 158142c2 bellard
}
2958 158142c2 bellard
2959 158142c2 bellard
/*----------------------------------------------------------------------------
2960 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
2961 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
2962 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2963 158142c2 bellard
| Arithmetic.
2964 158142c2 bellard
*----------------------------------------------------------------------------*/
2965 158142c2 bellard
2966 158142c2 bellard
flag float64_le( float64 a, float64 b STATUS_PARAM )
2967 158142c2 bellard
{
2968 158142c2 bellard
    flag aSign, bSign;
2969 158142c2 bellard
2970 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2971 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2972 158142c2 bellard
       ) {
2973 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2974 158142c2 bellard
        return 0;
2975 158142c2 bellard
    }
2976 158142c2 bellard
    aSign = extractFloat64Sign( a );
2977 158142c2 bellard
    bSign = extractFloat64Sign( b );
2978 158142c2 bellard
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2979 158142c2 bellard
    return ( a == b ) || ( aSign ^ ( a < b ) );
2980 158142c2 bellard
2981 158142c2 bellard
}
2982 158142c2 bellard
2983 158142c2 bellard
/*----------------------------------------------------------------------------
2984 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
2985 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
2986 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2987 158142c2 bellard
*----------------------------------------------------------------------------*/
2988 158142c2 bellard
2989 158142c2 bellard
flag float64_lt( float64 a, float64 b STATUS_PARAM )
2990 158142c2 bellard
{
2991 158142c2 bellard
    flag aSign, bSign;
2992 158142c2 bellard
2993 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2994 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2995 158142c2 bellard
       ) {
2996 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2997 158142c2 bellard
        return 0;
2998 158142c2 bellard
    }
2999 158142c2 bellard
    aSign = extractFloat64Sign( a );
3000 158142c2 bellard
    bSign = extractFloat64Sign( b );
3001 158142c2 bellard
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3002 158142c2 bellard
    return ( a != b ) && ( aSign ^ ( a < b ) );
3003 158142c2 bellard
3004 158142c2 bellard
}
3005 158142c2 bellard
3006 158142c2 bellard
/*----------------------------------------------------------------------------
3007 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3008 158142c2 bellard
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3009 158142c2 bellard
| if either operand is a NaN.  Otherwise, the comparison is performed
3010 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3011 158142c2 bellard
*----------------------------------------------------------------------------*/
3012 158142c2 bellard
3013 158142c2 bellard
flag float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
3014 158142c2 bellard
{
3015 158142c2 bellard
3016 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3017 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3018 158142c2 bellard
       ) {
3019 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3020 158142c2 bellard
        return 0;
3021 158142c2 bellard
    }
3022 158142c2 bellard
    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3023 158142c2 bellard
3024 158142c2 bellard
}
3025 158142c2 bellard
3026 158142c2 bellard
/*----------------------------------------------------------------------------
3027 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3028 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3029 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3030 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3031 158142c2 bellard
*----------------------------------------------------------------------------*/
3032 158142c2 bellard
3033 158142c2 bellard
flag float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3034 158142c2 bellard
{
3035 158142c2 bellard
    flag aSign, bSign;
3036 158142c2 bellard
3037 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3038 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3039 158142c2 bellard
       ) {
3040 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3041 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3042 158142c2 bellard
        }
3043 158142c2 bellard
        return 0;
3044 158142c2 bellard
    }
3045 158142c2 bellard
    aSign = extractFloat64Sign( a );
3046 158142c2 bellard
    bSign = extractFloat64Sign( b );
3047 158142c2 bellard
    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3048 158142c2 bellard
    return ( a == b ) || ( aSign ^ ( a < b ) );
3049 158142c2 bellard
3050 158142c2 bellard
}
3051 158142c2 bellard
3052 158142c2 bellard
/*----------------------------------------------------------------------------
3053 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3054 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3055 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3056 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3057 158142c2 bellard
*----------------------------------------------------------------------------*/
3058 158142c2 bellard
3059 158142c2 bellard
flag float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3060 158142c2 bellard
{
3061 158142c2 bellard
    flag aSign, bSign;
3062 158142c2 bellard
3063 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3064 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3065 158142c2 bellard
       ) {
3066 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3067 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3068 158142c2 bellard
        }
3069 158142c2 bellard
        return 0;
3070 158142c2 bellard
    }
3071 158142c2 bellard
    aSign = extractFloat64Sign( a );
3072 158142c2 bellard
    bSign = extractFloat64Sign( b );
3073 158142c2 bellard
    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3074 158142c2 bellard
    return ( a != b ) && ( aSign ^ ( a < b ) );
3075 158142c2 bellard
3076 158142c2 bellard
}
3077 158142c2 bellard
3078 158142c2 bellard
#ifdef FLOATX80
3079 158142c2 bellard
3080 158142c2 bellard
/*----------------------------------------------------------------------------
3081 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3082 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3083 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3084 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3085 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3086 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3087 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3088 158142c2 bellard
*----------------------------------------------------------------------------*/
3089 158142c2 bellard
3090 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
3091 158142c2 bellard
{
3092 158142c2 bellard
    flag aSign;
3093 158142c2 bellard
    int32 aExp, shiftCount;
3094 158142c2 bellard
    bits64 aSig;
3095 158142c2 bellard
3096 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3097 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3098 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3099 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3100 158142c2 bellard
    shiftCount = 0x4037 - aExp;
3101 158142c2 bellard
    if ( shiftCount <= 0 ) shiftCount = 1;
3102 158142c2 bellard
    shift64RightJamming( aSig, shiftCount, &aSig );
3103 158142c2 bellard
    return roundAndPackInt32( aSign, aSig STATUS_VAR );
3104 158142c2 bellard
3105 158142c2 bellard
}
3106 158142c2 bellard
3107 158142c2 bellard
/*----------------------------------------------------------------------------
3108 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3109 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3110 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3111 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3112 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3113 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3114 158142c2 bellard
| sign as `a' is returned.
3115 158142c2 bellard
*----------------------------------------------------------------------------*/
3116 158142c2 bellard
3117 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
3118 158142c2 bellard
{
3119 158142c2 bellard
    flag aSign;
3120 158142c2 bellard
    int32 aExp, shiftCount;
3121 158142c2 bellard
    bits64 aSig, savedASig;
3122 158142c2 bellard
    int32 z;
3123 158142c2 bellard
3124 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3125 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3126 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3127 158142c2 bellard
    if ( 0x401E < aExp ) {
3128 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3129 158142c2 bellard
        goto invalid;
3130 158142c2 bellard
    }
3131 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3132 158142c2 bellard
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3133 158142c2 bellard
        return 0;
3134 158142c2 bellard
    }
3135 158142c2 bellard
    shiftCount = 0x403E - aExp;
3136 158142c2 bellard
    savedASig = aSig;
3137 158142c2 bellard
    aSig >>= shiftCount;
3138 158142c2 bellard
    z = aSig;
3139 158142c2 bellard
    if ( aSign ) z = - z;
3140 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
3141 158142c2 bellard
 invalid:
3142 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3143 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3144 158142c2 bellard
    }
3145 158142c2 bellard
    if ( ( aSig<<shiftCount ) != savedASig ) {
3146 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3147 158142c2 bellard
    }
3148 158142c2 bellard
    return z;
3149 158142c2 bellard
3150 158142c2 bellard
}
3151 158142c2 bellard
3152 158142c2 bellard
/*----------------------------------------------------------------------------
3153 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3154 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3155 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3156 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3157 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3158 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3159 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3160 158142c2 bellard
*----------------------------------------------------------------------------*/
3161 158142c2 bellard
3162 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
3163 158142c2 bellard
{
3164 158142c2 bellard
    flag aSign;
3165 158142c2 bellard
    int32 aExp, shiftCount;
3166 158142c2 bellard
    bits64 aSig, aSigExtra;
3167 158142c2 bellard
3168 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3169 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3170 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3171 158142c2 bellard
    shiftCount = 0x403E - aExp;
3172 158142c2 bellard
    if ( shiftCount <= 0 ) {
3173 158142c2 bellard
        if ( shiftCount ) {
3174 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3175 158142c2 bellard
            if (    ! aSign
3176 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
3177 158142c2 bellard
                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3178 158142c2 bellard
               ) {
3179 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3180 158142c2 bellard
            }
3181 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
3182 158142c2 bellard
        }
3183 158142c2 bellard
        aSigExtra = 0;
3184 158142c2 bellard
    }
3185 158142c2 bellard
    else {
3186 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3187 158142c2 bellard
    }
3188 158142c2 bellard
    return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3189 158142c2 bellard
3190 158142c2 bellard
}
3191 158142c2 bellard
3192 158142c2 bellard
/*----------------------------------------------------------------------------
3193 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3194 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3195 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3196 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3197 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3198 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3199 158142c2 bellard
| sign as `a' is returned.
3200 158142c2 bellard
*----------------------------------------------------------------------------*/
3201 158142c2 bellard
3202 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
3203 158142c2 bellard
{
3204 158142c2 bellard
    flag aSign;
3205 158142c2 bellard
    int32 aExp, shiftCount;
3206 158142c2 bellard
    bits64 aSig;
3207 158142c2 bellard
    int64 z;
3208 158142c2 bellard
3209 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3210 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3211 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3212 158142c2 bellard
    shiftCount = aExp - 0x403E;
3213 158142c2 bellard
    if ( 0 <= shiftCount ) {
3214 158142c2 bellard
        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3215 158142c2 bellard
        if ( ( a.high != 0xC03E ) || aSig ) {
3216 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3217 158142c2 bellard
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3218 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
3219 158142c2 bellard
            }
3220 158142c2 bellard
        }
3221 158142c2 bellard
        return (sbits64) LIT64( 0x8000000000000000 );
3222 158142c2 bellard
    }
3223 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
3224 158142c2 bellard
        if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3225 158142c2 bellard
        return 0;
3226 158142c2 bellard
    }
3227 158142c2 bellard
    z = aSig>>( - shiftCount );
3228 158142c2 bellard
    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3229 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3230 158142c2 bellard
    }
3231 158142c2 bellard
    if ( aSign ) z = - z;
3232 158142c2 bellard
    return z;
3233 158142c2 bellard
3234 158142c2 bellard
}
3235 158142c2 bellard
3236 158142c2 bellard
/*----------------------------------------------------------------------------
3237 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3238 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3239 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3240 158142c2 bellard
| Floating-Point Arithmetic.
3241 158142c2 bellard
*----------------------------------------------------------------------------*/
3242 158142c2 bellard
3243 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
3244 158142c2 bellard
{
3245 158142c2 bellard
    flag aSign;
3246 158142c2 bellard
    int32 aExp;
3247 158142c2 bellard
    bits64 aSig;
3248 158142c2 bellard
3249 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3250 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3251 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3252 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3253 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) {
3254 158142c2 bellard
            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
3255 158142c2 bellard
        }
3256 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
3257 158142c2 bellard
    }
3258 158142c2 bellard
    shift64RightJamming( aSig, 33, &aSig );
3259 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3F81;
3260 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
3261 158142c2 bellard
3262 158142c2 bellard
}
3263 158142c2 bellard
3264 158142c2 bellard
/*----------------------------------------------------------------------------
3265 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3266 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
3267 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3268 158142c2 bellard
| Floating-Point Arithmetic.
3269 158142c2 bellard
*----------------------------------------------------------------------------*/
3270 158142c2 bellard
3271 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
3272 158142c2 bellard
{
3273 158142c2 bellard
    flag aSign;
3274 158142c2 bellard
    int32 aExp;
3275 158142c2 bellard
    bits64 aSig, zSig;
3276 158142c2 bellard
3277 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3278 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3279 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3280 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3281 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) {
3282 158142c2 bellard
            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
3283 158142c2 bellard
        }
3284 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
3285 158142c2 bellard
    }
3286 158142c2 bellard
    shift64RightJamming( aSig, 1, &zSig );
3287 158142c2 bellard
    if ( aExp || aSig ) aExp -= 0x3C01;
3288 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
3289 158142c2 bellard
3290 158142c2 bellard
}
3291 158142c2 bellard
3292 158142c2 bellard
#ifdef FLOAT128
3293 158142c2 bellard
3294 158142c2 bellard
/*----------------------------------------------------------------------------
3295 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3296 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
3297 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3298 158142c2 bellard
| Floating-Point Arithmetic.
3299 158142c2 bellard
*----------------------------------------------------------------------------*/
3300 158142c2 bellard
3301 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
3302 158142c2 bellard
{
3303 158142c2 bellard
    flag aSign;
3304 158142c2 bellard
    int16 aExp;
3305 158142c2 bellard
    bits64 aSig, zSig0, zSig1;
3306 158142c2 bellard
3307 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3308 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3309 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3310 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3311 158142c2 bellard
        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
3312 158142c2 bellard
    }
3313 158142c2 bellard
    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3314 158142c2 bellard
    return packFloat128( aSign, aExp, zSig0, zSig1 );
3315 158142c2 bellard
3316 158142c2 bellard
}
3317 158142c2 bellard
3318 158142c2 bellard
#endif
3319 158142c2 bellard
3320 158142c2 bellard
/*----------------------------------------------------------------------------
3321 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
3322 158142c2 bellard
| and returns the result as an extended quadruple-precision floating-point
3323 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
3324 158142c2 bellard
| Binary Floating-Point Arithmetic.
3325 158142c2 bellard
*----------------------------------------------------------------------------*/
3326 158142c2 bellard
3327 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
3328 158142c2 bellard
{
3329 158142c2 bellard
    flag aSign;
3330 158142c2 bellard
    int32 aExp;
3331 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
3332 158142c2 bellard
    int8 roundingMode;
3333 158142c2 bellard
    floatx80 z;
3334 158142c2 bellard
3335 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3336 158142c2 bellard
    if ( 0x403E <= aExp ) {
3337 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3338 158142c2 bellard
            return propagateFloatx80NaN( a, a STATUS_VAR );
3339 158142c2 bellard
        }
3340 158142c2 bellard
        return a;
3341 158142c2 bellard
    }
3342 158142c2 bellard
    if ( aExp < 0x3FFF ) {
3343 158142c2 bellard
        if (    ( aExp == 0 )
3344 158142c2 bellard
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3345 158142c2 bellard
            return a;
3346 158142c2 bellard
        }
3347 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3348 158142c2 bellard
        aSign = extractFloatx80Sign( a );
3349 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3350 158142c2 bellard
         case float_round_nearest_even:
3351 158142c2 bellard
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3352 158142c2 bellard
               ) {
3353 158142c2 bellard
                return
3354 158142c2 bellard
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3355 158142c2 bellard
            }
3356 158142c2 bellard
            break;
3357 158142c2 bellard
         case float_round_down:
3358 158142c2 bellard
            return
3359 158142c2 bellard
                  aSign ?
3360 158142c2 bellard
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3361 158142c2 bellard
                : packFloatx80( 0, 0, 0 );
3362 158142c2 bellard
         case float_round_up:
3363 158142c2 bellard
            return
3364 158142c2 bellard
                  aSign ? packFloatx80( 1, 0, 0 )
3365 158142c2 bellard
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3366 158142c2 bellard
        }
3367 158142c2 bellard
        return packFloatx80( aSign, 0, 0 );
3368 158142c2 bellard
    }
3369 158142c2 bellard
    lastBitMask = 1;
3370 158142c2 bellard
    lastBitMask <<= 0x403E - aExp;
3371 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3372 158142c2 bellard
    z = a;
3373 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3374 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3375 158142c2 bellard
        z.low += lastBitMask>>1;
3376 158142c2 bellard
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3377 158142c2 bellard
    }
3378 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3379 158142c2 bellard
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3380 158142c2 bellard
            z.low += roundBitsMask;
3381 158142c2 bellard
        }
3382 158142c2 bellard
    }
3383 158142c2 bellard
    z.low &= ~ roundBitsMask;
3384 158142c2 bellard
    if ( z.low == 0 ) {
3385 158142c2 bellard
        ++z.high;
3386 158142c2 bellard
        z.low = LIT64( 0x8000000000000000 );
3387 158142c2 bellard
    }
3388 158142c2 bellard
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
3389 158142c2 bellard
    return z;
3390 158142c2 bellard
3391 158142c2 bellard
}
3392 158142c2 bellard
3393 158142c2 bellard
/*----------------------------------------------------------------------------
3394 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
3395 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3396 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
3397 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3398 158142c2 bellard
| Floating-Point Arithmetic.
3399 158142c2 bellard
*----------------------------------------------------------------------------*/
3400 158142c2 bellard
3401 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
3402 158142c2 bellard
{
3403 158142c2 bellard
    int32 aExp, bExp, zExp;
3404 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3405 158142c2 bellard
    int32 expDiff;
3406 158142c2 bellard
3407 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3408 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3409 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3410 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3411 158142c2 bellard
    expDiff = aExp - bExp;
3412 158142c2 bellard
    if ( 0 < expDiff ) {
3413 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3414 158142c2 bellard
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3415 158142c2 bellard
            return a;
3416 158142c2 bellard
        }
3417 158142c2 bellard
        if ( bExp == 0 ) --expDiff;
3418 158142c2 bellard
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3419 158142c2 bellard
        zExp = aExp;
3420 158142c2 bellard
    }
3421 158142c2 bellard
    else if ( expDiff < 0 ) {
3422 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3423 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3424 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3425 158142c2 bellard
        }
3426 158142c2 bellard
        if ( aExp == 0 ) ++expDiff;
3427 158142c2 bellard
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3428 158142c2 bellard
        zExp = bExp;
3429 158142c2 bellard
    }
3430 158142c2 bellard
    else {
3431 158142c2 bellard
        if ( aExp == 0x7FFF ) {
3432 158142c2 bellard
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3433 158142c2 bellard
                return propagateFloatx80NaN( a, b STATUS_VAR );
3434 158142c2 bellard
            }
3435 158142c2 bellard
            return a;
3436 158142c2 bellard
        }
3437 158142c2 bellard
        zSig1 = 0;
3438 158142c2 bellard
        zSig0 = aSig + bSig;
3439 158142c2 bellard
        if ( aExp == 0 ) {
3440 158142c2 bellard
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3441 158142c2 bellard
            goto roundAndPack;
3442 158142c2 bellard
        }
3443 158142c2 bellard
        zExp = aExp;
3444 158142c2 bellard
        goto shiftRight1;
3445 158142c2 bellard
    }
3446 158142c2 bellard
    zSig0 = aSig + bSig;
3447 158142c2 bellard
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3448 158142c2 bellard
 shiftRight1:
3449 158142c2 bellard
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3450 158142c2 bellard
    zSig0 |= LIT64( 0x8000000000000000 );
3451 158142c2 bellard
    ++zExp;
3452 158142c2 bellard
 roundAndPack:
3453 158142c2 bellard
    return
3454 158142c2 bellard
        roundAndPackFloatx80(
3455 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3456 158142c2 bellard
3457 158142c2 bellard
}
3458 158142c2 bellard
3459 158142c2 bellard
/*----------------------------------------------------------------------------
3460 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
3461 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3462 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3463 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3464 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3465 158142c2 bellard
*----------------------------------------------------------------------------*/
3466 158142c2 bellard
3467 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
3468 158142c2 bellard
{
3469 158142c2 bellard
    int32 aExp, bExp, zExp;
3470 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3471 158142c2 bellard
    int32 expDiff;
3472 158142c2 bellard
    floatx80 z;
3473 158142c2 bellard
3474 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3475 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3476 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3477 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3478 158142c2 bellard
    expDiff = aExp - bExp;
3479 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3480 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3481 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3482 158142c2 bellard
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3483 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3484 158142c2 bellard
        }
3485 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3486 158142c2 bellard
        z.low = floatx80_default_nan_low;
3487 158142c2 bellard
        z.high = floatx80_default_nan_high;
3488 158142c2 bellard
        return z;
3489 158142c2 bellard
    }
3490 158142c2 bellard
    if ( aExp == 0 ) {
3491 158142c2 bellard
        aExp = 1;
3492 158142c2 bellard
        bExp = 1;
3493 158142c2 bellard
    }
3494 158142c2 bellard
    zSig1 = 0;
3495 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3496 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3497 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3498 158142c2 bellard
 bExpBigger:
3499 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3500 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3501 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3502 158142c2 bellard
    }
3503 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
3504 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3505 158142c2 bellard
 bBigger:
3506 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3507 158142c2 bellard
    zExp = bExp;
3508 158142c2 bellard
    zSign ^= 1;
3509 158142c2 bellard
    goto normalizeRoundAndPack;
3510 158142c2 bellard
 aExpBigger:
3511 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3512 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3513 158142c2 bellard
        return a;
3514 158142c2 bellard
    }
3515 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
3516 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3517 158142c2 bellard
 aBigger:
3518 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3519 158142c2 bellard
    zExp = aExp;
3520 158142c2 bellard
 normalizeRoundAndPack:
3521 158142c2 bellard
    return
3522 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3523 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3524 158142c2 bellard
3525 158142c2 bellard
}
3526 158142c2 bellard
3527 158142c2 bellard
/*----------------------------------------------------------------------------
3528 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
3529 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
3530 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3531 158142c2 bellard
*----------------------------------------------------------------------------*/
3532 158142c2 bellard
3533 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
3534 158142c2 bellard
{
3535 158142c2 bellard
    flag aSign, bSign;
3536 158142c2 bellard
3537 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3538 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3539 158142c2 bellard
    if ( aSign == bSign ) {
3540 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3541 158142c2 bellard
    }
3542 158142c2 bellard
    else {
3543 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3544 158142c2 bellard
    }
3545 158142c2 bellard
3546 158142c2 bellard
}
3547 158142c2 bellard
3548 158142c2 bellard
/*----------------------------------------------------------------------------
3549 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
3550 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3551 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3552 158142c2 bellard
*----------------------------------------------------------------------------*/
3553 158142c2 bellard
3554 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
3555 158142c2 bellard
{
3556 158142c2 bellard
    flag aSign, bSign;
3557 158142c2 bellard
3558 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3559 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3560 158142c2 bellard
    if ( aSign == bSign ) {
3561 158142c2 bellard
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3562 158142c2 bellard
    }
3563 158142c2 bellard
    else {
3564 158142c2 bellard
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3565 158142c2 bellard
    }
3566 158142c2 bellard
3567 158142c2 bellard
}
3568 158142c2 bellard
3569 158142c2 bellard
/*----------------------------------------------------------------------------
3570 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
3571 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
3572 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3573 158142c2 bellard
*----------------------------------------------------------------------------*/
3574 158142c2 bellard
3575 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
3576 158142c2 bellard
{
3577 158142c2 bellard
    flag aSign, bSign, zSign;
3578 158142c2 bellard
    int32 aExp, bExp, zExp;
3579 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3580 158142c2 bellard
    floatx80 z;
3581 158142c2 bellard
3582 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3583 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3584 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3585 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3586 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3587 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3588 158142c2 bellard
    zSign = aSign ^ bSign;
3589 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3590 158142c2 bellard
        if (    (bits64) ( aSig<<1 )
3591 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3592 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3593 158142c2 bellard
        }
3594 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) goto invalid;
3595 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3596 158142c2 bellard
    }
3597 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3598 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3599 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3600 158142c2 bellard
 invalid:
3601 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3602 158142c2 bellard
            z.low = floatx80_default_nan_low;
3603 158142c2 bellard
            z.high = floatx80_default_nan_high;
3604 158142c2 bellard
            return z;
3605 158142c2 bellard
        }
3606 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3607 158142c2 bellard
    }
3608 158142c2 bellard
    if ( aExp == 0 ) {
3609 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3610 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3611 158142c2 bellard
    }
3612 158142c2 bellard
    if ( bExp == 0 ) {
3613 158142c2 bellard
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3614 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3615 158142c2 bellard
    }
3616 158142c2 bellard
    zExp = aExp + bExp - 0x3FFE;
3617 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3618 158142c2 bellard
    if ( 0 < (sbits64) zSig0 ) {
3619 158142c2 bellard
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3620 158142c2 bellard
        --zExp;
3621 158142c2 bellard
    }
3622 158142c2 bellard
    return
3623 158142c2 bellard
        roundAndPackFloatx80(
3624 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3625 158142c2 bellard
3626 158142c2 bellard
}
3627 158142c2 bellard
3628 158142c2 bellard
/*----------------------------------------------------------------------------
3629 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
3630 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
3631 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3632 158142c2 bellard
*----------------------------------------------------------------------------*/
3633 158142c2 bellard
3634 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3635 158142c2 bellard
{
3636 158142c2 bellard
    flag aSign, bSign, zSign;
3637 158142c2 bellard
    int32 aExp, bExp, zExp;
3638 158142c2 bellard
    bits64 aSig, bSig, zSig0, zSig1;
3639 158142c2 bellard
    bits64 rem0, rem1, rem2, term0, term1, term2;
3640 158142c2 bellard
    floatx80 z;
3641 158142c2 bellard
3642 158142c2 bellard
    aSig = extractFloatx80Frac( a );
3643 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3644 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3645 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3646 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3647 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3648 158142c2 bellard
    zSign = aSign ^ bSign;
3649 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3650 158142c2 bellard
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3651 158142c2 bellard
        if ( bExp == 0x7FFF ) {
3652 158142c2 bellard
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3653 158142c2 bellard
            goto invalid;
3654 158142c2 bellard
        }
3655 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3656 158142c2 bellard
    }
3657 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3658 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3659 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
3660 158142c2 bellard
    }
3661 158142c2 bellard
    if ( bExp == 0 ) {
3662 158142c2 bellard
        if ( bSig == 0 ) {
3663 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3664 158142c2 bellard
 invalid:
3665 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3666 158142c2 bellard
                z.low = floatx80_default_nan_low;
3667 158142c2 bellard
                z.high = floatx80_default_nan_high;
3668 158142c2 bellard
                return z;
3669 158142c2 bellard
            }
3670 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3671 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3672 158142c2 bellard
        }
3673 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3674 158142c2 bellard
    }
3675 158142c2 bellard
    if ( aExp == 0 ) {
3676 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3677 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3678 158142c2 bellard
    }
3679 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
3680 158142c2 bellard
    rem1 = 0;
3681 158142c2 bellard
    if ( bSig <= aSig ) {
3682 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3683 158142c2 bellard
        ++zExp;
3684 158142c2 bellard
    }
3685 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3686 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
3687 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3688 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
3689 158142c2 bellard
        --zSig0;
3690 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3691 158142c2 bellard
    }
3692 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3693 158142c2 bellard
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3694 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
3695 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3696 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
3697 158142c2 bellard
            --zSig1;
3698 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3699 158142c2 bellard
        }
3700 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3701 158142c2 bellard
    }
3702 158142c2 bellard
    return
3703 158142c2 bellard
        roundAndPackFloatx80(
3704 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3705 158142c2 bellard
3706 158142c2 bellard
}
3707 158142c2 bellard
3708 158142c2 bellard
/*----------------------------------------------------------------------------
3709 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
3710 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
3711 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3712 158142c2 bellard
*----------------------------------------------------------------------------*/
3713 158142c2 bellard
3714 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
3715 158142c2 bellard
{
3716 158142c2 bellard
    flag aSign, bSign, zSign;
3717 158142c2 bellard
    int32 aExp, bExp, expDiff;
3718 158142c2 bellard
    bits64 aSig0, aSig1, bSig;
3719 158142c2 bellard
    bits64 q, term0, term1, alternateASig0, alternateASig1;
3720 158142c2 bellard
    floatx80 z;
3721 158142c2 bellard
3722 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
3723 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3724 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3725 158142c2 bellard
    bSig = extractFloatx80Frac( b );
3726 158142c2 bellard
    bExp = extractFloatx80Exp( b );
3727 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3728 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3729 158142c2 bellard
        if (    (bits64) ( aSig0<<1 )
3730 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3731 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
3732 158142c2 bellard
        }
3733 158142c2 bellard
        goto invalid;
3734 158142c2 bellard
    }
3735 158142c2 bellard
    if ( bExp == 0x7FFF ) {
3736 158142c2 bellard
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3737 158142c2 bellard
        return a;
3738 158142c2 bellard
    }
3739 158142c2 bellard
    if ( bExp == 0 ) {
3740 158142c2 bellard
        if ( bSig == 0 ) {
3741 158142c2 bellard
 invalid:
3742 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3743 158142c2 bellard
            z.low = floatx80_default_nan_low;
3744 158142c2 bellard
            z.high = floatx80_default_nan_high;
3745 158142c2 bellard
            return z;
3746 158142c2 bellard
        }
3747 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3748 158142c2 bellard
    }
3749 158142c2 bellard
    if ( aExp == 0 ) {
3750 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3751 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3752 158142c2 bellard
    }
3753 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
3754 158142c2 bellard
    zSign = aSign;
3755 158142c2 bellard
    expDiff = aExp - bExp;
3756 158142c2 bellard
    aSig1 = 0;
3757 158142c2 bellard
    if ( expDiff < 0 ) {
3758 158142c2 bellard
        if ( expDiff < -1 ) return a;
3759 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3760 158142c2 bellard
        expDiff = 0;
3761 158142c2 bellard
    }
3762 158142c2 bellard
    q = ( bSig <= aSig0 );
3763 158142c2 bellard
    if ( q ) aSig0 -= bSig;
3764 158142c2 bellard
    expDiff -= 64;
3765 158142c2 bellard
    while ( 0 < expDiff ) {
3766 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3767 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3768 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
3769 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3770 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3771 158142c2 bellard
        expDiff -= 62;
3772 158142c2 bellard
    }
3773 158142c2 bellard
    expDiff += 64;
3774 158142c2 bellard
    if ( 0 < expDiff ) {
3775 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
3776 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3777 158142c2 bellard
        q >>= 64 - expDiff;
3778 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3779 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3780 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3781 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
3782 158142c2 bellard
            ++q;
3783 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3784 158142c2 bellard
        }
3785 158142c2 bellard
    }
3786 158142c2 bellard
    else {
3787 158142c2 bellard
        term1 = 0;
3788 158142c2 bellard
        term0 = bSig;
3789 158142c2 bellard
    }
3790 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3791 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3792 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3793 158142c2 bellard
              && ( q & 1 ) )
3794 158142c2 bellard
       ) {
3795 158142c2 bellard
        aSig0 = alternateASig0;
3796 158142c2 bellard
        aSig1 = alternateASig1;
3797 158142c2 bellard
        zSign = ! zSign;
3798 158142c2 bellard
    }
3799 158142c2 bellard
    return
3800 158142c2 bellard
        normalizeRoundAndPackFloatx80(
3801 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
3802 158142c2 bellard
3803 158142c2 bellard
}
3804 158142c2 bellard
3805 158142c2 bellard
/*----------------------------------------------------------------------------
3806 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
3807 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
3808 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3809 158142c2 bellard
*----------------------------------------------------------------------------*/
3810 158142c2 bellard
3811 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
3812 158142c2 bellard
{
3813 158142c2 bellard
    flag aSign;
3814 158142c2 bellard
    int32 aExp, zExp;
3815 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3816 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3817 158142c2 bellard
    floatx80 z;
3818 158142c2 bellard
3819 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
3820 158142c2 bellard
    aExp = extractFloatx80Exp( a );
3821 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3822 158142c2 bellard
    if ( aExp == 0x7FFF ) {
3823 158142c2 bellard
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
3824 158142c2 bellard
        if ( ! aSign ) return a;
3825 158142c2 bellard
        goto invalid;
3826 158142c2 bellard
    }
3827 158142c2 bellard
    if ( aSign ) {
3828 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
3829 158142c2 bellard
 invalid:
3830 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3831 158142c2 bellard
        z.low = floatx80_default_nan_low;
3832 158142c2 bellard
        z.high = floatx80_default_nan_high;
3833 158142c2 bellard
        return z;
3834 158142c2 bellard
    }
3835 158142c2 bellard
    if ( aExp == 0 ) {
3836 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3837 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3838 158142c2 bellard
    }
3839 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3840 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3841 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
3842 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
3843 158142c2 bellard
    doubleZSig0 = zSig0<<1;
3844 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
3845 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3846 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
3847 158142c2 bellard
        --zSig0;
3848 158142c2 bellard
        doubleZSig0 -= 2;
3849 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
3850 158142c2 bellard
    }
3851 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
3852 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
3853 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
3854 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
3855 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3856 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
3857 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3858 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
3859 158142c2 bellard
            --zSig1;
3860 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
3861 158142c2 bellard
            term3 |= 1;
3862 158142c2 bellard
            term2 |= doubleZSig0;
3863 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
3864 158142c2 bellard
        }
3865 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3866 158142c2 bellard
    }
3867 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
3868 158142c2 bellard
    zSig0 |= doubleZSig0;
3869 158142c2 bellard
    return
3870 158142c2 bellard
        roundAndPackFloatx80(
3871 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
3872 158142c2 bellard
3873 158142c2 bellard
}
3874 158142c2 bellard
3875 158142c2 bellard
/*----------------------------------------------------------------------------
3876 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
3877 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  The comparison is
3878 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3879 158142c2 bellard
| Arithmetic.
3880 158142c2 bellard
*----------------------------------------------------------------------------*/
3881 158142c2 bellard
3882 158142c2 bellard
flag floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
3883 158142c2 bellard
{
3884 158142c2 bellard
3885 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3886 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3887 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3888 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3889 158142c2 bellard
       ) {
3890 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
3891 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
3892 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3893 158142c2 bellard
        }
3894 158142c2 bellard
        return 0;
3895 158142c2 bellard
    }
3896 158142c2 bellard
    return
3897 158142c2 bellard
           ( a.low == b.low )
3898 158142c2 bellard
        && (    ( a.high == b.high )
3899 158142c2 bellard
             || (    ( a.low == 0 )
3900 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3901 158142c2 bellard
           );
3902 158142c2 bellard
3903 158142c2 bellard
}
3904 158142c2 bellard
3905 158142c2 bellard
/*----------------------------------------------------------------------------
3906 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
3907 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
3908 158142c2 bellard
| comparison is performed according to the IEC/IEEE Standard for Binary
3909 158142c2 bellard
| Floating-Point Arithmetic.
3910 158142c2 bellard
*----------------------------------------------------------------------------*/
3911 158142c2 bellard
3912 158142c2 bellard
flag floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
3913 158142c2 bellard
{
3914 158142c2 bellard
    flag aSign, bSign;
3915 158142c2 bellard
3916 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3917 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3918 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3919 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3920 158142c2 bellard
       ) {
3921 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3922 158142c2 bellard
        return 0;
3923 158142c2 bellard
    }
3924 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3925 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3926 158142c2 bellard
    if ( aSign != bSign ) {
3927 158142c2 bellard
        return
3928 158142c2 bellard
               aSign
3929 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3930 158142c2 bellard
                 == 0 );
3931 158142c2 bellard
    }
3932 158142c2 bellard
    return
3933 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
3934 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
3935 158142c2 bellard
3936 158142c2 bellard
}
3937 158142c2 bellard
3938 158142c2 bellard
/*----------------------------------------------------------------------------
3939 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
3940 158142c2 bellard
| less than the corresponding value `b', and 0 otherwise.  The comparison
3941 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3942 158142c2 bellard
| Arithmetic.
3943 158142c2 bellard
*----------------------------------------------------------------------------*/
3944 158142c2 bellard
3945 158142c2 bellard
flag floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
3946 158142c2 bellard
{
3947 158142c2 bellard
    flag aSign, bSign;
3948 158142c2 bellard
3949 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3950 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3951 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3952 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3953 158142c2 bellard
       ) {
3954 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3955 158142c2 bellard
        return 0;
3956 158142c2 bellard
    }
3957 158142c2 bellard
    aSign = extractFloatx80Sign( a );
3958 158142c2 bellard
    bSign = extractFloatx80Sign( b );
3959 158142c2 bellard
    if ( aSign != bSign ) {
3960 158142c2 bellard
        return
3961 158142c2 bellard
               aSign
3962 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3963 158142c2 bellard
                 != 0 );
3964 158142c2 bellard
    }
3965 158142c2 bellard
    return
3966 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
3967 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
3968 158142c2 bellard
3969 158142c2 bellard
}
3970 158142c2 bellard
3971 158142c2 bellard
/*----------------------------------------------------------------------------
3972 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is equal
3973 158142c2 bellard
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
3974 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
3975 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3976 158142c2 bellard
*----------------------------------------------------------------------------*/
3977 158142c2 bellard
3978 158142c2 bellard
flag floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
3979 158142c2 bellard
{
3980 158142c2 bellard
3981 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
3982 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3983 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
3984 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3985 158142c2 bellard
       ) {
3986 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3987 158142c2 bellard
        return 0;
3988 158142c2 bellard
    }
3989 158142c2 bellard
    return
3990 158142c2 bellard
           ( a.low == b.low )
3991 158142c2 bellard
        && (    ( a.high == b.high )
3992 158142c2 bellard
             || (    ( a.low == 0 )
3993 158142c2 bellard
                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3994 158142c2 bellard
           );
3995 158142c2 bellard
3996 158142c2 bellard
}
3997 158142c2 bellard
3998 158142c2 bellard
/*----------------------------------------------------------------------------
3999 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4000 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4001 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4002 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4003 158142c2 bellard
*----------------------------------------------------------------------------*/
4004 158142c2 bellard
4005 158142c2 bellard
flag floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4006 158142c2 bellard
{
4007 158142c2 bellard
    flag aSign, bSign;
4008 158142c2 bellard
4009 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4010 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4011 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4012 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4013 158142c2 bellard
       ) {
4014 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4015 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4016 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4017 158142c2 bellard
        }
4018 158142c2 bellard
        return 0;
4019 158142c2 bellard
    }
4020 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4021 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4022 158142c2 bellard
    if ( aSign != bSign ) {
4023 158142c2 bellard
        return
4024 158142c2 bellard
               aSign
4025 158142c2 bellard
            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4026 158142c2 bellard
                 == 0 );
4027 158142c2 bellard
    }
4028 158142c2 bellard
    return
4029 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4030 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4031 158142c2 bellard
4032 158142c2 bellard
}
4033 158142c2 bellard
4034 158142c2 bellard
/*----------------------------------------------------------------------------
4035 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4036 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4037 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4038 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4039 158142c2 bellard
*----------------------------------------------------------------------------*/
4040 158142c2 bellard
4041 158142c2 bellard
flag floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4042 158142c2 bellard
{
4043 158142c2 bellard
    flag aSign, bSign;
4044 158142c2 bellard
4045 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4046 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4047 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4048 158142c2 bellard
              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4049 158142c2 bellard
       ) {
4050 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4051 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4052 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4053 158142c2 bellard
        }
4054 158142c2 bellard
        return 0;
4055 158142c2 bellard
    }
4056 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4057 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4058 158142c2 bellard
    if ( aSign != bSign ) {
4059 158142c2 bellard
        return
4060 158142c2 bellard
               aSign
4061 158142c2 bellard
            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4062 158142c2 bellard
                 != 0 );
4063 158142c2 bellard
    }
4064 158142c2 bellard
    return
4065 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4066 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4067 158142c2 bellard
4068 158142c2 bellard
}
4069 158142c2 bellard
4070 158142c2 bellard
#endif
4071 158142c2 bellard
4072 158142c2 bellard
#ifdef FLOAT128
4073 158142c2 bellard
4074 158142c2 bellard
/*----------------------------------------------------------------------------
4075 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4076 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4077 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4078 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4079 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4080 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4081 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4082 158142c2 bellard
*----------------------------------------------------------------------------*/
4083 158142c2 bellard
4084 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4085 158142c2 bellard
{
4086 158142c2 bellard
    flag aSign;
4087 158142c2 bellard
    int32 aExp, shiftCount;
4088 158142c2 bellard
    bits64 aSig0, aSig1;
4089 158142c2 bellard
4090 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4091 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4092 158142c2 bellard
    aExp = extractFloat128Exp( a );
4093 158142c2 bellard
    aSign = extractFloat128Sign( a );
4094 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4095 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4096 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4097 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4098 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4099 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4100 158142c2 bellard
4101 158142c2 bellard
}
4102 158142c2 bellard
4103 158142c2 bellard
/*----------------------------------------------------------------------------
4104 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4105 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4106 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4107 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4108 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4109 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4110 158142c2 bellard
| returned.
4111 158142c2 bellard
*----------------------------------------------------------------------------*/
4112 158142c2 bellard
4113 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4114 158142c2 bellard
{
4115 158142c2 bellard
    flag aSign;
4116 158142c2 bellard
    int32 aExp, shiftCount;
4117 158142c2 bellard
    bits64 aSig0, aSig1, savedASig;
4118 158142c2 bellard
    int32 z;
4119 158142c2 bellard
4120 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4121 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4122 158142c2 bellard
    aExp = extractFloat128Exp( a );
4123 158142c2 bellard
    aSign = extractFloat128Sign( a );
4124 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4125 158142c2 bellard
    if ( 0x401E < aExp ) {
4126 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4127 158142c2 bellard
        goto invalid;
4128 158142c2 bellard
    }
4129 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4130 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4131 158142c2 bellard
        return 0;
4132 158142c2 bellard
    }
4133 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4134 158142c2 bellard
    shiftCount = 0x402F - aExp;
4135 158142c2 bellard
    savedASig = aSig0;
4136 158142c2 bellard
    aSig0 >>= shiftCount;
4137 158142c2 bellard
    z = aSig0;
4138 158142c2 bellard
    if ( aSign ) z = - z;
4139 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4140 158142c2 bellard
 invalid:
4141 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4142 158142c2 bellard
        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4143 158142c2 bellard
    }
4144 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4145 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4146 158142c2 bellard
    }
4147 158142c2 bellard
    return z;
4148 158142c2 bellard
4149 158142c2 bellard
}
4150 158142c2 bellard
4151 158142c2 bellard
/*----------------------------------------------------------------------------
4152 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4153 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4154 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4155 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4156 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4157 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4158 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4159 158142c2 bellard
*----------------------------------------------------------------------------*/
4160 158142c2 bellard
4161 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4162 158142c2 bellard
{
4163 158142c2 bellard
    flag aSign;
4164 158142c2 bellard
    int32 aExp, shiftCount;
4165 158142c2 bellard
    bits64 aSig0, aSig1;
4166 158142c2 bellard
4167 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4168 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4169 158142c2 bellard
    aExp = extractFloat128Exp( a );
4170 158142c2 bellard
    aSign = extractFloat128Sign( a );
4171 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4172 158142c2 bellard
    shiftCount = 0x402F - aExp;
4173 158142c2 bellard
    if ( shiftCount <= 0 ) {
4174 158142c2 bellard
        if ( 0x403E < aExp ) {
4175 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4176 158142c2 bellard
            if (    ! aSign
4177 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4178 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4179 158142c2 bellard
                    )
4180 158142c2 bellard
               ) {
4181 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4182 158142c2 bellard
            }
4183 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4184 158142c2 bellard
        }
4185 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4186 158142c2 bellard
    }
4187 158142c2 bellard
    else {
4188 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4189 158142c2 bellard
    }
4190 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4191 158142c2 bellard
4192 158142c2 bellard
}
4193 158142c2 bellard
4194 158142c2 bellard
/*----------------------------------------------------------------------------
4195 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4196 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4197 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4198 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4199 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4200 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4201 158142c2 bellard
| returned.
4202 158142c2 bellard
*----------------------------------------------------------------------------*/
4203 158142c2 bellard
4204 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4205 158142c2 bellard
{
4206 158142c2 bellard
    flag aSign;
4207 158142c2 bellard
    int32 aExp, shiftCount;
4208 158142c2 bellard
    bits64 aSig0, aSig1;
4209 158142c2 bellard
    int64 z;
4210 158142c2 bellard
4211 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4212 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4213 158142c2 bellard
    aExp = extractFloat128Exp( a );
4214 158142c2 bellard
    aSign = extractFloat128Sign( a );
4215 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4216 158142c2 bellard
    shiftCount = aExp - 0x402F;
4217 158142c2 bellard
    if ( 0 < shiftCount ) {
4218 158142c2 bellard
        if ( 0x403E <= aExp ) {
4219 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4220 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4221 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4222 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4223 158142c2 bellard
            }
4224 158142c2 bellard
            else {
4225 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4226 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4227 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4228 158142c2 bellard
                }
4229 158142c2 bellard
            }
4230 158142c2 bellard
            return (sbits64) LIT64( 0x8000000000000000 );
4231 158142c2 bellard
        }
4232 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4233 158142c2 bellard
        if ( (bits64) ( aSig1<<shiftCount ) ) {
4234 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4235 158142c2 bellard
        }
4236 158142c2 bellard
    }
4237 158142c2 bellard
    else {
4238 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4239 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
4240 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
4241 158142c2 bellard
            }
4242 158142c2 bellard
            return 0;
4243 158142c2 bellard
        }
4244 158142c2 bellard
        z = aSig0>>( - shiftCount );
4245 158142c2 bellard
        if (    aSig1
4246 158142c2 bellard
             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4247 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4248 158142c2 bellard
        }
4249 158142c2 bellard
    }
4250 158142c2 bellard
    if ( aSign ) z = - z;
4251 158142c2 bellard
    return z;
4252 158142c2 bellard
4253 158142c2 bellard
}
4254 158142c2 bellard
4255 158142c2 bellard
/*----------------------------------------------------------------------------
4256 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4257 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
4258 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4259 158142c2 bellard
| Arithmetic.
4260 158142c2 bellard
*----------------------------------------------------------------------------*/
4261 158142c2 bellard
4262 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
4263 158142c2 bellard
{
4264 158142c2 bellard
    flag aSign;
4265 158142c2 bellard
    int32 aExp;
4266 158142c2 bellard
    bits64 aSig0, aSig1;
4267 158142c2 bellard
    bits32 zSig;
4268 158142c2 bellard
4269 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4270 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4271 158142c2 bellard
    aExp = extractFloat128Exp( a );
4272 158142c2 bellard
    aSign = extractFloat128Sign( a );
4273 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4274 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4275 158142c2 bellard
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4276 158142c2 bellard
        }
4277 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
4278 158142c2 bellard
    }
4279 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4280 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
4281 158142c2 bellard
    zSig = aSig0;
4282 158142c2 bellard
    if ( aExp || zSig ) {
4283 158142c2 bellard
        zSig |= 0x40000000;
4284 158142c2 bellard
        aExp -= 0x3F81;
4285 158142c2 bellard
    }
4286 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4287 158142c2 bellard
4288 158142c2 bellard
}
4289 158142c2 bellard
4290 158142c2 bellard
/*----------------------------------------------------------------------------
4291 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4292 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
4293 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4294 158142c2 bellard
| Arithmetic.
4295 158142c2 bellard
*----------------------------------------------------------------------------*/
4296 158142c2 bellard
4297 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
4298 158142c2 bellard
{
4299 158142c2 bellard
    flag aSign;
4300 158142c2 bellard
    int32 aExp;
4301 158142c2 bellard
    bits64 aSig0, aSig1;
4302 158142c2 bellard
4303 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4304 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4305 158142c2 bellard
    aExp = extractFloat128Exp( a );
4306 158142c2 bellard
    aSign = extractFloat128Sign( a );
4307 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4308 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4309 158142c2 bellard
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4310 158142c2 bellard
        }
4311 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
4312 158142c2 bellard
    }
4313 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4314 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4315 158142c2 bellard
    if ( aExp || aSig0 ) {
4316 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4317 158142c2 bellard
        aExp -= 0x3C01;
4318 158142c2 bellard
    }
4319 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4320 158142c2 bellard
4321 158142c2 bellard
}
4322 158142c2 bellard
4323 158142c2 bellard
#ifdef FLOATX80
4324 158142c2 bellard
4325 158142c2 bellard
/*----------------------------------------------------------------------------
4326 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4327 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
4328 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4329 158142c2 bellard
| Floating-Point Arithmetic.
4330 158142c2 bellard
*----------------------------------------------------------------------------*/
4331 158142c2 bellard
4332 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4333 158142c2 bellard
{
4334 158142c2 bellard
    flag aSign;
4335 158142c2 bellard
    int32 aExp;
4336 158142c2 bellard
    bits64 aSig0, aSig1;
4337 158142c2 bellard
4338 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4339 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4340 158142c2 bellard
    aExp = extractFloat128Exp( a );
4341 158142c2 bellard
    aSign = extractFloat128Sign( a );
4342 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4343 158142c2 bellard
        if ( aSig0 | aSig1 ) {
4344 158142c2 bellard
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4345 158142c2 bellard
        }
4346 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4347 158142c2 bellard
    }
4348 158142c2 bellard
    if ( aExp == 0 ) {
4349 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4350 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4351 158142c2 bellard
    }
4352 158142c2 bellard
    else {
4353 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
4354 158142c2 bellard
    }
4355 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4356 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4357 158142c2 bellard
4358 158142c2 bellard
}
4359 158142c2 bellard
4360 158142c2 bellard
#endif
4361 158142c2 bellard
4362 158142c2 bellard
/*----------------------------------------------------------------------------
4363 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
4364 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
4365 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
4366 158142c2 bellard
| Floating-Point Arithmetic.
4367 158142c2 bellard
*----------------------------------------------------------------------------*/
4368 158142c2 bellard
4369 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
4370 158142c2 bellard
{
4371 158142c2 bellard
    flag aSign;
4372 158142c2 bellard
    int32 aExp;
4373 158142c2 bellard
    bits64 lastBitMask, roundBitsMask;
4374 158142c2 bellard
    int8 roundingMode;
4375 158142c2 bellard
    float128 z;
4376 158142c2 bellard
4377 158142c2 bellard
    aExp = extractFloat128Exp( a );
4378 158142c2 bellard
    if ( 0x402F <= aExp ) {
4379 158142c2 bellard
        if ( 0x406F <= aExp ) {
4380 158142c2 bellard
            if (    ( aExp == 0x7FFF )
4381 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4382 158142c2 bellard
               ) {
4383 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
4384 158142c2 bellard
            }
4385 158142c2 bellard
            return a;
4386 158142c2 bellard
        }
4387 158142c2 bellard
        lastBitMask = 1;
4388 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4389 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4390 158142c2 bellard
        z = a;
4391 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4392 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4393 158142c2 bellard
            if ( lastBitMask ) {
4394 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4395 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4396 158142c2 bellard
            }
4397 158142c2 bellard
            else {
4398 158142c2 bellard
                if ( (sbits64) z.low < 0 ) {
4399 158142c2 bellard
                    ++z.high;
4400 158142c2 bellard
                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4401 158142c2 bellard
                }
4402 158142c2 bellard
            }
4403 158142c2 bellard
        }
4404 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4405 158142c2 bellard
            if (   extractFloat128Sign( z )
4406 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4407 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4408 158142c2 bellard
            }
4409 158142c2 bellard
        }
4410 158142c2 bellard
        z.low &= ~ roundBitsMask;
4411 158142c2 bellard
    }
4412 158142c2 bellard
    else {
4413 158142c2 bellard
        if ( aExp < 0x3FFF ) {
4414 158142c2 bellard
            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4415 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
4416 158142c2 bellard
            aSign = extractFloat128Sign( a );
4417 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
4418 158142c2 bellard
             case float_round_nearest_even:
4419 158142c2 bellard
                if (    ( aExp == 0x3FFE )
4420 158142c2 bellard
                     && (   extractFloat128Frac0( a )
4421 158142c2 bellard
                          | extractFloat128Frac1( a ) )
4422 158142c2 bellard
                   ) {
4423 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4424 158142c2 bellard
                }
4425 158142c2 bellard
                break;
4426 158142c2 bellard
             case float_round_down:
4427 158142c2 bellard
                return
4428 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4429 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
4430 158142c2 bellard
             case float_round_up:
4431 158142c2 bellard
                return
4432 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
4433 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
4434 158142c2 bellard
            }
4435 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
4436 158142c2 bellard
        }
4437 158142c2 bellard
        lastBitMask = 1;
4438 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
4439 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
4440 158142c2 bellard
        z.low = 0;
4441 158142c2 bellard
        z.high = a.high;
4442 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
4443 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
4444 158142c2 bellard
            z.high += lastBitMask>>1;
4445 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4446 158142c2 bellard
                z.high &= ~ lastBitMask;
4447 158142c2 bellard
            }
4448 158142c2 bellard
        }
4449 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
4450 158142c2 bellard
            if (   extractFloat128Sign( z )
4451 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
4452 158142c2 bellard
                z.high |= ( a.low != 0 );
4453 158142c2 bellard
                z.high += roundBitsMask;
4454 158142c2 bellard
            }
4455 158142c2 bellard
        }
4456 158142c2 bellard
        z.high &= ~ roundBitsMask;
4457 158142c2 bellard
    }
4458 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4459 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4460 158142c2 bellard
    }
4461 158142c2 bellard
    return z;
4462 158142c2 bellard
4463 158142c2 bellard
}
4464 158142c2 bellard
4465 158142c2 bellard
/*----------------------------------------------------------------------------
4466 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
4467 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4468 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
4469 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4470 158142c2 bellard
| Floating-Point Arithmetic.
4471 158142c2 bellard
*----------------------------------------------------------------------------*/
4472 158142c2 bellard
4473 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4474 158142c2 bellard
{
4475 158142c2 bellard
    int32 aExp, bExp, zExp;
4476 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4477 158142c2 bellard
    int32 expDiff;
4478 158142c2 bellard
4479 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4480 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4481 158142c2 bellard
    aExp = extractFloat128Exp( a );
4482 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4483 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4484 158142c2 bellard
    bExp = extractFloat128Exp( b );
4485 158142c2 bellard
    expDiff = aExp - bExp;
4486 158142c2 bellard
    if ( 0 < expDiff ) {
4487 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4488 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4489 158142c2 bellard
            return a;
4490 158142c2 bellard
        }
4491 158142c2 bellard
        if ( bExp == 0 ) {
4492 158142c2 bellard
            --expDiff;
4493 158142c2 bellard
        }
4494 158142c2 bellard
        else {
4495 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
4496 158142c2 bellard
        }
4497 158142c2 bellard
        shift128ExtraRightJamming(
4498 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4499 158142c2 bellard
        zExp = aExp;
4500 158142c2 bellard
    }
4501 158142c2 bellard
    else if ( expDiff < 0 ) {
4502 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4503 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4504 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4505 158142c2 bellard
        }
4506 158142c2 bellard
        if ( aExp == 0 ) {
4507 158142c2 bellard
            ++expDiff;
4508 158142c2 bellard
        }
4509 158142c2 bellard
        else {
4510 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
4511 158142c2 bellard
        }
4512 158142c2 bellard
        shift128ExtraRightJamming(
4513 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4514 158142c2 bellard
        zExp = bExp;
4515 158142c2 bellard
    }
4516 158142c2 bellard
    else {
4517 158142c2 bellard
        if ( aExp == 0x7FFF ) {
4518 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4519 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
4520 158142c2 bellard
            }
4521 158142c2 bellard
            return a;
4522 158142c2 bellard
        }
4523 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4524 158142c2 bellard
        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4525 158142c2 bellard
        zSig2 = 0;
4526 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
4527 158142c2 bellard
        zExp = aExp;
4528 158142c2 bellard
        goto shiftRight1;
4529 158142c2 bellard
    }
4530 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4531 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4532 158142c2 bellard
    --zExp;
4533 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4534 158142c2 bellard
    ++zExp;
4535 158142c2 bellard
 shiftRight1:
4536 158142c2 bellard
    shift128ExtraRightJamming(
4537 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4538 158142c2 bellard
 roundAndPack:
4539 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4540 158142c2 bellard
4541 158142c2 bellard
}
4542 158142c2 bellard
4543 158142c2 bellard
/*----------------------------------------------------------------------------
4544 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
4545 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
4546 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4547 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4548 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4549 158142c2 bellard
*----------------------------------------------------------------------------*/
4550 158142c2 bellard
4551 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4552 158142c2 bellard
{
4553 158142c2 bellard
    int32 aExp, bExp, zExp;
4554 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4555 158142c2 bellard
    int32 expDiff;
4556 158142c2 bellard
    float128 z;
4557 158142c2 bellard
4558 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4559 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4560 158142c2 bellard
    aExp = extractFloat128Exp( a );
4561 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4562 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4563 158142c2 bellard
    bExp = extractFloat128Exp( b );
4564 158142c2 bellard
    expDiff = aExp - bExp;
4565 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4566 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4567 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4568 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
4569 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4570 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4571 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4572 158142c2 bellard
        }
4573 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4574 158142c2 bellard
        z.low = float128_default_nan_low;
4575 158142c2 bellard
        z.high = float128_default_nan_high;
4576 158142c2 bellard
        return z;
4577 158142c2 bellard
    }
4578 158142c2 bellard
    if ( aExp == 0 ) {
4579 158142c2 bellard
        aExp = 1;
4580 158142c2 bellard
        bExp = 1;
4581 158142c2 bellard
    }
4582 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
4583 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
4584 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
4585 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
4586 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4587 158142c2 bellard
 bExpBigger:
4588 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4589 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4590 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4591 158142c2 bellard
    }
4592 158142c2 bellard
    if ( aExp == 0 ) {
4593 158142c2 bellard
        ++expDiff;
4594 158142c2 bellard
    }
4595 158142c2 bellard
    else {
4596 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
4597 158142c2 bellard
    }
4598 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4599 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
4600 158142c2 bellard
 bBigger:
4601 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4602 158142c2 bellard
    zExp = bExp;
4603 158142c2 bellard
    zSign ^= 1;
4604 158142c2 bellard
    goto normalizeRoundAndPack;
4605 158142c2 bellard
 aExpBigger:
4606 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4607 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4608 158142c2 bellard
        return a;
4609 158142c2 bellard
    }
4610 158142c2 bellard
    if ( bExp == 0 ) {
4611 158142c2 bellard
        --expDiff;
4612 158142c2 bellard
    }
4613 158142c2 bellard
    else {
4614 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
4615 158142c2 bellard
    }
4616 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4617 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
4618 158142c2 bellard
 aBigger:
4619 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4620 158142c2 bellard
    zExp = aExp;
4621 158142c2 bellard
 normalizeRoundAndPack:
4622 158142c2 bellard
    --zExp;
4623 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4624 158142c2 bellard
4625 158142c2 bellard
}
4626 158142c2 bellard
4627 158142c2 bellard
/*----------------------------------------------------------------------------
4628 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
4629 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4630 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4631 158142c2 bellard
*----------------------------------------------------------------------------*/
4632 158142c2 bellard
4633 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
4634 158142c2 bellard
{
4635 158142c2 bellard
    flag aSign, bSign;
4636 158142c2 bellard
4637 158142c2 bellard
    aSign = extractFloat128Sign( a );
4638 158142c2 bellard
    bSign = extractFloat128Sign( b );
4639 158142c2 bellard
    if ( aSign == bSign ) {
4640 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4641 158142c2 bellard
    }
4642 158142c2 bellard
    else {
4643 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4644 158142c2 bellard
    }
4645 158142c2 bellard
4646 158142c2 bellard
}
4647 158142c2 bellard
4648 158142c2 bellard
/*----------------------------------------------------------------------------
4649 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
4650 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4651 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4652 158142c2 bellard
*----------------------------------------------------------------------------*/
4653 158142c2 bellard
4654 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
4655 158142c2 bellard
{
4656 158142c2 bellard
    flag aSign, bSign;
4657 158142c2 bellard
4658 158142c2 bellard
    aSign = extractFloat128Sign( a );
4659 158142c2 bellard
    bSign = extractFloat128Sign( b );
4660 158142c2 bellard
    if ( aSign == bSign ) {
4661 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
4662 158142c2 bellard
    }
4663 158142c2 bellard
    else {
4664 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
4665 158142c2 bellard
    }
4666 158142c2 bellard
4667 158142c2 bellard
}
4668 158142c2 bellard
4669 158142c2 bellard
/*----------------------------------------------------------------------------
4670 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
4671 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4672 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4673 158142c2 bellard
*----------------------------------------------------------------------------*/
4674 158142c2 bellard
4675 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
4676 158142c2 bellard
{
4677 158142c2 bellard
    flag aSign, bSign, zSign;
4678 158142c2 bellard
    int32 aExp, bExp, zExp;
4679 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4680 158142c2 bellard
    float128 z;
4681 158142c2 bellard
4682 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4683 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4684 158142c2 bellard
    aExp = extractFloat128Exp( a );
4685 158142c2 bellard
    aSign = extractFloat128Sign( a );
4686 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4687 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4688 158142c2 bellard
    bExp = extractFloat128Exp( b );
4689 158142c2 bellard
    bSign = extractFloat128Sign( b );
4690 158142c2 bellard
    zSign = aSign ^ bSign;
4691 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4692 158142c2 bellard
        if (    ( aSig0 | aSig1 )
4693 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4694 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4695 158142c2 bellard
        }
4696 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4697 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4698 158142c2 bellard
    }
4699 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4700 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4701 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4702 158142c2 bellard
 invalid:
4703 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4704 158142c2 bellard
            z.low = float128_default_nan_low;
4705 158142c2 bellard
            z.high = float128_default_nan_high;
4706 158142c2 bellard
            return z;
4707 158142c2 bellard
        }
4708 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4709 158142c2 bellard
    }
4710 158142c2 bellard
    if ( aExp == 0 ) {
4711 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4712 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4713 158142c2 bellard
    }
4714 158142c2 bellard
    if ( bExp == 0 ) {
4715 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4716 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4717 158142c2 bellard
    }
4718 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
4719 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4720 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4721 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4722 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4723 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
4724 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4725 158142c2 bellard
        shift128ExtraRightJamming(
4726 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4727 158142c2 bellard
        ++zExp;
4728 158142c2 bellard
    }
4729 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4730 158142c2 bellard
4731 158142c2 bellard
}
4732 158142c2 bellard
4733 158142c2 bellard
/*----------------------------------------------------------------------------
4734 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
4735 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
4736 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4737 158142c2 bellard
*----------------------------------------------------------------------------*/
4738 158142c2 bellard
4739 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
4740 158142c2 bellard
{
4741 158142c2 bellard
    flag aSign, bSign, zSign;
4742 158142c2 bellard
    int32 aExp, bExp, zExp;
4743 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4744 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4745 158142c2 bellard
    float128 z;
4746 158142c2 bellard
4747 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4748 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4749 158142c2 bellard
    aExp = extractFloat128Exp( a );
4750 158142c2 bellard
    aSign = extractFloat128Sign( a );
4751 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4752 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4753 158142c2 bellard
    bExp = extractFloat128Exp( b );
4754 158142c2 bellard
    bSign = extractFloat128Sign( b );
4755 158142c2 bellard
    zSign = aSign ^ bSign;
4756 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4757 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4758 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4759 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4760 158142c2 bellard
            goto invalid;
4761 158142c2 bellard
        }
4762 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
4763 158142c2 bellard
    }
4764 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4765 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4766 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
4767 158142c2 bellard
    }
4768 158142c2 bellard
    if ( bExp == 0 ) {
4769 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
4770 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4771 158142c2 bellard
 invalid:
4772 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4773 158142c2 bellard
                z.low = float128_default_nan_low;
4774 158142c2 bellard
                z.high = float128_default_nan_high;
4775 158142c2 bellard
                return z;
4776 158142c2 bellard
            }
4777 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4778 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
4779 158142c2 bellard
        }
4780 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4781 158142c2 bellard
    }
4782 158142c2 bellard
    if ( aExp == 0 ) {
4783 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4784 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4785 158142c2 bellard
    }
4786 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
4787 158142c2 bellard
    shortShift128Left(
4788 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4789 158142c2 bellard
    shortShift128Left(
4790 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4791 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4792 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4793 158142c2 bellard
        ++zExp;
4794 158142c2 bellard
    }
4795 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4796 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4797 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
4798 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4799 158142c2 bellard
        --zSig0;
4800 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
4801 158142c2 bellard
    }
4802 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
4803 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
4804 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
4805 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
4806 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4807 158142c2 bellard
            --zSig1;
4808 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
4809 158142c2 bellard
        }
4810 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4811 158142c2 bellard
    }
4812 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
4813 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4814 158142c2 bellard
4815 158142c2 bellard
}
4816 158142c2 bellard
4817 158142c2 bellard
/*----------------------------------------------------------------------------
4818 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
4819 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
4820 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4821 158142c2 bellard
*----------------------------------------------------------------------------*/
4822 158142c2 bellard
4823 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
4824 158142c2 bellard
{
4825 158142c2 bellard
    flag aSign, bSign, zSign;
4826 158142c2 bellard
    int32 aExp, bExp, expDiff;
4827 158142c2 bellard
    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
4828 158142c2 bellard
    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
4829 158142c2 bellard
    sbits64 sigMean0;
4830 158142c2 bellard
    float128 z;
4831 158142c2 bellard
4832 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4833 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4834 158142c2 bellard
    aExp = extractFloat128Exp( a );
4835 158142c2 bellard
    aSign = extractFloat128Sign( a );
4836 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
4837 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
4838 158142c2 bellard
    bExp = extractFloat128Exp( b );
4839 158142c2 bellard
    bSign = extractFloat128Sign( b );
4840 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4841 158142c2 bellard
        if (    ( aSig0 | aSig1 )
4842 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4843 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
4844 158142c2 bellard
        }
4845 158142c2 bellard
        goto invalid;
4846 158142c2 bellard
    }
4847 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4848 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4849 158142c2 bellard
        return a;
4850 158142c2 bellard
    }
4851 158142c2 bellard
    if ( bExp == 0 ) {
4852 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
4853 158142c2 bellard
 invalid:
4854 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4855 158142c2 bellard
            z.low = float128_default_nan_low;
4856 158142c2 bellard
            z.high = float128_default_nan_high;
4857 158142c2 bellard
            return z;
4858 158142c2 bellard
        }
4859 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4860 158142c2 bellard
    }
4861 158142c2 bellard
    if ( aExp == 0 ) {
4862 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
4863 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4864 158142c2 bellard
    }
4865 158142c2 bellard
    expDiff = aExp - bExp;
4866 158142c2 bellard
    if ( expDiff < -1 ) return a;
4867 158142c2 bellard
    shortShift128Left(
4868 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
4869 158142c2 bellard
        aSig1,
4870 158142c2 bellard
        15 - ( expDiff < 0 ),
4871 158142c2 bellard
        &aSig0,
4872 158142c2 bellard
        &aSig1
4873 158142c2 bellard
    );
4874 158142c2 bellard
    shortShift128Left(
4875 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4876 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
4877 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
4878 158142c2 bellard
    expDiff -= 64;
4879 158142c2 bellard
    while ( 0 < expDiff ) {
4880 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
4881 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
4882 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
4883 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
4884 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
4885 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
4886 158142c2 bellard
        expDiff -= 61;
4887 158142c2 bellard
    }
4888 158142c2 bellard
    if ( -64 < expDiff ) {
4889 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
4890 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
4891 158142c2 bellard
        q >>= - expDiff;
4892 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
4893 158142c2 bellard
        expDiff += 52;
4894 158142c2 bellard
        if ( expDiff < 0 ) {
4895 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4896 158142c2 bellard
        }
4897 158142c2 bellard
        else {
4898 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
4899 158142c2 bellard
        }
4900 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
4901 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
4902 158142c2 bellard
    }
4903 158142c2 bellard
    else {
4904 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
4905 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
4906 158142c2 bellard
    }
4907 158142c2 bellard
    do {
4908 158142c2 bellard
        alternateASig0 = aSig0;
4909 158142c2 bellard
        alternateASig1 = aSig1;
4910 158142c2 bellard
        ++q;
4911 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
4912 158142c2 bellard
    } while ( 0 <= (sbits64) aSig0 );
4913 158142c2 bellard
    add128(
4914 158142c2 bellard
        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
4915 158142c2 bellard
    if (    ( sigMean0 < 0 )
4916 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
4917 158142c2 bellard
        aSig0 = alternateASig0;
4918 158142c2 bellard
        aSig1 = alternateASig1;
4919 158142c2 bellard
    }
4920 158142c2 bellard
    zSign = ( (sbits64) aSig0 < 0 );
4921 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
4922 158142c2 bellard
    return
4923 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
4924 158142c2 bellard
4925 158142c2 bellard
}
4926 158142c2 bellard
4927 158142c2 bellard
/*----------------------------------------------------------------------------
4928 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
4929 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
4930 158142c2 bellard
| Floating-Point Arithmetic.
4931 158142c2 bellard
*----------------------------------------------------------------------------*/
4932 158142c2 bellard
4933 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
4934 158142c2 bellard
{
4935 158142c2 bellard
    flag aSign;
4936 158142c2 bellard
    int32 aExp, zExp;
4937 158142c2 bellard
    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
4938 158142c2 bellard
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4939 158142c2 bellard
    float128 z;
4940 158142c2 bellard
4941 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4942 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4943 158142c2 bellard
    aExp = extractFloat128Exp( a );
4944 158142c2 bellard
    aSign = extractFloat128Sign( a );
4945 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4946 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
4947 158142c2 bellard
        if ( ! aSign ) return a;
4948 158142c2 bellard
        goto invalid;
4949 158142c2 bellard
    }
4950 158142c2 bellard
    if ( aSign ) {
4951 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
4952 158142c2 bellard
 invalid:
4953 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4954 158142c2 bellard
        z.low = float128_default_nan_low;
4955 158142c2 bellard
        z.high = float128_default_nan_high;
4956 158142c2 bellard
        return z;
4957 158142c2 bellard
    }
4958 158142c2 bellard
    if ( aExp == 0 ) {
4959 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
4960 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4961 158142c2 bellard
    }
4962 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
4963 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4964 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
4965 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
4966 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4967 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4968 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4969 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4970 158142c2 bellard
    while ( (sbits64) rem0 < 0 ) {
4971 158142c2 bellard
        --zSig0;
4972 158142c2 bellard
        doubleZSig0 -= 2;
4973 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4974 158142c2 bellard
    }
4975 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4976 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
4977 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4978 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4979 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4980 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4981 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4982 158142c2 bellard
        while ( (sbits64) rem1 < 0 ) {
4983 158142c2 bellard
            --zSig1;
4984 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4985 158142c2 bellard
            term3 |= 1;
4986 158142c2 bellard
            term2 |= doubleZSig0;
4987 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4988 158142c2 bellard
        }
4989 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4990 158142c2 bellard
    }
4991 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
4992 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4993 158142c2 bellard
4994 158142c2 bellard
}
4995 158142c2 bellard
4996 158142c2 bellard
/*----------------------------------------------------------------------------
4997 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
4998 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
4999 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5000 158142c2 bellard
*----------------------------------------------------------------------------*/
5001 158142c2 bellard
5002 158142c2 bellard
flag float128_eq( float128 a, float128 b STATUS_PARAM )
5003 158142c2 bellard
{
5004 158142c2 bellard
5005 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5006 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5007 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5008 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5009 158142c2 bellard
       ) {
5010 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5011 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5012 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5013 158142c2 bellard
        }
5014 158142c2 bellard
        return 0;
5015 158142c2 bellard
    }
5016 158142c2 bellard
    return
5017 158142c2 bellard
           ( a.low == b.low )
5018 158142c2 bellard
        && (    ( a.high == b.high )
5019 158142c2 bellard
             || (    ( a.low == 0 )
5020 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5021 158142c2 bellard
           );
5022 158142c2 bellard
5023 158142c2 bellard
}
5024 158142c2 bellard
5025 158142c2 bellard
/*----------------------------------------------------------------------------
5026 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5027 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  The comparison
5028 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5029 158142c2 bellard
| Arithmetic.
5030 158142c2 bellard
*----------------------------------------------------------------------------*/
5031 158142c2 bellard
5032 158142c2 bellard
flag float128_le( float128 a, float128 b STATUS_PARAM )
5033 158142c2 bellard
{
5034 158142c2 bellard
    flag aSign, bSign;
5035 158142c2 bellard
5036 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5037 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5038 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5039 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5040 158142c2 bellard
       ) {
5041 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5042 158142c2 bellard
        return 0;
5043 158142c2 bellard
    }
5044 158142c2 bellard
    aSign = extractFloat128Sign( a );
5045 158142c2 bellard
    bSign = extractFloat128Sign( b );
5046 158142c2 bellard
    if ( aSign != bSign ) {
5047 158142c2 bellard
        return
5048 158142c2 bellard
               aSign
5049 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5050 158142c2 bellard
                 == 0 );
5051 158142c2 bellard
    }
5052 158142c2 bellard
    return
5053 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5054 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5055 158142c2 bellard
5056 158142c2 bellard
}
5057 158142c2 bellard
5058 158142c2 bellard
/*----------------------------------------------------------------------------
5059 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5060 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The comparison is performed
5061 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5062 158142c2 bellard
*----------------------------------------------------------------------------*/
5063 158142c2 bellard
5064 158142c2 bellard
flag float128_lt( float128 a, float128 b STATUS_PARAM )
5065 158142c2 bellard
{
5066 158142c2 bellard
    flag aSign, bSign;
5067 158142c2 bellard
5068 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5069 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5070 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5071 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5072 158142c2 bellard
       ) {
5073 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5074 158142c2 bellard
        return 0;
5075 158142c2 bellard
    }
5076 158142c2 bellard
    aSign = extractFloat128Sign( a );
5077 158142c2 bellard
    bSign = extractFloat128Sign( b );
5078 158142c2 bellard
    if ( aSign != bSign ) {
5079 158142c2 bellard
        return
5080 158142c2 bellard
               aSign
5081 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5082 158142c2 bellard
                 != 0 );
5083 158142c2 bellard
    }
5084 158142c2 bellard
    return
5085 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5086 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5087 158142c2 bellard
5088 158142c2 bellard
}
5089 158142c2 bellard
5090 158142c2 bellard
/*----------------------------------------------------------------------------
5091 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5092 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5093 158142c2 bellard
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5094 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5095 158142c2 bellard
*----------------------------------------------------------------------------*/
5096 158142c2 bellard
5097 158142c2 bellard
flag float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5098 158142c2 bellard
{
5099 158142c2 bellard
5100 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5101 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5102 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5103 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5104 158142c2 bellard
       ) {
5105 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5106 158142c2 bellard
        return 0;
5107 158142c2 bellard
    }
5108 158142c2 bellard
    return
5109 158142c2 bellard
           ( a.low == b.low )
5110 158142c2 bellard
        && (    ( a.high == b.high )
5111 158142c2 bellard
             || (    ( a.low == 0 )
5112 158142c2 bellard
                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5113 158142c2 bellard
           );
5114 158142c2 bellard
5115 158142c2 bellard
}
5116 158142c2 bellard
5117 158142c2 bellard
/*----------------------------------------------------------------------------
5118 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5119 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5120 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5121 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5122 158142c2 bellard
*----------------------------------------------------------------------------*/
5123 158142c2 bellard
5124 158142c2 bellard
flag float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5125 158142c2 bellard
{
5126 158142c2 bellard
    flag aSign, bSign;
5127 158142c2 bellard
5128 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5129 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5130 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5131 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5132 158142c2 bellard
       ) {
5133 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5134 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5135 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5136 158142c2 bellard
        }
5137 158142c2 bellard
        return 0;
5138 158142c2 bellard
    }
5139 158142c2 bellard
    aSign = extractFloat128Sign( a );
5140 158142c2 bellard
    bSign = extractFloat128Sign( b );
5141 158142c2 bellard
    if ( aSign != bSign ) {
5142 158142c2 bellard
        return
5143 158142c2 bellard
               aSign
5144 158142c2 bellard
            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5145 158142c2 bellard
                 == 0 );
5146 158142c2 bellard
    }
5147 158142c2 bellard
    return
5148 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5149 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5150 158142c2 bellard
5151 158142c2 bellard
}
5152 158142c2 bellard
5153 158142c2 bellard
/*----------------------------------------------------------------------------
5154 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5155 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5156 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5157 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5158 158142c2 bellard
*----------------------------------------------------------------------------*/
5159 158142c2 bellard
5160 158142c2 bellard
flag float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5161 158142c2 bellard
{
5162 158142c2 bellard
    flag aSign, bSign;
5163 158142c2 bellard
5164 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5165 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5166 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5167 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5168 158142c2 bellard
       ) {
5169 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5170 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5171 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5172 158142c2 bellard
        }
5173 158142c2 bellard
        return 0;
5174 158142c2 bellard
    }
5175 158142c2 bellard
    aSign = extractFloat128Sign( a );
5176 158142c2 bellard
    bSign = extractFloat128Sign( b );
5177 158142c2 bellard
    if ( aSign != bSign ) {
5178 158142c2 bellard
        return
5179 158142c2 bellard
               aSign
5180 158142c2 bellard
            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5181 158142c2 bellard
                 != 0 );
5182 158142c2 bellard
    }
5183 158142c2 bellard
    return
5184 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5185 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5186 158142c2 bellard
5187 158142c2 bellard
}
5188 158142c2 bellard
5189 158142c2 bellard
#endif
5190 158142c2 bellard
5191 1d6bda35 bellard
/* misc functions */
5192 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the unsigned integer `a' to the 32-bit floating-point format by
| widening it to a signed 64-bit integer and passing that to
| `int64_to_float32'.
*----------------------------------------------------------------------------*/
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float32(widened STATUS_VAR);
}
5196 1d6bda35 bellard
5197 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the unsigned integer `a' to the 64-bit floating-point format by
| widening it to a signed 64-bit integer and passing that to
| `int64_to_float64'.
*----------------------------------------------------------------------------*/
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float64(widened STATUS_VAR);
}
5201 1d6bda35 bellard
5202 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the 32-bit floating-point value `a' to an unsigned integer.  The
| value is first converted with `float32_to_int64'.  A result in the range
| 0..0xffffffff is returned unchanged.  A negative result yields 0 and a
| result above 0xffffffff yields 0xffffffff; in both of these cases the
| invalid exception is raised.
*----------------------------------------------------------------------------*/
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return (unsigned int) wide;
    }
    /* Out of range: saturate towards the violated bound. */
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5219 1d6bda35 bellard
5220 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the 32-bit floating-point value `a' to an unsigned integer.  The
| value is first converted with `float32_to_int64_round_to_zero'.  A result
| in the range 0..0xffffffff is returned unchanged.  A negative result yields
| 0 and a result above 0xffffffff yields 0xffffffff; in both of these cases
| the invalid exception is raised.
*----------------------------------------------------------------------------*/
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return (unsigned int) wide;
    }
    /* Out of range: saturate towards the violated bound. */
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5237 1d6bda35 bellard
5238 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the 64-bit floating-point value `a' to an unsigned integer.  The
| value is first converted with `float64_to_int64'.  A result in the range
| 0..0xffffffff is returned unchanged.  A negative result yields 0 and a
| result above 0xffffffff yields 0xffffffff; in both of these cases the
| invalid exception is raised.
*----------------------------------------------------------------------------*/
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return (unsigned int) wide;
    }
    /* Out of range: saturate towards the violated bound. */
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5255 1d6bda35 bellard
5256 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Converts the 64-bit floating-point value `a' to an unsigned integer.  The
| value is first converted with `float64_to_int64_round_to_zero'.  A result
| in the range 0..0xffffffff is returned unchanged.  A negative result yields
| 0 and a result above 0xffffffff yields 0xffffffff; in both of these cases
| the invalid exception is raised.
*----------------------------------------------------------------------------*/
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return (unsigned int) wide;
    }
    /* Out of range: saturate towards the violated bound. */
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
5273 1d6bda35 bellard
5274 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Generates the relational comparison routines for the `s'-bit floating-point
| format: the helper `float<s>_compare_internal' and the public entry points
| `float<s>_compare' (is_quiet = 0) and `float<s>_compare_quiet'
| (is_quiet = 1).  `nan_exp' is the exponent field value that, combined with
| a nonzero fraction, identifies a NaN.
|
| The generated routines return `float_relation_unordered' if either operand
| is a NaN, `float_relation_equal' if the operands are equal (including two
| zeros of opposite sign), and otherwise 1 or -1.  For a NaN operand the
| invalid exception is raised unless `is_quiet' is set and neither operand is
| a signaling NaN.
|
| NOTE(review): the result is returned as plain `char' and may be -1; where
| plain `char' is unsigned that value is not representable -- confirm the
| intended return type against the callers.
*----------------------------------------------------------------------------*/
#define COMPARE(s, nan_exp)                                                  \
INLINE char float ## s ## _compare_internal( float ## s a, float ## s b,     \
                                      int is_quiet STATUS_PARAM )            \
{                                                                            \
    flag aSign, bSign;                                                       \
                                                                             \
    if (    (    ( extractFloat ## s ## Exp( a ) == nan_exp )                \
              && extractFloat ## s ## Frac( a ) )                            \
         || (    ( extractFloat ## s ## Exp( b ) == nan_exp )                \
              && extractFloat ## s ## Frac( b ) ) ) {                        \
        /* NaN operand: raise unless quiet and no signaling NaN */           \
        if (    ! is_quiet                                                   \
             || float ## s ## _is_signaling_nan( a )                         \
             || float ## s ## _is_signaling_nan( b ) ) {                     \
            float_raise( float_flag_invalid STATUS_VAR);                     \
        }                                                                    \
        return float_relation_unordered;                                     \
    }                                                                        \
    aSign = extractFloat ## s ## Sign( a );                                  \
    bSign = extractFloat ## s ## Sign( b );                                  \
    if ( aSign == bSign ) {                                                  \
        if ( a == b ) {                                                      \
            return float_relation_equal;                                     \
        }                                                                    \
        /* same sign: raw ordering is reversed when the sign is set */       \
        return 1 - 2 * ( aSign ^ ( a < b ) );                                \
    }                                                                        \
    if ( (bits ## s) ( ( a | b )<<1 ) == 0 ) {                               \
        /* zero case */                                                      \
        return float_relation_equal;                                         \
    }                                                                        \
    /* signs differ: the operand with the sign set is the smaller one */     \
    return 1 - ( 2 * aSign );                                                \
}                                                                            \
                                                                             \
char float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )       \
{                                                                            \
    return float ## s ## _compare_internal( a, b, 0 STATUS_VAR );            \
}                                                                            \
                                                                             \
char float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
{                                                                            \
    return float ## s ## _compare_internal( a, b, 1 STATUS_VAR );            \
}
5318 1d6bda35 bellard
5319 1d6bda35 bellard
/* Instantiate float32_compare_internal, float32_compare and
   float32_compare_quiet; 0xff is the exponent field value tested for NaNs. */
COMPARE(32, 0xff)
5320 1d6bda35 bellard
/* Instantiate float64_compare_internal, float64_compare and
   float64_compare_quiet; 0x7ff is the exponent field value tested for NaNs. */
COMPARE(64, 0x7ff)