Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ e6afc87f

History | View | Annotate | Download (228.9 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10 158142c2 bellard
Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 158142c2 bellard
#include "softfloat.h"
39 158142c2 bellard
40 158142c2 bellard
/*----------------------------------------------------------------------------
41 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
42 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
43 158142c2 bellard
| desired.)
44 158142c2 bellard
*----------------------------------------------------------------------------*/
45 158142c2 bellard
#include "softfloat-macros.h"
46 158142c2 bellard
47 158142c2 bellard
/*----------------------------------------------------------------------------
48 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
49 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
50 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
51 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
52 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
53 158142c2 bellard
| specific.
54 158142c2 bellard
*----------------------------------------------------------------------------*/
55 158142c2 bellard
#include "softfloat-specialize.h"
56 158142c2 bellard
57 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `float_rounding_mode' field of the floating-point
| status.
*----------------------------------------------------------------------------*/

void set_float_rounding_mode(int val STATUS_PARAM)
{
    STATUS(float_rounding_mode) = val;
}
61 158142c2 bellard
62 1d6bda35 bellard
/*----------------------------------------------------------------------------
| Overwrites the `float_exception_flags' field of the floating-point status
| with `val'.
*----------------------------------------------------------------------------*/

void set_float_exception_flags(int val STATUS_PARAM)
{
    STATUS(float_exception_flags) = val;
}
66 1d6bda35 bellard
67 158142c2 bellard
#ifdef FLOATX80
68 158142c2 bellard
/*----------------------------------------------------------------------------
| Stores `val' in the `floatx80_rounding_precision' field of the
| floating-point status.
*----------------------------------------------------------------------------*/

void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    STATUS(floatx80_rounding_precision) = val;
}
72 158142c2 bellard
#endif
73 158142c2 bellard
74 158142c2 bellard
/*----------------------------------------------------------------------------
75 bb4d4bb3 Peter Maydell
| Returns the fraction bits of the half-precision floating-point value `a'.
76 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
77 bb4d4bb3 Peter Maydell
78 bb4d4bb3 Peter Maydell
INLINE uint32_t extractFloat16Frac(float16 a)
79 bb4d4bb3 Peter Maydell
{
80 bb4d4bb3 Peter Maydell
    return float16_val(a) & 0x3ff;
81 bb4d4bb3 Peter Maydell
}
82 bb4d4bb3 Peter Maydell
83 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
84 bb4d4bb3 Peter Maydell
| Returns the exponent bits of the half-precision floating-point value `a'.
85 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
86 bb4d4bb3 Peter Maydell
87 bb4d4bb3 Peter Maydell
INLINE int16 extractFloat16Exp(float16 a)
88 bb4d4bb3 Peter Maydell
{
89 bb4d4bb3 Peter Maydell
    return (float16_val(a) >> 10) & 0x1f;
90 bb4d4bb3 Peter Maydell
}
91 bb4d4bb3 Peter Maydell
92 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
93 bb4d4bb3 Peter Maydell
| Returns the sign bit of the single-precision floating-point value `a'.
94 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
95 bb4d4bb3 Peter Maydell
96 bb4d4bb3 Peter Maydell
INLINE flag extractFloat16Sign(float16 a)
97 bb4d4bb3 Peter Maydell
{
98 bb4d4bb3 Peter Maydell
    return float16_val(a)>>15;
99 bb4d4bb3 Peter Maydell
}
100 bb4d4bb3 Peter Maydell
101 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
102 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
103 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
104 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
105 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
106 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
107 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
108 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
109 158142c2 bellard
| positive or negative integer is returned.
110 158142c2 bellard
*----------------------------------------------------------------------------*/
111 158142c2 bellard
112 bb98fe42 Andreas Färber
static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
113 158142c2 bellard
{
114 158142c2 bellard
    int8 roundingMode;
115 158142c2 bellard
    flag roundNearestEven;
116 158142c2 bellard
    int8 roundIncrement, roundBits;
117 158142c2 bellard
    int32 z;
118 158142c2 bellard
119 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
120 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
121 158142c2 bellard
    roundIncrement = 0x40;
122 158142c2 bellard
    if ( ! roundNearestEven ) {
123 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
124 158142c2 bellard
            roundIncrement = 0;
125 158142c2 bellard
        }
126 158142c2 bellard
        else {
127 158142c2 bellard
            roundIncrement = 0x7F;
128 158142c2 bellard
            if ( zSign ) {
129 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
130 158142c2 bellard
            }
131 158142c2 bellard
            else {
132 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
133 158142c2 bellard
            }
134 158142c2 bellard
        }
135 158142c2 bellard
    }
136 158142c2 bellard
    roundBits = absZ & 0x7F;
137 158142c2 bellard
    absZ = ( absZ + roundIncrement )>>7;
138 158142c2 bellard
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
139 158142c2 bellard
    z = absZ;
140 158142c2 bellard
    if ( zSign ) z = - z;
141 158142c2 bellard
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
142 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
143 bb98fe42 Andreas Färber
        return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
144 158142c2 bellard
    }
145 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
146 158142c2 bellard
    return z;
147 158142c2 bellard
148 158142c2 bellard
}
149 158142c2 bellard
150 158142c2 bellard
/*----------------------------------------------------------------------------
151 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
152 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
153 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
154 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
155 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
156 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
157 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
158 158142c2 bellard
| exception is raised and the largest positive or negative integer is
159 158142c2 bellard
| returned.
160 158142c2 bellard
*----------------------------------------------------------------------------*/
161 158142c2 bellard
162 bb98fe42 Andreas Färber
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
163 158142c2 bellard
{
164 158142c2 bellard
    int8 roundingMode;
165 158142c2 bellard
    flag roundNearestEven, increment;
166 158142c2 bellard
    int64 z;
167 158142c2 bellard
168 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
169 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
170 bb98fe42 Andreas Färber
    increment = ( (int64_t) absZ1 < 0 );
171 158142c2 bellard
    if ( ! roundNearestEven ) {
172 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
173 158142c2 bellard
            increment = 0;
174 158142c2 bellard
        }
175 158142c2 bellard
        else {
176 158142c2 bellard
            if ( zSign ) {
177 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && absZ1;
178 158142c2 bellard
            }
179 158142c2 bellard
            else {
180 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && absZ1;
181 158142c2 bellard
            }
182 158142c2 bellard
        }
183 158142c2 bellard
    }
184 158142c2 bellard
    if ( increment ) {
185 158142c2 bellard
        ++absZ0;
186 158142c2 bellard
        if ( absZ0 == 0 ) goto overflow;
187 bb98fe42 Andreas Färber
        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
188 158142c2 bellard
    }
189 158142c2 bellard
    z = absZ0;
190 158142c2 bellard
    if ( zSign ) z = - z;
191 158142c2 bellard
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
192 158142c2 bellard
 overflow:
193 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
194 158142c2 bellard
        return
195 bb98fe42 Andreas Färber
              zSign ? (int64_t) LIT64( 0x8000000000000000 )
196 158142c2 bellard
            : LIT64( 0x7FFFFFFFFFFFFFFF );
197 158142c2 bellard
    }
198 158142c2 bellard
    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
199 158142c2 bellard
    return z;
200 158142c2 bellard
201 158142c2 bellard
}
202 158142c2 bellard
203 158142c2 bellard
/*----------------------------------------------------------------------------
204 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
205 158142c2 bellard
*----------------------------------------------------------------------------*/
206 158142c2 bellard
207 bb98fe42 Andreas Färber
INLINE uint32_t extractFloat32Frac( float32 a )
208 158142c2 bellard
{
209 158142c2 bellard
210 f090c9d4 pbrook
    return float32_val(a) & 0x007FFFFF;
211 158142c2 bellard
212 158142c2 bellard
}
213 158142c2 bellard
214 158142c2 bellard
/*----------------------------------------------------------------------------
215 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
216 158142c2 bellard
*----------------------------------------------------------------------------*/
217 158142c2 bellard
218 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
219 158142c2 bellard
{
220 158142c2 bellard
221 f090c9d4 pbrook
    return ( float32_val(a)>>23 ) & 0xFF;
222 158142c2 bellard
223 158142c2 bellard
}
224 158142c2 bellard
225 158142c2 bellard
/*----------------------------------------------------------------------------
226 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
227 158142c2 bellard
*----------------------------------------------------------------------------*/
228 158142c2 bellard
229 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
230 158142c2 bellard
{
231 158142c2 bellard
232 f090c9d4 pbrook
    return float32_val(a)>>31;
233 158142c2 bellard
234 158142c2 bellard
}
235 158142c2 bellard
236 158142c2 bellard
/*----------------------------------------------------------------------------
237 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
238 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
239 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
240 37d18660 Peter Maydell
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
241 37d18660 Peter Maydell
{
242 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
243 37d18660 Peter Maydell
        if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
244 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
245 37d18660 Peter Maydell
            return make_float32(float32_val(a) & 0x80000000);
246 37d18660 Peter Maydell
        }
247 37d18660 Peter Maydell
    }
248 37d18660 Peter Maydell
    return a;
249 37d18660 Peter Maydell
}
250 37d18660 Peter Maydell
251 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
252 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
253 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
254 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
255 158142c2 bellard
| `zSigPtr', respectively.
256 158142c2 bellard
*----------------------------------------------------------------------------*/
257 158142c2 bellard
258 158142c2 bellard
static void
259 bb98fe42 Andreas Färber
 normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
260 158142c2 bellard
{
261 158142c2 bellard
    int8 shiftCount;
262 158142c2 bellard
263 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
264 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
265 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
266 158142c2 bellard
267 158142c2 bellard
}
268 158142c2 bellard
269 158142c2 bellard
/*----------------------------------------------------------------------------
270 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
271 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
272 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
273 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
274 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
275 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
276 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
277 158142c2 bellard
| significand.
278 158142c2 bellard
*----------------------------------------------------------------------------*/
279 158142c2 bellard
280 bb98fe42 Andreas Färber
INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
281 158142c2 bellard
{
282 158142c2 bellard
283 f090c9d4 pbrook
    return make_float32(
284 bb98fe42 Andreas Färber
          ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
285 158142c2 bellard
286 158142c2 bellard
}
287 158142c2 bellard
288 158142c2 bellard
/*----------------------------------------------------------------------------
289 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
290 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
291 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
292 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
293 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
294 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
295 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
296 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
297 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
298 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
299 158142c2 bellard
| precision floating-point number.
300 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
301 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
302 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
303 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
304 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
305 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
306 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
307 158142c2 bellard
| Binary Floating-Point Arithmetic.
308 158142c2 bellard
*----------------------------------------------------------------------------*/
309 158142c2 bellard
310 bb98fe42 Andreas Färber
static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
311 158142c2 bellard
{
312 158142c2 bellard
    int8 roundingMode;
313 158142c2 bellard
    flag roundNearestEven;
314 158142c2 bellard
    int8 roundIncrement, roundBits;
315 158142c2 bellard
    flag isTiny;
316 158142c2 bellard
317 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
318 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
319 158142c2 bellard
    roundIncrement = 0x40;
320 158142c2 bellard
    if ( ! roundNearestEven ) {
321 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
322 158142c2 bellard
            roundIncrement = 0;
323 158142c2 bellard
        }
324 158142c2 bellard
        else {
325 158142c2 bellard
            roundIncrement = 0x7F;
326 158142c2 bellard
            if ( zSign ) {
327 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
328 158142c2 bellard
            }
329 158142c2 bellard
            else {
330 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
331 158142c2 bellard
            }
332 158142c2 bellard
        }
333 158142c2 bellard
    }
334 158142c2 bellard
    roundBits = zSig & 0x7F;
335 bb98fe42 Andreas Färber
    if ( 0xFD <= (uint16_t) zExp ) {
336 158142c2 bellard
        if (    ( 0xFD < zExp )
337 158142c2 bellard
             || (    ( zExp == 0xFD )
338 bb98fe42 Andreas Färber
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
339 158142c2 bellard
           ) {
340 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
341 f090c9d4 pbrook
            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
342 158142c2 bellard
        }
343 158142c2 bellard
        if ( zExp < 0 ) {
344 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
345 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
346 e6afc87f Peter Maydell
                return packFloat32(zSign, 0, 0);
347 e6afc87f Peter Maydell
            }
348 158142c2 bellard
            isTiny =
349 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
350 158142c2 bellard
                || ( zExp < -1 )
351 158142c2 bellard
                || ( zSig + roundIncrement < 0x80000000 );
352 158142c2 bellard
            shift32RightJamming( zSig, - zExp, &zSig );
353 158142c2 bellard
            zExp = 0;
354 158142c2 bellard
            roundBits = zSig & 0x7F;
355 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
356 158142c2 bellard
        }
357 158142c2 bellard
    }
358 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
359 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>7;
360 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
361 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
362 158142c2 bellard
    return packFloat32( zSign, zExp, zSig );
363 158142c2 bellard
364 158142c2 bellard
}
365 158142c2 bellard
366 158142c2 bellard
/*----------------------------------------------------------------------------
367 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
368 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
369 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
370 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
371 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
372 158142c2 bellard
| floating-point exponent.
373 158142c2 bellard
*----------------------------------------------------------------------------*/
374 158142c2 bellard
375 158142c2 bellard
static float32
376 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
377 158142c2 bellard
{
378 158142c2 bellard
    int8 shiftCount;
379 158142c2 bellard
380 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
381 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
382 158142c2 bellard
383 158142c2 bellard
}
384 158142c2 bellard
385 158142c2 bellard
/*----------------------------------------------------------------------------
386 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
387 158142c2 bellard
*----------------------------------------------------------------------------*/
388 158142c2 bellard
389 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat64Frac( float64 a )
390 158142c2 bellard
{
391 158142c2 bellard
392 f090c9d4 pbrook
    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
393 158142c2 bellard
394 158142c2 bellard
}
395 158142c2 bellard
396 158142c2 bellard
/*----------------------------------------------------------------------------
397 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
398 158142c2 bellard
*----------------------------------------------------------------------------*/
399 158142c2 bellard
400 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
401 158142c2 bellard
{
402 158142c2 bellard
403 f090c9d4 pbrook
    return ( float64_val(a)>>52 ) & 0x7FF;
404 158142c2 bellard
405 158142c2 bellard
}
406 158142c2 bellard
407 158142c2 bellard
/*----------------------------------------------------------------------------
408 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
409 158142c2 bellard
*----------------------------------------------------------------------------*/
410 158142c2 bellard
411 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
412 158142c2 bellard
{
413 158142c2 bellard
414 f090c9d4 pbrook
    return float64_val(a)>>63;
415 158142c2 bellard
416 158142c2 bellard
}
417 158142c2 bellard
418 158142c2 bellard
/*----------------------------------------------------------------------------
419 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
420 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
421 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
422 37d18660 Peter Maydell
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
423 37d18660 Peter Maydell
{
424 37d18660 Peter Maydell
    if (STATUS(flush_inputs_to_zero)) {
425 37d18660 Peter Maydell
        if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
426 37d18660 Peter Maydell
            float_raise(float_flag_input_denormal STATUS_VAR);
427 37d18660 Peter Maydell
            return make_float64(float64_val(a) & (1ULL << 63));
428 37d18660 Peter Maydell
        }
429 37d18660 Peter Maydell
    }
430 37d18660 Peter Maydell
    return a;
431 37d18660 Peter Maydell
}
432 37d18660 Peter Maydell
433 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
434 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
435 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
436 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
437 158142c2 bellard
| `zSigPtr', respectively.
438 158142c2 bellard
*----------------------------------------------------------------------------*/
439 158142c2 bellard
440 158142c2 bellard
static void
441 bb98fe42 Andreas Färber
 normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
442 158142c2 bellard
{
443 158142c2 bellard
    int8 shiftCount;
444 158142c2 bellard
445 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
446 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
447 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
448 158142c2 bellard
449 158142c2 bellard
}
450 158142c2 bellard
451 158142c2 bellard
/*----------------------------------------------------------------------------
452 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
453 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
454 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
455 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
456 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
457 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
458 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
459 158142c2 bellard
| significand.
460 158142c2 bellard
*----------------------------------------------------------------------------*/
461 158142c2 bellard
462 bb98fe42 Andreas Färber
INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
463 158142c2 bellard
{
464 158142c2 bellard
465 f090c9d4 pbrook
    return make_float64(
466 bb98fe42 Andreas Färber
        ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
467 158142c2 bellard
468 158142c2 bellard
}
469 158142c2 bellard
470 158142c2 bellard
/*----------------------------------------------------------------------------
471 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
472 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
473 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
474 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
475 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
476 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
477 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
478 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
479 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
480 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
481 158142c2 bellard
| precision floating-point number.
482 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
483 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
484 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
485 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
486 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
487 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
488 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
489 158142c2 bellard
| Binary Floating-Point Arithmetic.
490 158142c2 bellard
*----------------------------------------------------------------------------*/
491 158142c2 bellard
492 bb98fe42 Andreas Färber
static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
493 158142c2 bellard
{
494 158142c2 bellard
    int8 roundingMode;
495 158142c2 bellard
    flag roundNearestEven;
496 158142c2 bellard
    int16 roundIncrement, roundBits;
497 158142c2 bellard
    flag isTiny;
498 158142c2 bellard
499 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
500 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
501 158142c2 bellard
    roundIncrement = 0x200;
502 158142c2 bellard
    if ( ! roundNearestEven ) {
503 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
504 158142c2 bellard
            roundIncrement = 0;
505 158142c2 bellard
        }
506 158142c2 bellard
        else {
507 158142c2 bellard
            roundIncrement = 0x3FF;
508 158142c2 bellard
            if ( zSign ) {
509 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
510 158142c2 bellard
            }
511 158142c2 bellard
            else {
512 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
513 158142c2 bellard
            }
514 158142c2 bellard
        }
515 158142c2 bellard
    }
516 158142c2 bellard
    roundBits = zSig & 0x3FF;
517 bb98fe42 Andreas Färber
    if ( 0x7FD <= (uint16_t) zExp ) {
518 158142c2 bellard
        if (    ( 0x7FD < zExp )
519 158142c2 bellard
             || (    ( zExp == 0x7FD )
520 bb98fe42 Andreas Färber
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
521 158142c2 bellard
           ) {
522 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
523 f090c9d4 pbrook
            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
524 158142c2 bellard
        }
525 158142c2 bellard
        if ( zExp < 0 ) {
526 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
527 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
528 e6afc87f Peter Maydell
                return packFloat64(zSign, 0, 0);
529 e6afc87f Peter Maydell
            }
530 158142c2 bellard
            isTiny =
531 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
532 158142c2 bellard
                || ( zExp < -1 )
533 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
534 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
535 158142c2 bellard
            zExp = 0;
536 158142c2 bellard
            roundBits = zSig & 0x3FF;
537 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
538 158142c2 bellard
        }
539 158142c2 bellard
    }
540 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
541 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
542 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
543 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
544 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
545 158142c2 bellard
546 158142c2 bellard
}
547 158142c2 bellard
548 158142c2 bellard
/*----------------------------------------------------------------------------
549 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
550 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
551 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
552 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
553 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
554 158142c2 bellard
| floating-point exponent.
555 158142c2 bellard
*----------------------------------------------------------------------------*/
556 158142c2 bellard
557 158142c2 bellard
static float64
558 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
559 158142c2 bellard
{
560 158142c2 bellard
    int8 shiftCount;
561 158142c2 bellard
562 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
563 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
564 158142c2 bellard
565 158142c2 bellard
}
566 158142c2 bellard
567 158142c2 bellard
#ifdef FLOATX80
568 158142c2 bellard
569 158142c2 bellard
/*----------------------------------------------------------------------------
570 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
571 158142c2 bellard
| value `a'.
572 158142c2 bellard
*----------------------------------------------------------------------------*/
573 158142c2 bellard
574 bb98fe42 Andreas Färber
INLINE uint64_t extractFloatx80Frac( floatx80 a )
575 158142c2 bellard
{
576 158142c2 bellard
577 158142c2 bellard
    return a.low;
578 158142c2 bellard
579 158142c2 bellard
}
580 158142c2 bellard
581 158142c2 bellard
/*----------------------------------------------------------------------------
582 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
583 158142c2 bellard
| value `a'.
584 158142c2 bellard
*----------------------------------------------------------------------------*/
585 158142c2 bellard
586 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
587 158142c2 bellard
{
588 158142c2 bellard
589 158142c2 bellard
    return a.high & 0x7FFF;
590 158142c2 bellard
591 158142c2 bellard
}
592 158142c2 bellard
593 158142c2 bellard
/*----------------------------------------------------------------------------
594 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
595 158142c2 bellard
| `a'.
596 158142c2 bellard
*----------------------------------------------------------------------------*/
597 158142c2 bellard
598 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
599 158142c2 bellard
{
600 158142c2 bellard
601 158142c2 bellard
    return a.high>>15;
602 158142c2 bellard
603 158142c2 bellard
}
604 158142c2 bellard
605 158142c2 bellard
/*----------------------------------------------------------------------------
606 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
607 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
608 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
609 158142c2 bellard
| `zSigPtr', respectively.
610 158142c2 bellard
*----------------------------------------------------------------------------*/
611 158142c2 bellard
612 158142c2 bellard
static void
613 bb98fe42 Andreas Färber
 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
614 158142c2 bellard
{
615 158142c2 bellard
    int8 shiftCount;
616 158142c2 bellard
617 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
618 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
619 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
620 158142c2 bellard
621 158142c2 bellard
}
622 158142c2 bellard
623 158142c2 bellard
/*----------------------------------------------------------------------------
624 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
625 158142c2 bellard
| extended double-precision floating-point value, returning the result.
626 158142c2 bellard
*----------------------------------------------------------------------------*/
627 158142c2 bellard
628 bb98fe42 Andreas Färber
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
629 158142c2 bellard
{
630 158142c2 bellard
    floatx80 z;
631 158142c2 bellard
632 158142c2 bellard
    z.low = zSig;
633 bb98fe42 Andreas Färber
    z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
634 158142c2 bellard
    return z;
635 158142c2 bellard
636 158142c2 bellard
}
637 158142c2 bellard
638 158142c2 bellard
/*----------------------------------------------------------------------------
639 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
640 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
641 158142c2 bellard
| and returns the proper extended double-precision floating-point value
642 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
643 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
644 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
645 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
646 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
647 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
648 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
649 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
650 158142c2 bellard
| double-precision floating-point number.
651 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
652 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
653 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
654 158142c2 bellard
| format.
655 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
656 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
657 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
658 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
659 158142c2 bellard
| Floating-Point Arithmetic.
660 158142c2 bellard
*----------------------------------------------------------------------------*/
661 158142c2 bellard
662 158142c2 bellard
static floatx80
663 158142c2 bellard
 roundAndPackFloatx80(
664 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
665 158142c2 bellard
 STATUS_PARAM)
666 158142c2 bellard
{
667 158142c2 bellard
    int8 roundingMode;
668 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
669 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
670 158142c2 bellard
671 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
672 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
673 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
674 158142c2 bellard
    if ( roundingPrecision == 64 ) {
675 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
676 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
677 158142c2 bellard
    }
678 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
679 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
680 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
681 158142c2 bellard
    }
682 158142c2 bellard
    else {
683 158142c2 bellard
        goto precision80;
684 158142c2 bellard
    }
685 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
686 158142c2 bellard
    if ( ! roundNearestEven ) {
687 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
688 158142c2 bellard
            roundIncrement = 0;
689 158142c2 bellard
        }
690 158142c2 bellard
        else {
691 158142c2 bellard
            roundIncrement = roundMask;
692 158142c2 bellard
            if ( zSign ) {
693 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
694 158142c2 bellard
            }
695 158142c2 bellard
            else {
696 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
697 158142c2 bellard
            }
698 158142c2 bellard
        }
699 158142c2 bellard
    }
700 158142c2 bellard
    roundBits = zSig0 & roundMask;
701 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
702 158142c2 bellard
        if (    ( 0x7FFE < zExp )
703 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
704 158142c2 bellard
           ) {
705 158142c2 bellard
            goto overflow;
706 158142c2 bellard
        }
707 158142c2 bellard
        if ( zExp <= 0 ) {
708 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
709 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
710 e6afc87f Peter Maydell
                return packFloatx80(zSign, 0, 0);
711 e6afc87f Peter Maydell
            }
712 158142c2 bellard
            isTiny =
713 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
714 158142c2 bellard
                || ( zExp < 0 )
715 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
716 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
717 158142c2 bellard
            zExp = 0;
718 158142c2 bellard
            roundBits = zSig0 & roundMask;
719 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
720 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
721 158142c2 bellard
            zSig0 += roundIncrement;
722 bb98fe42 Andreas Färber
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
723 158142c2 bellard
            roundIncrement = roundMask + 1;
724 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
725 158142c2 bellard
                roundMask |= roundIncrement;
726 158142c2 bellard
            }
727 158142c2 bellard
            zSig0 &= ~ roundMask;
728 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
729 158142c2 bellard
        }
730 158142c2 bellard
    }
731 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
732 158142c2 bellard
    zSig0 += roundIncrement;
733 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
734 158142c2 bellard
        ++zExp;
735 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
736 158142c2 bellard
    }
737 158142c2 bellard
    roundIncrement = roundMask + 1;
738 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
739 158142c2 bellard
        roundMask |= roundIncrement;
740 158142c2 bellard
    }
741 158142c2 bellard
    zSig0 &= ~ roundMask;
742 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
743 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
744 158142c2 bellard
 precision80:
745 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig1 < 0 );
746 158142c2 bellard
    if ( ! roundNearestEven ) {
747 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
748 158142c2 bellard
            increment = 0;
749 158142c2 bellard
        }
750 158142c2 bellard
        else {
751 158142c2 bellard
            if ( zSign ) {
752 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
753 158142c2 bellard
            }
754 158142c2 bellard
            else {
755 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
756 158142c2 bellard
            }
757 158142c2 bellard
        }
758 158142c2 bellard
    }
759 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
760 158142c2 bellard
        if (    ( 0x7FFE < zExp )
761 158142c2 bellard
             || (    ( zExp == 0x7FFE )
762 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
763 158142c2 bellard
                  && increment
764 158142c2 bellard
                )
765 158142c2 bellard
           ) {
766 158142c2 bellard
            roundMask = 0;
767 158142c2 bellard
 overflow:
768 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
769 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
770 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
771 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
772 158142c2 bellard
               ) {
773 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
774 158142c2 bellard
            }
775 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
776 158142c2 bellard
        }
777 158142c2 bellard
        if ( zExp <= 0 ) {
778 158142c2 bellard
            isTiny =
779 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
780 158142c2 bellard
                || ( zExp < 0 )
781 158142c2 bellard
                || ! increment
782 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
783 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
784 158142c2 bellard
            zExp = 0;
785 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
786 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
787 158142c2 bellard
            if ( roundNearestEven ) {
788 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig1 < 0 );
789 158142c2 bellard
            }
790 158142c2 bellard
            else {
791 158142c2 bellard
                if ( zSign ) {
792 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
793 158142c2 bellard
                }
794 158142c2 bellard
                else {
795 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
796 158142c2 bellard
                }
797 158142c2 bellard
            }
798 158142c2 bellard
            if ( increment ) {
799 158142c2 bellard
                ++zSig0;
800 158142c2 bellard
                zSig0 &=
801 bb98fe42 Andreas Färber
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
802 bb98fe42 Andreas Färber
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
803 158142c2 bellard
            }
804 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
805 158142c2 bellard
        }
806 158142c2 bellard
    }
807 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
808 158142c2 bellard
    if ( increment ) {
809 158142c2 bellard
        ++zSig0;
810 158142c2 bellard
        if ( zSig0 == 0 ) {
811 158142c2 bellard
            ++zExp;
812 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
813 158142c2 bellard
        }
814 158142c2 bellard
        else {
815 bb98fe42 Andreas Färber
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
816 158142c2 bellard
        }
817 158142c2 bellard
    }
818 158142c2 bellard
    else {
819 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
820 158142c2 bellard
    }
821 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
822 158142c2 bellard
823 158142c2 bellard
}
824 158142c2 bellard
825 158142c2 bellard
/*----------------------------------------------------------------------------
826 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
827 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
828 158142c2 bellard
| and returns the proper extended double-precision floating-point value
829 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
830 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
831 158142c2 bellard
| normalized.
832 158142c2 bellard
*----------------------------------------------------------------------------*/
833 158142c2 bellard
834 158142c2 bellard
static floatx80
835 158142c2 bellard
 normalizeRoundAndPackFloatx80(
836 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
837 158142c2 bellard
 STATUS_PARAM)
838 158142c2 bellard
{
839 158142c2 bellard
    int8 shiftCount;
840 158142c2 bellard
841 158142c2 bellard
    if ( zSig0 == 0 ) {
842 158142c2 bellard
        zSig0 = zSig1;
843 158142c2 bellard
        zSig1 = 0;
844 158142c2 bellard
        zExp -= 64;
845 158142c2 bellard
    }
846 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
847 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
848 158142c2 bellard
    zExp -= shiftCount;
849 158142c2 bellard
    return
850 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
851 158142c2 bellard
852 158142c2 bellard
}
853 158142c2 bellard
854 158142c2 bellard
#endif
855 158142c2 bellard
856 158142c2 bellard
#ifdef FLOAT128
857 158142c2 bellard
858 158142c2 bellard
/*----------------------------------------------------------------------------
859 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
860 158142c2 bellard
| floating-point value `a'.
861 158142c2 bellard
*----------------------------------------------------------------------------*/
862 158142c2 bellard
863 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac1( float128 a )
864 158142c2 bellard
{
865 158142c2 bellard
866 158142c2 bellard
    return a.low;
867 158142c2 bellard
868 158142c2 bellard
}
869 158142c2 bellard
870 158142c2 bellard
/*----------------------------------------------------------------------------
871 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
872 158142c2 bellard
| floating-point value `a'.
873 158142c2 bellard
*----------------------------------------------------------------------------*/
874 158142c2 bellard
875 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac0( float128 a )
876 158142c2 bellard
{
877 158142c2 bellard
878 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
879 158142c2 bellard
880 158142c2 bellard
}
881 158142c2 bellard
882 158142c2 bellard
/*----------------------------------------------------------------------------
883 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
884 158142c2 bellard
| `a'.
885 158142c2 bellard
*----------------------------------------------------------------------------*/
886 158142c2 bellard
887 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
888 158142c2 bellard
{
889 158142c2 bellard
890 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
891 158142c2 bellard
892 158142c2 bellard
}
893 158142c2 bellard
894 158142c2 bellard
/*----------------------------------------------------------------------------
895 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
896 158142c2 bellard
*----------------------------------------------------------------------------*/
897 158142c2 bellard
898 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
899 158142c2 bellard
{
900 158142c2 bellard
901 158142c2 bellard
    return a.high>>63;
902 158142c2 bellard
903 158142c2 bellard
}
904 158142c2 bellard
905 158142c2 bellard
/*----------------------------------------------------------------------------
906 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
907 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
908 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
909 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
910 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
911 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
912 158142c2 bellard
| location pointed to by `zSig1Ptr'.
913 158142c2 bellard
*----------------------------------------------------------------------------*/
914 158142c2 bellard
915 158142c2 bellard
static void
916 158142c2 bellard
 normalizeFloat128Subnormal(
917 bb98fe42 Andreas Färber
     uint64_t aSig0,
918 bb98fe42 Andreas Färber
     uint64_t aSig1,
919 158142c2 bellard
     int32 *zExpPtr,
920 bb98fe42 Andreas Färber
     uint64_t *zSig0Ptr,
921 bb98fe42 Andreas Färber
     uint64_t *zSig1Ptr
922 158142c2 bellard
 )
923 158142c2 bellard
{
924 158142c2 bellard
    int8 shiftCount;
925 158142c2 bellard
926 158142c2 bellard
    if ( aSig0 == 0 ) {
927 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
928 158142c2 bellard
        if ( shiftCount < 0 ) {
929 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
930 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
931 158142c2 bellard
        }
932 158142c2 bellard
        else {
933 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
934 158142c2 bellard
            *zSig1Ptr = 0;
935 158142c2 bellard
        }
936 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
937 158142c2 bellard
    }
938 158142c2 bellard
    else {
939 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
940 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
941 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
942 158142c2 bellard
    }
943 158142c2 bellard
944 158142c2 bellard
}
945 158142c2 bellard
946 158142c2 bellard
/*----------------------------------------------------------------------------
947 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
948 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
949 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
950 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
951 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
952 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
953 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
954 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
955 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
956 158142c2 bellard
| significand.
957 158142c2 bellard
*----------------------------------------------------------------------------*/
958 158142c2 bellard
959 158142c2 bellard
INLINE float128
960 bb98fe42 Andreas Färber
 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
961 158142c2 bellard
{
962 158142c2 bellard
    float128 z;
963 158142c2 bellard
964 158142c2 bellard
    z.low = zSig1;
965 bb98fe42 Andreas Färber
    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
966 158142c2 bellard
    return z;
967 158142c2 bellard
968 158142c2 bellard
}
969 158142c2 bellard
970 158142c2 bellard
/*----------------------------------------------------------------------------
971 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
972 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
973 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
974 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
975 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
976 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
977 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
978 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
979 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
980 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
981 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
982 158142c2 bellard
| precision floating-point number.
983 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
984 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
985 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
986 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
987 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
988 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
989 158142c2 bellard
*----------------------------------------------------------------------------*/
990 158142c2 bellard
991 158142c2 bellard
static float128
992 158142c2 bellard
 roundAndPackFloat128(
993 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
994 158142c2 bellard
{
995 158142c2 bellard
    int8 roundingMode;
996 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
997 158142c2 bellard
998 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
999 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
1000 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig2 < 0 );
1001 158142c2 bellard
    if ( ! roundNearestEven ) {
1002 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
1003 158142c2 bellard
            increment = 0;
1004 158142c2 bellard
        }
1005 158142c2 bellard
        else {
1006 158142c2 bellard
            if ( zSign ) {
1007 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
1008 158142c2 bellard
            }
1009 158142c2 bellard
            else {
1010 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
1011 158142c2 bellard
            }
1012 158142c2 bellard
        }
1013 158142c2 bellard
    }
1014 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) zExp ) {
1015 158142c2 bellard
        if (    ( 0x7FFD < zExp )
1016 158142c2 bellard
             || (    ( zExp == 0x7FFD )
1017 158142c2 bellard
                  && eq128(
1018 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
1019 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1020 158142c2 bellard
                         zSig0,
1021 158142c2 bellard
                         zSig1
1022 158142c2 bellard
                     )
1023 158142c2 bellard
                  && increment
1024 158142c2 bellard
                )
1025 158142c2 bellard
           ) {
1026 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1027 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
1028 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
1029 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
1030 158142c2 bellard
               ) {
1031 158142c2 bellard
                return
1032 158142c2 bellard
                    packFloat128(
1033 158142c2 bellard
                        zSign,
1034 158142c2 bellard
                        0x7FFE,
1035 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
1036 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
1037 158142c2 bellard
                    );
1038 158142c2 bellard
            }
1039 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
1040 158142c2 bellard
        }
1041 158142c2 bellard
        if ( zExp < 0 ) {
1042 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1043 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
1044 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
1045 e6afc87f Peter Maydell
            }
1046 158142c2 bellard
            isTiny =
1047 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1048 158142c2 bellard
                || ( zExp < -1 )
1049 158142c2 bellard
                || ! increment
1050 158142c2 bellard
                || lt128(
1051 158142c2 bellard
                       zSig0,
1052 158142c2 bellard
                       zSig1,
1053 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
1054 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
1055 158142c2 bellard
                   );
1056 158142c2 bellard
            shift128ExtraRightJamming(
1057 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1058 158142c2 bellard
            zExp = 0;
1059 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1060 158142c2 bellard
            if ( roundNearestEven ) {
1061 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig2 < 0 );
1062 158142c2 bellard
            }
1063 158142c2 bellard
            else {
1064 158142c2 bellard
                if ( zSign ) {
1065 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
1066 158142c2 bellard
                }
1067 158142c2 bellard
                else {
1068 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
1069 158142c2 bellard
                }
1070 158142c2 bellard
            }
1071 158142c2 bellard
        }
1072 158142c2 bellard
    }
1073 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1074 158142c2 bellard
    if ( increment ) {
1075 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1076 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1077 158142c2 bellard
    }
1078 158142c2 bellard
    else {
1079 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1080 158142c2 bellard
    }
1081 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1082 158142c2 bellard
1083 158142c2 bellard
}
1084 158142c2 bellard
1085 158142c2 bellard
/*----------------------------------------------------------------------------
1086 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1087 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1088 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1089 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1090 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1091 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1092 158142c2 bellard
| point exponent.
1093 158142c2 bellard
*----------------------------------------------------------------------------*/
1094 158142c2 bellard
1095 158142c2 bellard
static float128
1096 158142c2 bellard
 normalizeRoundAndPackFloat128(
1097 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1098 158142c2 bellard
{
1099 158142c2 bellard
    int8 shiftCount;
1100 bb98fe42 Andreas Färber
    uint64_t zSig2;
1101 158142c2 bellard
1102 158142c2 bellard
    if ( zSig0 == 0 ) {
1103 158142c2 bellard
        zSig0 = zSig1;
1104 158142c2 bellard
        zSig1 = 0;
1105 158142c2 bellard
        zExp -= 64;
1106 158142c2 bellard
    }
1107 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1108 158142c2 bellard
    if ( 0 <= shiftCount ) {
1109 158142c2 bellard
        zSig2 = 0;
1110 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1111 158142c2 bellard
    }
1112 158142c2 bellard
    else {
1113 158142c2 bellard
        shift128ExtraRightJamming(
1114 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1115 158142c2 bellard
    }
1116 158142c2 bellard
    zExp -= shiftCount;
1117 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1118 158142c2 bellard
1119 158142c2 bellard
}
1120 158142c2 bellard
1121 158142c2 bellard
#endif
1122 158142c2 bellard
1123 158142c2 bellard
/*----------------------------------------------------------------------------
1124 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1125 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1126 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1127 158142c2 bellard
*----------------------------------------------------------------------------*/
1128 158142c2 bellard
1129 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    flag zSign;

    if (a == 0) {
        return float32_zero;
    }
    /* The most negative value cannot be negated; pack it directly
     * (sign 1, biased exponent 0x9E, zero fraction). */
    if (a == (int32_t) 0x80000000) {
        return packFloat32(1, 0x9E, 0);
    }
    zSign = (a < 0);
    if (zSign) {
        a = -a;
    }
    return normalizeRoundAndPackFloat32(zSign, 0x9C, a STATUS_VAR);
}
1139 158142c2 bellard
1140 158142c2 bellard
/*----------------------------------------------------------------------------
1141 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1142 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1143 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1144 158142c2 bellard
*----------------------------------------------------------------------------*/
1145 158142c2 bellard
1146 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if (a == 0) {
        return float64_zero;
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 0x80000000. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Shift so the leading 1 of the magnitude lands on bit 52. */
    shiftCount = countLeadingZeros32(absA) + 21;
    zSig = absA;
    return packFloat64(zSign, 0x432 - shiftCount, zSig << shiftCount);
}
1161 158142c2 bellard
1162 158142c2 bellard
#ifdef FLOATX80
1163 158142c2 bellard
1164 158142c2 bellard
/*----------------------------------------------------------------------------
1165 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1166 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1167 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1168 158142c2 bellard
| Arithmetic.
1169 158142c2 bellard
*----------------------------------------------------------------------------*/
1170 158142c2 bellard
1171 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if (a == 0) {
        return packFloatx80(0, 0, 0);
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 0x80000000. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Shift so the leading 1 of the magnitude lands on bit 63. */
    shiftCount = countLeadingZeros32(absA) + 32;
    zSig = absA;
    return packFloatx80(zSign, 0x403E - shiftCount, zSig << shiftCount);
}
1186 158142c2 bellard
1187 158142c2 bellard
#endif
1188 158142c2 bellard
1189 158142c2 bellard
#ifdef FLOAT128
1190 158142c2 bellard
1191 158142c2 bellard
/*----------------------------------------------------------------------------
1192 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1193 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1194 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1195 158142c2 bellard
*----------------------------------------------------------------------------*/
1196 158142c2 bellard
1197 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig0;

    if (a == 0) {
        return packFloat128(0, 0, 0, 0);
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 0x80000000. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Shift so the leading 1 of the magnitude lands on bit 48 of the
     * upper significand word; the lower word is always zero. */
    shiftCount = countLeadingZeros32(absA) + 17;
    zSig0 = absA;
    return packFloat128(zSign, 0x402E - shiftCount, zSig0 << shiftCount, 0);
}
1212 158142c2 bellard
1213 158142c2 bellard
#endif
1214 158142c2 bellard
1215 158142c2 bellard
/*----------------------------------------------------------------------------
1216 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1217 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1218 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1219 158142c2 bellard
*----------------------------------------------------------------------------*/
1220 158142c2 bellard
1221 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if (a == 0) {
        return float32_zero;
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 2^63. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64(absA) - 40;
    if (0 <= shiftCount) {
        /* At most 24 significant bits: exact, pack without rounding. */
        return packFloat32(zSign, 0x95 - shiftCount, absA << shiftCount);
    }
    /* Wider than the significand: align for the rounding routine, shifting
     * right with jamming when bits have to be dropped. */
    shiftCount += 7;
    if (shiftCount < 0) {
        shift64RightJamming(absA, -shiftCount, &absA);
    } else {
        absA <<= shiftCount;
    }
    return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA STATUS_VAR);
}
1246 158142c2 bellard
1247 3430b0be j_mayer
/*
 * Returns the result of converting the 64-bit unsigned integer `a' to the
 * single-precision floating-point format, rounding according to the current
 * rounding mode when `a' has more significant bits than the format holds.
 */
float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if (a == 0) {
        return float32_zero;
    }
    shiftCount = countLeadingZeros64(a) - 40;
    if (0 <= shiftCount) {
        /* At most 24 significant bits: exact, pack without rounding.
         * The input is unsigned, so the sign is always 0; the earlier
         * `1 > 0' evaluated to 1 and produced negative results. */
        return packFloat32(0, 0x95 - shiftCount, a << shiftCount);
    }
    /* Wider than the significand: align for the rounding routine, shifting
     * right with jamming when bits have to be dropped. */
    shiftCount += 7;
    if (shiftCount < 0) {
        shift64RightJamming(a, -shiftCount, &a);
    } else {
        a <<= shiftCount;
    }
    return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
}
1267 75d62a58 j_mayer
1268 158142c2 bellard
/*----------------------------------------------------------------------------
1269 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1270 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1271 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1272 158142c2 bellard
*----------------------------------------------------------------------------*/
1273 158142c2 bellard
1274 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    flag zSign;

    if (a == 0) {
        return float64_zero;
    }
    /* The most negative value cannot be negated; pack it directly
     * (sign 1, biased exponent 0x43E, zero fraction). */
    if (a == (int64_t) LIT64(0x8000000000000000)) {
        return packFloat64(1, 0x43E, 0);
    }
    zSign = (a < 0);
    if (zSign) {
        a = -a;
    }
    return normalizeRoundAndPackFloat64(zSign, 0x43C, a STATUS_VAR);
}
1286 158142c2 bellard
1287 75d62a58 j_mayer
/*
 * Returns the result of converting the 64-bit unsigned integer `a' to the
 * double-precision floating-point format, rounding according to the current
 * rounding mode when `a' has more significant bits than the format holds.
 */
float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int16 zExp = 0x43C;

    if (a == 0) {
        return float64_zero;
    }
    /* NOTE(review): normalizeRoundAndPackFloat64() is defined outside this
     * chunk; it is expected to shift left by (leading zeros - 1), which is a
     * negative count when bit 63 is set. Drop one bit with jamming and bump
     * the exponent so the top bit is clear, mirroring the special case that
     * int64_to_float64() applies to 0x8000000000000000. */
    if ((int64_t) a < 0) {
        shift64RightJamming(a, 1, &a);
        zExp += 1;
    }
    return normalizeRoundAndPackFloat64(0, zExp, a STATUS_VAR);
}
1293 75d62a58 j_mayer
1294 158142c2 bellard
#ifdef FLOATX80
1295 158142c2 bellard
1296 158142c2 bellard
/*----------------------------------------------------------------------------
1297 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1298 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1299 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1300 158142c2 bellard
| Arithmetic.
1301 158142c2 bellard
*----------------------------------------------------------------------------*/
1302 158142c2 bellard
1303 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if (a == 0) {
        return packFloatx80(0, 0, 0);
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 2^63. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    /* Shift so the leading 1 of the magnitude lands on bit 63. */
    shiftCount = countLeadingZeros64(absA);
    return packFloatx80(zSign, 0x403E - shiftCount, absA << shiftCount);
}
1316 158142c2 bellard
1317 158142c2 bellard
#endif
1318 158142c2 bellard
1319 158142c2 bellard
#ifdef FLOAT128
1320 158142c2 bellard
1321 158142c2 bellard
/*----------------------------------------------------------------------------
1322 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1323 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1324 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1325 158142c2 bellard
*----------------------------------------------------------------------------*/
1326 158142c2 bellard
1327 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    uint64_t zSig0, zSig1;

    if (a == 0) {
        return packFloat128(0, 0, 0, 0);
    }
    zSign = (a < 0);
    /* Take the magnitude in unsigned arithmetic: negating the most negative
     * signed value overflows (undefined behaviour), while the unsigned
     * subtraction wraps to the intended 2^63. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64(absA) + 49;
    zExp = 0x406E - shiftCount;
    /* A shift of 64 or more is done by placing the magnitude in the upper
     * word and shifting by the remainder. */
    if (64 <= shiftCount) {
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    } else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
    return packFloat128(zSign, zExp, zSig0, zSig1);
}
1353 158142c2 bellard
1354 158142c2 bellard
#endif
1355 158142c2 bellard
1356 158142c2 bellard
/*----------------------------------------------------------------------------
1357 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1358 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1359 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1360 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1361 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1362 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1363 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1364 158142c2 bellard
*----------------------------------------------------------------------------*/
1365 158142c2 bellard
1366 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint32_t aSig;
    uint64_t aSig64;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);

    /* A NaN is converted as if it were positive. */
    if (aExp == 0xFF && aSig != 0) {
        aSign = 0;
    }
    /* Restore the implicit leading 1 for any nonzero exponent. */
    if (aExp != 0) {
        aSig |= 0x00800000;
    }
    aSig64 = (uint64_t) aSig << 32;
    shiftCount = 0xAF - aExp;
    if (shiftCount > 0) {
        shift64RightJamming(aSig64, shiftCount, &aSig64);
    }
    return roundAndPackInt32(aSign, aSig64 STATUS_VAR);
}
1386 158142c2 bellard
1387 158142c2 bellard
/*----------------------------------------------------------------------------
1388 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1389 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1390 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1391 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1392 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1393 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1394 158142c2 bellard
| returned.
1395 158142c2 bellard
*----------------------------------------------------------------------------*/
1396 158142c2 bellard
1397 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint32_t aSig;
    int32 z;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);
    shiftCount = aExp - 0x9E;

    if (shiftCount >= 0) {
        /* Magnitude is at least 2^31 (or a is Inf/NaN). Only the bit
         * pattern 0xCF000000 converts without raising invalid. */
        if (float32_val(a) != 0xCF000000) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!aSign || ((aExp == 0xFF) && aSig)) {
                return 0x7FFFFFFF;
            }
        }
        return (int32_t) 0x80000000;
    }
    if (aExp <= 0x7E) {
        /* Magnitude below 1: result is 0, inexact unless a is zero. */
        if (aExp != 0 || aSig != 0) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Left-justify the significand with its implicit 1, then shift the
     * integer part down; shiftCount is negative here. */
    aSig = (aSig | 0x00800000) << 8;
    z = aSig >> (-shiftCount);
    /* Any bits shifted out make the result inexact. */
    if ((uint32_t) (aSig << (shiftCount & 31))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? -z : z;
}
1429 158142c2 bellard
1430 158142c2 bellard
/*----------------------------------------------------------------------------
1431 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1432 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
1433 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1434 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
1435 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1436 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
1437 cbcef455 Peter Maydell
| returned.
1438 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
1439 cbcef455 Peter Maydell
1440 cbcef455 Peter Maydell
int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint32_t aSig;
    int32 z;

    /* NOTE(review): unlike the other float32 conversions in this file, this
     * one does not call float32_squash_input_denormal(); confirm whether
     * that is intentional. */
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);
    shiftCount = aExp - 0x8E;

    if (shiftCount >= 0) {
        /* Magnitude is at least 2^15 (or a is Inf/NaN). Only the bit
         * pattern 0xC7000000 converts without raising invalid. */
        if (float32_val(a) != 0xC7000000) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!aSign || ((aExp == 0xFF) && aSig)) {
                return 0x7FFF;
            }
        }
        return (int32_t) 0xffff8000;
    }
    if (aExp <= 0x7E) {
        /* Magnitude below 1: result is 0, inexact unless a is zero. */
        if (aExp != 0 || aSig != 0) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Rebase the shift for the 32-bit left-justified significand;
     * shiftCount is negative from here on. */
    shiftCount -= 0x10;
    aSig = (aSig | 0x00800000) << 8;
    z = aSig >> (-shiftCount);
    /* Any bits shifted out make the result inexact. */
    if ((uint32_t) (aSig << (shiftCount & 31))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? -z : z;
}
1478 cbcef455 Peter Maydell
1479 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
1480 cbcef455 Peter Maydell
| Returns the result of converting the single-precision floating-point value
1481 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1482 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1483 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1484 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1485 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1486 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1487 158142c2 bellard
*----------------------------------------------------------------------------*/
1488 158142c2 bellard
1489 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint32_t aSig;
    uint64_t aSig64, aSigExtra;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);
    shiftCount = 0xBE - aExp;

    if (shiftCount < 0) {
        /* Exponent too large (including Inf/NaN): invalid. Positive values
         * and NaNs saturate to the maximum, the rest to the minimum. */
        float_raise(float_flag_invalid STATUS_VAR);
        if (!aSign || ((aExp == 0xFF) && aSig)) {
            return LIT64(0x7FFFFFFFFFFFFFFF);
        }
        return (int64_t) LIT64(0x8000000000000000);
    }

    /* Restore the implicit leading 1 for any nonzero exponent. */
    if (aExp != 0) {
        aSig |= 0x00800000;
    }
    aSig64 = (uint64_t) aSig << 40;
    shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
    return roundAndPackInt64(aSign, aSig64, aSigExtra STATUS_VAR);
}
1515 158142c2 bellard
1516 158142c2 bellard
/*----------------------------------------------------------------------------
1517 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1518 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1519 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1520 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1521 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1522 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1523 158142c2 bellard
| returned.
1524 158142c2 bellard
*----------------------------------------------------------------------------*/
1525 158142c2 bellard
1526 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint32_t aSig;
    uint64_t aSig64;
    int64 z;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);
    shiftCount = aExp - 0xBE;

    if (shiftCount >= 0) {
        /* Magnitude is at least 2^63 (or a is Inf/NaN). Only the bit
         * pattern 0xDF000000 converts without raising invalid. */
        if (float32_val(a) != 0xDF000000) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!aSign || ((aExp == 0xFF) && aSig)) {
                return LIT64(0x7FFFFFFFFFFFFFFF);
            }
        }
        return (int64_t) LIT64(0x8000000000000000);
    }
    if (aExp <= 0x7E) {
        /* Magnitude below 1: result is 0, inexact unless a is zero. */
        if (aExp != 0 || aSig != 0) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Left-justify the significand with its implicit 1 in 64 bits, then
     * shift the integer part down; shiftCount is negative here. */
    aSig64 = aSig | 0x00800000;
    aSig64 <<= 40;
    z = aSig64 >> (-shiftCount);
    /* Any bits shifted out make the result inexact. */
    if ((uint64_t) (aSig64 << (shiftCount & 63))) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return aSign ? -z : z;
}
1562 158142c2 bellard
1563 158142c2 bellard
/*----------------------------------------------------------------------------
1564 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1565 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1566 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1567 158142c2 bellard
| Arithmetic.
1568 158142c2 bellard
*----------------------------------------------------------------------------*/
1569 158142c2 bellard
1570 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSign = extractFloat32Sign(a);
    aExp = extractFloat32Exp(a);
    aSig = extractFloat32Frac(a);

    if (aExp == 0xFF) {
        /* NaN goes through the common-NaN helpers; otherwise infinity. */
        if (aSig) {
            return commonNaNToFloat64(float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        return packFloat64(aSign, 0x7FF, 0);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloat64(aSign, 0, 0);
        }
        /* Subnormal input: normalize it before re-biasing the exponent. */
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
        --aExp;
    }
    /* Re-bias the exponent and widen the fraction by 29 bits. */
    return packFloat64(aSign, aExp + 0x380, ((uint64_t) aSig) << 29);
}
1592 158142c2 bellard
1593 158142c2 bellard
#ifdef FLOATX80
1594 158142c2 bellard
1595 158142c2 bellard
/*----------------------------------------------------------------------------
1596 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1597 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1598 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1599 158142c2 bellard
| Arithmetic.
1600 158142c2 bellard
*----------------------------------------------------------------------------*/
1601 158142c2 bellard
1602 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1603 158142c2 bellard
{
1604 158142c2 bellard
    flag aSign;
1605 158142c2 bellard
    int16 aExp;
1606 bb98fe42 Andreas Färber
    uint32_t aSig;
1607 158142c2 bellard
1608 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1609 158142c2 bellard
    aSig = extractFloat32Frac( a );
1610 158142c2 bellard
    aExp = extractFloat32Exp( a );
1611 158142c2 bellard
    aSign = extractFloat32Sign( a );
1612 158142c2 bellard
    if ( aExp == 0xFF ) {
1613 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1614 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1615 158142c2 bellard
    }
1616 158142c2 bellard
    if ( aExp == 0 ) {
1617 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1618 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1619 158142c2 bellard
    }
1620 158142c2 bellard
    aSig |= 0x00800000;
1621 bb98fe42 Andreas Färber
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1622 158142c2 bellard
1623 158142c2 bellard
}
1624 158142c2 bellard
1625 158142c2 bellard
#endif
1626 158142c2 bellard
1627 158142c2 bellard
#ifdef FLOAT128
1628 158142c2 bellard
1629 158142c2 bellard
/*----------------------------------------------------------------------------
1630 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1631 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1632 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1633 158142c2 bellard
| Arithmetic.
1634 158142c2 bellard
*----------------------------------------------------------------------------*/
1635 158142c2 bellard
1636 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
1637 158142c2 bellard
{
1638 158142c2 bellard
    flag aSign;
1639 158142c2 bellard
    int16 aExp;
1640 bb98fe42 Andreas Färber
    uint32_t aSig;
1641 158142c2 bellard
1642 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1643 158142c2 bellard
    aSig = extractFloat32Frac( a );
1644 158142c2 bellard
    aExp = extractFloat32Exp( a );
1645 158142c2 bellard
    aSign = extractFloat32Sign( a );
1646 158142c2 bellard
    if ( aExp == 0xFF ) {
1647 bcd4d9af Christophe Lyon
        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
1648 158142c2 bellard
        return packFloat128( aSign, 0x7FFF, 0, 0 );
1649 158142c2 bellard
    }
1650 158142c2 bellard
    if ( aExp == 0 ) {
1651 158142c2 bellard
        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1652 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1653 158142c2 bellard
        --aExp;
1654 158142c2 bellard
    }
1655 bb98fe42 Andreas Färber
    return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1656 158142c2 bellard
1657 158142c2 bellard
}
1658 158142c2 bellard
1659 158142c2 bellard
#endif
1660 158142c2 bellard
1661 158142c2 bellard
/*----------------------------------------------------------------------------
1662 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1663 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1664 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1665 158142c2 bellard
| Floating-Point Arithmetic.
1666 158142c2 bellard
*----------------------------------------------------------------------------*/
1667 158142c2 bellard
1668 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
1669 158142c2 bellard
{
1670 158142c2 bellard
    flag aSign;
1671 158142c2 bellard
    int16 aExp;
1672 bb98fe42 Andreas Färber
    uint32_t lastBitMask, roundBitsMask;
1673 158142c2 bellard
    int8 roundingMode;
1674 bb98fe42 Andreas Färber
    uint32_t z;
1675 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1676 158142c2 bellard
1677 158142c2 bellard
    aExp = extractFloat32Exp( a );
1678 158142c2 bellard
    if ( 0x96 <= aExp ) {
1679 158142c2 bellard
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1680 158142c2 bellard
            return propagateFloat32NaN( a, a STATUS_VAR );
1681 158142c2 bellard
        }
1682 158142c2 bellard
        return a;
1683 158142c2 bellard
    }
1684 158142c2 bellard
    if ( aExp <= 0x7E ) {
1685 bb98fe42 Andreas Färber
        if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1686 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
1687 158142c2 bellard
        aSign = extractFloat32Sign( a );
1688 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
1689 158142c2 bellard
         case float_round_nearest_even:
1690 158142c2 bellard
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1691 158142c2 bellard
                return packFloat32( aSign, 0x7F, 0 );
1692 158142c2 bellard
            }
1693 158142c2 bellard
            break;
1694 158142c2 bellard
         case float_round_down:
1695 f090c9d4 pbrook
            return make_float32(aSign ? 0xBF800000 : 0);
1696 158142c2 bellard
         case float_round_up:
1697 f090c9d4 pbrook
            return make_float32(aSign ? 0x80000000 : 0x3F800000);
1698 158142c2 bellard
        }
1699 158142c2 bellard
        return packFloat32( aSign, 0, 0 );
1700 158142c2 bellard
    }
1701 158142c2 bellard
    lastBitMask = 1;
1702 158142c2 bellard
    lastBitMask <<= 0x96 - aExp;
1703 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
1704 f090c9d4 pbrook
    z = float32_val(a);
1705 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
1706 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
1707 158142c2 bellard
        z += lastBitMask>>1;
1708 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1709 158142c2 bellard
    }
1710 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
1711 f090c9d4 pbrook
        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1712 158142c2 bellard
            z += roundBitsMask;
1713 158142c2 bellard
        }
1714 158142c2 bellard
    }
1715 158142c2 bellard
    z &= ~ roundBitsMask;
1716 f090c9d4 pbrook
    if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1717 f090c9d4 pbrook
    return make_float32(z);
1718 158142c2 bellard
1719 158142c2 bellard
}
1720 158142c2 bellard
1721 158142c2 bellard
/*----------------------------------------------------------------------------
1722 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1723 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1724 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1725 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1726 158142c2 bellard
| Floating-Point Arithmetic.
1727 158142c2 bellard
*----------------------------------------------------------------------------*/
1728 158142c2 bellard
1729 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1730 158142c2 bellard
{
1731 158142c2 bellard
    int16 aExp, bExp, zExp;
1732 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1733 158142c2 bellard
    int16 expDiff;
1734 158142c2 bellard
1735 158142c2 bellard
    aSig = extractFloat32Frac( a );
1736 158142c2 bellard
    aExp = extractFloat32Exp( a );
1737 158142c2 bellard
    bSig = extractFloat32Frac( b );
1738 158142c2 bellard
    bExp = extractFloat32Exp( b );
1739 158142c2 bellard
    expDiff = aExp - bExp;
1740 158142c2 bellard
    aSig <<= 6;
1741 158142c2 bellard
    bSig <<= 6;
1742 158142c2 bellard
    if ( 0 < expDiff ) {
1743 158142c2 bellard
        if ( aExp == 0xFF ) {
1744 158142c2 bellard
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1745 158142c2 bellard
            return a;
1746 158142c2 bellard
        }
1747 158142c2 bellard
        if ( bExp == 0 ) {
1748 158142c2 bellard
            --expDiff;
1749 158142c2 bellard
        }
1750 158142c2 bellard
        else {
1751 158142c2 bellard
            bSig |= 0x20000000;
1752 158142c2 bellard
        }
1753 158142c2 bellard
        shift32RightJamming( bSig, expDiff, &bSig );
1754 158142c2 bellard
        zExp = aExp;
1755 158142c2 bellard
    }
1756 158142c2 bellard
    else if ( expDiff < 0 ) {
1757 158142c2 bellard
        if ( bExp == 0xFF ) {
1758 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1759 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
1760 158142c2 bellard
        }
1761 158142c2 bellard
        if ( aExp == 0 ) {
1762 158142c2 bellard
            ++expDiff;
1763 158142c2 bellard
        }
1764 158142c2 bellard
        else {
1765 158142c2 bellard
            aSig |= 0x20000000;
1766 158142c2 bellard
        }
1767 158142c2 bellard
        shift32RightJamming( aSig, - expDiff, &aSig );
1768 158142c2 bellard
        zExp = bExp;
1769 158142c2 bellard
    }
1770 158142c2 bellard
    else {
1771 158142c2 bellard
        if ( aExp == 0xFF ) {
1772 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1773 158142c2 bellard
            return a;
1774 158142c2 bellard
        }
1775 fe76d976 pbrook
        if ( aExp == 0 ) {
1776 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1777 e6afc87f Peter Maydell
                if (aSig | bSig) {
1778 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
1779 e6afc87f Peter Maydell
                }
1780 e6afc87f Peter Maydell
                return packFloat32(zSign, 0, 0);
1781 e6afc87f Peter Maydell
            }
1782 fe76d976 pbrook
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1783 fe76d976 pbrook
        }
1784 158142c2 bellard
        zSig = 0x40000000 + aSig + bSig;
1785 158142c2 bellard
        zExp = aExp;
1786 158142c2 bellard
        goto roundAndPack;
1787 158142c2 bellard
    }
1788 158142c2 bellard
    aSig |= 0x20000000;
1789 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
1790 158142c2 bellard
    --zExp;
1791 bb98fe42 Andreas Färber
    if ( (int32_t) zSig < 0 ) {
1792 158142c2 bellard
        zSig = aSig + bSig;
1793 158142c2 bellard
        ++zExp;
1794 158142c2 bellard
    }
1795 158142c2 bellard
 roundAndPack:
1796 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1797 158142c2 bellard
1798 158142c2 bellard
}
1799 158142c2 bellard
1800 158142c2 bellard
/*----------------------------------------------------------------------------
1801 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1802 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1803 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1804 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1805 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1806 158142c2 bellard
*----------------------------------------------------------------------------*/
1807 158142c2 bellard
1808 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1809 158142c2 bellard
{
1810 158142c2 bellard
    int16 aExp, bExp, zExp;
1811 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1812 158142c2 bellard
    int16 expDiff;
1813 158142c2 bellard
1814 158142c2 bellard
    aSig = extractFloat32Frac( a );
1815 158142c2 bellard
    aExp = extractFloat32Exp( a );
1816 158142c2 bellard
    bSig = extractFloat32Frac( b );
1817 158142c2 bellard
    bExp = extractFloat32Exp( b );
1818 158142c2 bellard
    expDiff = aExp - bExp;
1819 158142c2 bellard
    aSig <<= 7;
1820 158142c2 bellard
    bSig <<= 7;
1821 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
1822 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
1823 158142c2 bellard
    if ( aExp == 0xFF ) {
1824 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1825 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
1826 158142c2 bellard
        return float32_default_nan;
1827 158142c2 bellard
    }
1828 158142c2 bellard
    if ( aExp == 0 ) {
1829 158142c2 bellard
        aExp = 1;
1830 158142c2 bellard
        bExp = 1;
1831 158142c2 bellard
    }
1832 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
1833 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
1834 158142c2 bellard
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1835 158142c2 bellard
 bExpBigger:
1836 158142c2 bellard
    if ( bExp == 0xFF ) {
1837 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1838 158142c2 bellard
        return packFloat32( zSign ^ 1, 0xFF, 0 );
1839 158142c2 bellard
    }
1840 158142c2 bellard
    if ( aExp == 0 ) {
1841 158142c2 bellard
        ++expDiff;
1842 158142c2 bellard
    }
1843 158142c2 bellard
    else {
1844 158142c2 bellard
        aSig |= 0x40000000;
1845 158142c2 bellard
    }
1846 158142c2 bellard
    shift32RightJamming( aSig, - expDiff, &aSig );
1847 158142c2 bellard
    bSig |= 0x40000000;
1848 158142c2 bellard
 bBigger:
1849 158142c2 bellard
    zSig = bSig - aSig;
1850 158142c2 bellard
    zExp = bExp;
1851 158142c2 bellard
    zSign ^= 1;
1852 158142c2 bellard
    goto normalizeRoundAndPack;
1853 158142c2 bellard
 aExpBigger:
1854 158142c2 bellard
    if ( aExp == 0xFF ) {
1855 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1856 158142c2 bellard
        return a;
1857 158142c2 bellard
    }
1858 158142c2 bellard
    if ( bExp == 0 ) {
1859 158142c2 bellard
        --expDiff;
1860 158142c2 bellard
    }
1861 158142c2 bellard
    else {
1862 158142c2 bellard
        bSig |= 0x40000000;
1863 158142c2 bellard
    }
1864 158142c2 bellard
    shift32RightJamming( bSig, expDiff, &bSig );
1865 158142c2 bellard
    aSig |= 0x40000000;
1866 158142c2 bellard
 aBigger:
1867 158142c2 bellard
    zSig = aSig - bSig;
1868 158142c2 bellard
    zExp = aExp;
1869 158142c2 bellard
 normalizeRoundAndPack:
1870 158142c2 bellard
    --zExp;
1871 158142c2 bellard
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1872 158142c2 bellard
1873 158142c2 bellard
}
1874 158142c2 bellard
1875 158142c2 bellard
/*----------------------------------------------------------------------------
1876 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1877 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1878 158142c2 bellard
| Binary Floating-Point Arithmetic.
1879 158142c2 bellard
*----------------------------------------------------------------------------*/
1880 158142c2 bellard
1881 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
1882 158142c2 bellard
{
1883 158142c2 bellard
    flag aSign, bSign;
1884 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1885 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1886 158142c2 bellard
1887 158142c2 bellard
    aSign = extractFloat32Sign( a );
1888 158142c2 bellard
    bSign = extractFloat32Sign( b );
1889 158142c2 bellard
    if ( aSign == bSign ) {
1890 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR);
1891 158142c2 bellard
    }
1892 158142c2 bellard
    else {
1893 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1894 158142c2 bellard
    }
1895 158142c2 bellard
1896 158142c2 bellard
}
1897 158142c2 bellard
1898 158142c2 bellard
/*----------------------------------------------------------------------------
1899 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1900 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1901 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1902 158142c2 bellard
*----------------------------------------------------------------------------*/
1903 158142c2 bellard
1904 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1905 158142c2 bellard
{
1906 158142c2 bellard
    flag aSign, bSign;
1907 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1908 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1909 158142c2 bellard
1910 158142c2 bellard
    aSign = extractFloat32Sign( a );
1911 158142c2 bellard
    bSign = extractFloat32Sign( b );
1912 158142c2 bellard
    if ( aSign == bSign ) {
1913 158142c2 bellard
        return subFloat32Sigs( a, b, aSign STATUS_VAR );
1914 158142c2 bellard
    }
1915 158142c2 bellard
    else {
1916 158142c2 bellard
        return addFloat32Sigs( a, b, aSign STATUS_VAR );
1917 158142c2 bellard
    }
1918 158142c2 bellard
1919 158142c2 bellard
}
1920 158142c2 bellard
1921 158142c2 bellard
/*----------------------------------------------------------------------------
1922 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1923 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1924 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1925 158142c2 bellard
*----------------------------------------------------------------------------*/
1926 158142c2 bellard
1927 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1928 158142c2 bellard
{
1929 158142c2 bellard
    flag aSign, bSign, zSign;
1930 158142c2 bellard
    int16 aExp, bExp, zExp;
1931 bb98fe42 Andreas Färber
    uint32_t aSig, bSig;
1932 bb98fe42 Andreas Färber
    uint64_t zSig64;
1933 bb98fe42 Andreas Färber
    uint32_t zSig;
1934 158142c2 bellard
1935 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1936 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1937 37d18660 Peter Maydell
1938 158142c2 bellard
    aSig = extractFloat32Frac( a );
1939 158142c2 bellard
    aExp = extractFloat32Exp( a );
1940 158142c2 bellard
    aSign = extractFloat32Sign( a );
1941 158142c2 bellard
    bSig = extractFloat32Frac( b );
1942 158142c2 bellard
    bExp = extractFloat32Exp( b );
1943 158142c2 bellard
    bSign = extractFloat32Sign( b );
1944 158142c2 bellard
    zSign = aSign ^ bSign;
1945 158142c2 bellard
    if ( aExp == 0xFF ) {
1946 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1947 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
1948 158142c2 bellard
        }
1949 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
1950 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1951 158142c2 bellard
            return float32_default_nan;
1952 158142c2 bellard
        }
1953 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1954 158142c2 bellard
    }
1955 158142c2 bellard
    if ( bExp == 0xFF ) {
1956 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1957 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
1958 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
1959 158142c2 bellard
            return float32_default_nan;
1960 158142c2 bellard
        }
1961 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
1962 158142c2 bellard
    }
1963 158142c2 bellard
    if ( aExp == 0 ) {
1964 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1965 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1966 158142c2 bellard
    }
1967 158142c2 bellard
    if ( bExp == 0 ) {
1968 158142c2 bellard
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1969 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1970 158142c2 bellard
    }
1971 158142c2 bellard
    zExp = aExp + bExp - 0x7F;
1972 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
1973 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
1974 bb98fe42 Andreas Färber
    shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
1975 158142c2 bellard
    zSig = zSig64;
1976 bb98fe42 Andreas Färber
    if ( 0 <= (int32_t) ( zSig<<1 ) ) {
1977 158142c2 bellard
        zSig <<= 1;
1978 158142c2 bellard
        --zExp;
1979 158142c2 bellard
    }
1980 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1981 158142c2 bellard
1982 158142c2 bellard
}
1983 158142c2 bellard
1984 158142c2 bellard
/*----------------------------------------------------------------------------
1985 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1986 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1987 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1988 158142c2 bellard
*----------------------------------------------------------------------------*/
1989 158142c2 bellard
1990 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
1991 158142c2 bellard
{
1992 158142c2 bellard
    flag aSign, bSign, zSign;
1993 158142c2 bellard
    int16 aExp, bExp, zExp;
1994 bb98fe42 Andreas Färber
    uint32_t aSig, bSig, zSig;
1995 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
1996 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
1997 158142c2 bellard
1998 158142c2 bellard
    aSig = extractFloat32Frac( a );
1999 158142c2 bellard
    aExp = extractFloat32Exp( a );
2000 158142c2 bellard
    aSign = extractFloat32Sign( a );
2001 158142c2 bellard
    bSig = extractFloat32Frac( b );
2002 158142c2 bellard
    bExp = extractFloat32Exp( b );
2003 158142c2 bellard
    bSign = extractFloat32Sign( b );
2004 158142c2 bellard
    zSign = aSign ^ bSign;
2005 158142c2 bellard
    if ( aExp == 0xFF ) {
2006 158142c2 bellard
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2007 158142c2 bellard
        if ( bExp == 0xFF ) {
2008 158142c2 bellard
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2009 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2010 158142c2 bellard
            return float32_default_nan;
2011 158142c2 bellard
        }
2012 158142c2 bellard
        return packFloat32( zSign, 0xFF, 0 );
2013 158142c2 bellard
    }
2014 158142c2 bellard
    if ( bExp == 0xFF ) {
2015 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2016 158142c2 bellard
        return packFloat32( zSign, 0, 0 );
2017 158142c2 bellard
    }
2018 158142c2 bellard
    if ( bExp == 0 ) {
2019 158142c2 bellard
        if ( bSig == 0 ) {
2020 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
2021 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
2022 158142c2 bellard
                return float32_default_nan;
2023 158142c2 bellard
            }
2024 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
2025 158142c2 bellard
            return packFloat32( zSign, 0xFF, 0 );
2026 158142c2 bellard
        }
2027 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2028 158142c2 bellard
    }
2029 158142c2 bellard
    if ( aExp == 0 ) {
2030 158142c2 bellard
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2031 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2032 158142c2 bellard
    }
2033 158142c2 bellard
    zExp = aExp - bExp + 0x7D;
2034 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<7;
2035 158142c2 bellard
    bSig = ( bSig | 0x00800000 )<<8;
2036 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
2037 158142c2 bellard
        aSig >>= 1;
2038 158142c2 bellard
        ++zExp;
2039 158142c2 bellard
    }
2040 bb98fe42 Andreas Färber
    zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2041 158142c2 bellard
    if ( ( zSig & 0x3F ) == 0 ) {
2042 bb98fe42 Andreas Färber
        zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2043 158142c2 bellard
    }
2044 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2045 158142c2 bellard
2046 158142c2 bellard
}
2047 158142c2 bellard
2048 158142c2 bellard
/*----------------------------------------------------------------------------
2049 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
2050 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2051 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2052 158142c2 bellard
*----------------------------------------------------------------------------*/
2053 158142c2 bellard
2054 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
2055 158142c2 bellard
{
2056 ed086f3d Blue Swirl
    flag aSign, zSign;
2057 158142c2 bellard
    int16 aExp, bExp, expDiff;
2058 bb98fe42 Andreas Färber
    uint32_t aSig, bSig;
2059 bb98fe42 Andreas Färber
    uint32_t q;
2060 bb98fe42 Andreas Färber
    uint64_t aSig64, bSig64, q64;
2061 bb98fe42 Andreas Färber
    uint32_t alternateASig;
2062 bb98fe42 Andreas Färber
    int32_t sigMean;
2063 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2064 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2065 158142c2 bellard
2066 158142c2 bellard
    aSig = extractFloat32Frac( a );
2067 158142c2 bellard
    aExp = extractFloat32Exp( a );
2068 158142c2 bellard
    aSign = extractFloat32Sign( a );
2069 158142c2 bellard
    bSig = extractFloat32Frac( b );
2070 158142c2 bellard
    bExp = extractFloat32Exp( b );
2071 158142c2 bellard
    if ( aExp == 0xFF ) {
2072 158142c2 bellard
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2073 158142c2 bellard
            return propagateFloat32NaN( a, b STATUS_VAR );
2074 158142c2 bellard
        }
2075 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2076 158142c2 bellard
        return float32_default_nan;
2077 158142c2 bellard
    }
2078 158142c2 bellard
    if ( bExp == 0xFF ) {
2079 158142c2 bellard
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2080 158142c2 bellard
        return a;
2081 158142c2 bellard
    }
2082 158142c2 bellard
    if ( bExp == 0 ) {
2083 158142c2 bellard
        if ( bSig == 0 ) {
2084 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
2085 158142c2 bellard
            return float32_default_nan;
2086 158142c2 bellard
        }
2087 158142c2 bellard
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2088 158142c2 bellard
    }
2089 158142c2 bellard
    if ( aExp == 0 ) {
2090 158142c2 bellard
        if ( aSig == 0 ) return a;
2091 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2092 158142c2 bellard
    }
2093 158142c2 bellard
    expDiff = aExp - bExp;
2094 158142c2 bellard
    aSig |= 0x00800000;
2095 158142c2 bellard
    bSig |= 0x00800000;
2096 158142c2 bellard
    if ( expDiff < 32 ) {
2097 158142c2 bellard
        aSig <<= 8;
2098 158142c2 bellard
        bSig <<= 8;
2099 158142c2 bellard
        if ( expDiff < 0 ) {
2100 158142c2 bellard
            if ( expDiff < -1 ) return a;
2101 158142c2 bellard
            aSig >>= 1;
2102 158142c2 bellard
        }
2103 158142c2 bellard
        q = ( bSig <= aSig );
2104 158142c2 bellard
        if ( q ) aSig -= bSig;
2105 158142c2 bellard
        if ( 0 < expDiff ) {
2106 bb98fe42 Andreas Färber
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2107 158142c2 bellard
            q >>= 32 - expDiff;
2108 158142c2 bellard
            bSig >>= 2;
2109 158142c2 bellard
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2110 158142c2 bellard
        }
2111 158142c2 bellard
        else {
2112 158142c2 bellard
            aSig >>= 2;
2113 158142c2 bellard
            bSig >>= 2;
2114 158142c2 bellard
        }
2115 158142c2 bellard
    }
2116 158142c2 bellard
    else {
2117 158142c2 bellard
        if ( bSig <= aSig ) aSig -= bSig;
2118 bb98fe42 Andreas Färber
        aSig64 = ( (uint64_t) aSig )<<40;
2119 bb98fe42 Andreas Färber
        bSig64 = ( (uint64_t) bSig )<<40;
2120 158142c2 bellard
        expDiff -= 64;
2121 158142c2 bellard
        while ( 0 < expDiff ) {
2122 158142c2 bellard
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2123 158142c2 bellard
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2124 158142c2 bellard
            aSig64 = - ( ( bSig * q64 )<<38 );
2125 158142c2 bellard
            expDiff -= 62;
2126 158142c2 bellard
        }
2127 158142c2 bellard
        expDiff += 64;
2128 158142c2 bellard
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2129 158142c2 bellard
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2130 158142c2 bellard
        q = q64>>( 64 - expDiff );
2131 158142c2 bellard
        bSig <<= 6;
2132 158142c2 bellard
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2133 158142c2 bellard
    }
2134 158142c2 bellard
    do {
2135 158142c2 bellard
        alternateASig = aSig;
2136 158142c2 bellard
        ++q;
2137 158142c2 bellard
        aSig -= bSig;
2138 bb98fe42 Andreas Färber
    } while ( 0 <= (int32_t) aSig );
2139 158142c2 bellard
    sigMean = aSig + alternateASig;
2140 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2141 158142c2 bellard
        aSig = alternateASig;
2142 158142c2 bellard
    }
2143 bb98fe42 Andreas Färber
    zSign = ( (int32_t) aSig < 0 );
2144 158142c2 bellard
    if ( zSign ) aSig = - aSig;
2145 158142c2 bellard
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2146 158142c2 bellard
2147 158142c2 bellard
}
2148 158142c2 bellard
2149 158142c2 bellard
/*----------------------------------------------------------------------------
2150 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2151 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2152 158142c2 bellard
| Floating-Point Arithmetic.
2153 158142c2 bellard
*----------------------------------------------------------------------------*/
2154 158142c2 bellard
2155 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2156 158142c2 bellard
{
2157 158142c2 bellard
    flag aSign;
2158 158142c2 bellard
    int16 aExp, zExp;
2159 bb98fe42 Andreas Färber
    uint32_t aSig, zSig;
2160 bb98fe42 Andreas Färber
    uint64_t rem, term;
2161 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2162 158142c2 bellard
2163 158142c2 bellard
    aSig = extractFloat32Frac( a );
2164 158142c2 bellard
    aExp = extractFloat32Exp( a );
2165 158142c2 bellard
    aSign = extractFloat32Sign( a );
2166 158142c2 bellard
    if ( aExp == 0xFF ) {
2167 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2168 158142c2 bellard
        if ( ! aSign ) return a;
2169 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2170 158142c2 bellard
        return float32_default_nan;
2171 158142c2 bellard
    }
2172 158142c2 bellard
    if ( aSign ) {
2173 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2174 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2175 158142c2 bellard
        return float32_default_nan;
2176 158142c2 bellard
    }
2177 158142c2 bellard
    if ( aExp == 0 ) {
2178 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2179 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2180 158142c2 bellard
    }
2181 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2182 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2183 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2184 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2185 158142c2 bellard
        if ( zSig < 2 ) {
2186 158142c2 bellard
            zSig = 0x7FFFFFFF;
2187 158142c2 bellard
            goto roundAndPack;
2188 158142c2 bellard
        }
2189 158142c2 bellard
        aSig >>= aExp & 1;
2190 bb98fe42 Andreas Färber
        term = ( (uint64_t) zSig ) * zSig;
2191 bb98fe42 Andreas Färber
        rem = ( ( (uint64_t) aSig )<<32 ) - term;
2192 bb98fe42 Andreas Färber
        while ( (int64_t) rem < 0 ) {
2193 158142c2 bellard
            --zSig;
2194 bb98fe42 Andreas Färber
            rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2195 158142c2 bellard
        }
2196 158142c2 bellard
        zSig |= ( rem != 0 );
2197 158142c2 bellard
    }
2198 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2199 158142c2 bellard
 roundAndPack:
2200 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2201 158142c2 bellard
2202 158142c2 bellard
}
2203 158142c2 bellard
2204 158142c2 bellard
/*----------------------------------------------------------------------------
2205 8229c991 Aurelien Jarno
| Returns the binary exponential of the single-precision floating-point value
2206 8229c991 Aurelien Jarno
| `a'. The operation is performed according to the IEC/IEEE Standard for
2207 8229c991 Aurelien Jarno
| Binary Floating-Point Arithmetic.
2208 8229c991 Aurelien Jarno
|
2209 8229c991 Aurelien Jarno
| Uses the following identities:
2210 8229c991 Aurelien Jarno
|
2211 8229c991 Aurelien Jarno
| 1. -------------------------------------------------------------------------
2212 8229c991 Aurelien Jarno
|      x    x*ln(2)
2213 8229c991 Aurelien Jarno
|     2  = e
2214 8229c991 Aurelien Jarno
|
2215 8229c991 Aurelien Jarno
| 2. -------------------------------------------------------------------------
2216 8229c991 Aurelien Jarno
|                      2     3     4     5           n
2217 8229c991 Aurelien Jarno
|      x        x     x     x     x     x           x
2218 8229c991 Aurelien Jarno
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2219 8229c991 Aurelien Jarno
|               1!    2!    3!    4!    5!          n!
2220 8229c991 Aurelien Jarno
*----------------------------------------------------------------------------*/
2221 8229c991 Aurelien Jarno
2222 8229c991 Aurelien Jarno
static const float64 float32_exp2_coefficients[15] =
2223 8229c991 Aurelien Jarno
{
2224 d5138cf4 Peter Maydell
    const_float64( 0x3ff0000000000000ll ), /*  1 */
2225 d5138cf4 Peter Maydell
    const_float64( 0x3fe0000000000000ll ), /*  2 */
2226 d5138cf4 Peter Maydell
    const_float64( 0x3fc5555555555555ll ), /*  3 */
2227 d5138cf4 Peter Maydell
    const_float64( 0x3fa5555555555555ll ), /*  4 */
2228 d5138cf4 Peter Maydell
    const_float64( 0x3f81111111111111ll ), /*  5 */
2229 d5138cf4 Peter Maydell
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
2230 d5138cf4 Peter Maydell
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
2231 d5138cf4 Peter Maydell
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
2232 d5138cf4 Peter Maydell
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
2233 d5138cf4 Peter Maydell
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2234 d5138cf4 Peter Maydell
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2235 d5138cf4 Peter Maydell
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2236 d5138cf4 Peter Maydell
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
2237 d5138cf4 Peter Maydell
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2238 d5138cf4 Peter Maydell
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2239 8229c991 Aurelien Jarno
};
2240 8229c991 Aurelien Jarno
2241 8229c991 Aurelien Jarno
float32 float32_exp2( float32 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp;
    uint32_t aSig;
    float64 r, x, xn;
    int i;
    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0xFF) {
        /* Maximum exponent: a NaN is propagated, -inf gives +0 and +inf is
         * returned unchanged.
         */
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
        return (aSign) ? float32_zero : a;
    }
    if (aExp == 0) {
        /* 2^(+-0) is exactly 1; no flag is raised for it. */
        if (aSig == 0) return float32_one;
    }

    /* Every remaining input is reported as inexact, unconditionally. */
    float_raise( float_flag_inexact STATUS_VAR);

    /* ******************************* */
    /* using float64 for approximation */
    /* ******************************* */
    /* 2^a = e^(a * ln 2): rescale the argument, then sum the Taylor series. */
    x = float32_to_float64(a STATUS_VAR);
    x = float64_mul(x, float64_ln2 STATUS_VAR);

    /* r accumulates 1 + sum(x^n / n!) for n = 1..15; xn holds x^(i + 1). */
    xn = x;
    r = float64_one;
    for (i = 0 ; i < 15 ; i++) {
        float64 f;

        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
        r = float64_add(r, f STATUS_VAR);

        xn = float64_mul(xn, x STATUS_VAR);
    }

    /* Pass the status through STATUS_VAR like every other call here, rather
     * than naming the `status' parameter directly.
     */
    return float64_to_float32(r STATUS_VAR);
}
2283 8229c991 Aurelien Jarno
2284 8229c991 Aurelien Jarno
/*----------------------------------------------------------------------------
2285 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2286 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2287 374dfc33 aurel32
| Floating-Point Arithmetic.
2288 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2289 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
{
    flag aSign, zSign;
    int16 aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* A zero of either sign yields negative infinity. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* Any other input with the sign bit set is an invalid operation. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if ( aExp == 0xFF ) {
        /* NaNs are propagated; +inf is returned unchanged. */
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
        return a;
    }

    /* zSig is built as a two's complement fixed-point value with 23
     * fraction bits: the unbiased exponent supplies the integer part.
     */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    /* Shift an unsigned copy: left-shifting a negative signed value is
     * undefined behaviour in C, and aExp is negative for every input
     * below 1.0.  The resulting bit pattern is the two's complement one.
     */
    zSig = (uint32_t) aExp << 23;

    /* Produce one fraction bit per iteration, most significant first:
     * square the significand (23 fraction bits) and, when the square
     * reaches 2.0 (bit 24 set), halve it and set the current result bit.
     */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Convert a negative two's complement result to sign + magnitude. */
    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
}
2331 374dfc33 aurel32
2332 374dfc33 aurel32
/*----------------------------------------------------------------------------
2333 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2334 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2335 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2336 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2337 158142c2 bellard
*----------------------------------------------------------------------------*/
2338 158142c2 bellard
2339 b689362d Aurelien Jarno
int float32_eq( float32 a, float32 b STATUS_PARAM )
{
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* A NaN (maximum exponent, non-zero fraction) in either operand raises
     * invalid and compares as "not equal".
     */
    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aBits = float32_val(a);
    bBits = float32_val(b);
    if (aBits == bBits) {
        return 1;
    }
    /* Differing encodings are still equal when both are zeros: shifting
     * out the sign bit leaves nothing set in either operand.
     */
    return (uint32_t)((aBits | bBits) << 1) == 0;
}
2355 158142c2 bellard
2356 158142c2 bellard
/*----------------------------------------------------------------------------
2357 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2358 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
2359 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
2360 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2361 158142c2 bellard
*----------------------------------------------------------------------------*/
2362 158142c2 bellard
2363 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Either operand being a NaN raises invalid; the result is then 0. */
    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    aBits = float32_val(a);
    bBits = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true when a is the negative operand, or when
         * both operands are zeros (nothing set below the sign bit).
         */
        if (aSign) {
            return 1;
        }
        return (uint32_t)((aBits | bBits) << 1) == 0;
    }

    if (aBits == bBits) {
        return 1;
    }
    /* Same sign: compare the raw encodings as unsigned integers, with the
     * sense inverted when both operands are negative.
     */
    return aSign ^ (aBits < bBits);
}
2384 158142c2 bellard
2385 158142c2 bellard
/*----------------------------------------------------------------------------
2386 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2387 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2388 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
2389 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2390 158142c2 bellard
*----------------------------------------------------------------------------*/
2391 158142c2 bellard
2392 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Either operand being a NaN raises invalid; the result is then 0. */
    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        float_raise(float_flag_invalid STATUS_VAR);
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    aBits = float32_val(a);
    bBits = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true only when a is the negative operand and
         * the operands are not both zeros.
         */
        if (!aSign) {
            return 0;
        }
        return (uint32_t)((aBits | bBits) << 1) != 0;
    }

    if (aBits == bBits) {
        return 0;
    }
    /* Same sign: compare the raw encodings as unsigned integers, with the
     * sense inverted when both operands are negative.
     */
    return (aSign ^ (aBits < bBits)) != 0;
}
2413 158142c2 bellard
2414 158142c2 bellard
/*----------------------------------------------------------------------------
2415 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2416 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
2417 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
2418 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
2419 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2420 67b7861d Aurelien Jarno
2421 67b7861d Aurelien Jarno
int float32_unordered( float32 a, float32 b STATUS_PARAM )
{
    int aIsNaN, bIsNaN;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* A NaN has the maximum exponent and a non-zero fraction. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);

    if (!aIsNaN && !bIsNaN) {
        return 0;
    }

    /* Signaling variant: any NaN operand raises invalid. */
    float_raise(float_flag_invalid STATUS_VAR);
    return 1;
}
2434 b689362d Aurelien Jarno
2435 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2436 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2437 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2438 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
2439 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
2440 158142c2 bellard
*----------------------------------------------------------------------------*/
2441 158142c2 bellard
2442 b689362d Aurelien Jarno
int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
{
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        /* NaN operand: the result is 0, and invalid is raised only when
         * one of the operands is a signaling NaN.
         */
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    aBits = float32_val(a);
    bBits = float32_val(b);
    if (aBits == bBits) {
        return 1;
    }
    /* Differing encodings are still equal when both are zeros: shifting
     * out the sign bit leaves nothing set in either operand.
     */
    return (uint32_t)((aBits | bBits) << 1) == 0;
}
2458 158142c2 bellard
2459 158142c2 bellard
/*----------------------------------------------------------------------------
2460 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2461 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2462 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2463 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2464 158142c2 bellard
*----------------------------------------------------------------------------*/
2465 158142c2 bellard
2466 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        /* NaN operand: the result is 0, and invalid is raised only when
         * one of the operands is a signaling NaN.
         */
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    aBits = float32_val(a);
    bBits = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true when a is the negative operand, or when
         * both operands are zeros (nothing set below the sign bit).
         */
        if (aSign) {
            return 1;
        }
        return (uint32_t)((aBits | bBits) << 1) == 0;
    }

    if (aBits == bBits) {
        return 1;
    }
    /* Same sign: compare the raw encodings as unsigned integers, with the
     * sense inverted when both operands are negative.
     */
    return aSign ^ (aBits < bBits);
}
2489 158142c2 bellard
2490 158142c2 bellard
/*----------------------------------------------------------------------------
2491 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2492 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2493 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2494 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2495 158142c2 bellard
*----------------------------------------------------------------------------*/
2496 158142c2 bellard
2497 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t aBits, bBits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    if ((extractFloat32Exp(a) == 0xFF && extractFloat32Frac(a)) ||
        (extractFloat32Exp(b) == 0xFF && extractFloat32Frac(b))) {
        /* NaN operand: the result is 0, and invalid is raised only when
         * one of the operands is a signaling NaN.
         */
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    aBits = float32_val(a);
    bBits = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true only when a is the negative operand and
         * the operands are not both zeros.
         */
        if (!aSign) {
            return 0;
        }
        return (uint32_t)((aBits | bBits) << 1) != 0;
    }

    if (aBits == bBits) {
        return 0;
    }
    /* Same sign: compare the raw encodings as unsigned integers, with the
     * sense inverted when both operands are negative.
     */
    return (aSign ^ (aBits < bBits)) != 0;
}
2520 158142c2 bellard
2521 158142c2 bellard
/*----------------------------------------------------------------------------
2522 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2523 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
2524 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
2525 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
2526 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2527 67b7861d Aurelien Jarno
2528 67b7861d Aurelien Jarno
int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
{
    int aIsNaN, bIsNaN;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* A NaN has the maximum exponent and a non-zero fraction. */
    aIsNaN = (extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a);
    bIsNaN = (extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b);

    if (!aIsNaN && !bIsNaN) {
        return 0;
    }

    /* Quiet variant: invalid is raised only for a signaling NaN operand. */
    if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
        float_raise(float_flag_invalid STATUS_VAR);
    }
    return 1;
}
2543 67b7861d Aurelien Jarno
2544 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2545 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2546 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2547 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2548 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2549 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2550 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2551 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2552 158142c2 bellard
*----------------------------------------------------------------------------*/
2553 158142c2 bellard
2554 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);

    /* A NaN is handled as a positive value from here on. */
    if (aExp == 0x7FF && aSig != 0) {
        aSign = 0;
    }
    /* Non-zero exponent field: make the implicit leading one explicit. */
    if (aExp != 0) {
        aSig |= LIT64( 0x0010000000000000 );
    }

    /* Only right shifts are applied; the jamming shift folds shifted-out
     * bits into the low bit so rounding can still see them.
     */
    shiftCount = 0x42C - aExp;
    if (shiftCount > 0) {
        shift64RightJamming(aSig, shiftCount, &aSig);
    }

    /* Rounding, range checking and sign application happen in the helper. */
    return roundAndPackInt32(aSign, aSig STATUS_VAR);
}
2571 158142c2 bellard
2572 158142c2 bellard
/*----------------------------------------------------------------------------
2573 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2574 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2575 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2576 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2577 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2578 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2579 158142c2 bellard
| returned.
2580 158142c2 bellard
*----------------------------------------------------------------------------*/
2581 158142c2 bellard
2582 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32 z;
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( 0x41E < aExp ) {
        /* Magnitude of at least 2^32, an infinity, or a NaN.  A NaN is
         * reported as a positive overflow.
         */
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FF ) {
        /* Magnitude below 1 truncates to 0; inexact unless `a' is a zero. */
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );
    shiftCount = 0x433 - aExp;
    savedASig = aSig;
    /* Drop the fraction bits; aSig becomes the integer magnitude, which is
     * below 2^32 because aExp <= 0x41E.
     */
    aSig >>= shiftCount;
    /* Range-check the unsigned magnitude instead of negating an int32:
     * for a magnitude of exactly 2^31 the old `z = - z' negated INT32_MIN,
     * which is signed overflow (undefined behaviour).
     */
    if ( aSig > ( aSign ? (uint64_t) 0x80000000 : (uint64_t) 0x7FFFFFFF ) ) {
        goto invalid;
    }
    /* The value is now known to fit, so the narrowing is exact. */
    z = (int32) ( aSign ? - (int64_t) aSig : (int64_t) aSig );
    /* Any non-zero bit lost by the truncating shift makes the result inexact. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

 invalid:
    /* Overflow or NaN: raise invalid and saturate in the direction of aSign. */
    float_raise( float_flag_invalid STATUS_VAR);
    return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;

}
2618 158142c2 bellard
2619 158142c2 bellard
/*----------------------------------------------------------------------------
2620 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2621 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
2622 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2623 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
2624 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2625 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
2626 cbcef455 Peter Maydell
| returned.
2627 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
2628 cbcef455 Peter Maydell
2629 cbcef455 Peter Maydell
int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32 z;
    /* Squash input denormals first, as the other float64 conversions in
     * this file do; this call was missing here.
     */
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( 0x40E < aExp ) {
        /* Magnitude of at least 2^16, an infinity, or a NaN.  A NaN is
         * reported as a positive overflow.
         */
        if ( ( aExp == 0x7FF ) && aSig ) {
            aSign = 0;
        }
        goto invalid;
    }
    else if ( aExp < 0x3FF ) {
        /* Magnitude below 1 truncates to 0; inexact unless `a' is a zero. */
        if ( aExp || aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );
    shiftCount = 0x433 - aExp;
    savedASig = aSig;
    /* Drop the fraction bits; aSig becomes the integer magnitude, which is
     * below 2^16 because aExp <= 0x40E.
     */
    aSig >>= shiftCount;
    z = aSig;
    if ( aSign ) {
        z = - z;
    }
    /* Overflow check: the value narrowed to 16 bits must keep the sign of
     * the input.
     */
    if ( ( (int16_t)z < 0 ) ^ aSign ) {
 invalid:
        float_raise( float_flag_invalid STATUS_VAR);
        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
    }
    /* Any non-zero bit lost by the truncating shift makes the result inexact. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;
}
2669 cbcef455 Peter Maydell
2670 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
2671 cbcef455 Peter Maydell
| Returns the result of converting the double-precision floating-point value
2672 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2673 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2674 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2675 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2676 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2677 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2678 158142c2 bellard
*----------------------------------------------------------------------------*/
2679 158142c2 bellard
2680 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);

    /* Non-zero exponent field: make the implicit leading one explicit. */
    if (aExp != 0) {
        aSig |= LIT64( 0x0010000000000000 );
    }

    shiftCount = 0x433 - aExp;
    if (shiftCount > 0) {
        /* Right shift; the shifted-out bits are collected in aSigExtra
         * for the rounding step.
         */
        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
    } else if (aExp > 0x43E) {
        /* Too large for 64 bits, an infinity, or a NaN: raise invalid and
         * saturate.  Positive values and NaNs give the largest positive
         * integer, everything else the most negative one.
         */
        float_raise(float_flag_invalid STATUS_VAR);
        if (!aSign) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        if (aExp == 0x7FF && aSig != LIT64( 0x0010000000000000 )) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    } else {
        /* Left shift (possibly by zero): no bits are lost. */
        aSigExtra = 0;
        aSig <<= -shiftCount;
    }

    return roundAndPackInt64(aSign, aSig, aSigExtra STATUS_VAR);
}
2712 158142c2 bellard
2713 158142c2 bellard
/*----------------------------------------------------------------------------
2714 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2715 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2716 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2717 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2718 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2719 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2720 158142c2 bellard
| returned.
2721 158142c2 bellard
*----------------------------------------------------------------------------*/
2722 158142c2 bellard
2723 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig;
    int64 z;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);

    /* Non-zero exponent field: make the implicit leading one explicit. */
    if (aExp != 0) {
        aSig |= LIT64( 0x0010000000000000 );
    }

    shiftCount = aExp - 0x433;
    if (shiftCount < 0) {
        /* The value has fraction bits that must be truncated. */
        if (aExp < 0x3FE) {
            /* Result is 0; inexact unless the input is a zero. */
            if (aExp != 0 || aSig != 0) {
                STATUS(float_exception_flags) |= float_flag_inexact;
            }
            return 0;
        }
        z = aSig >> (-shiftCount);
        /* Shifting left by (shiftCount & 63) keeps exactly the bits the
         * right shift discarded; any of them set means inexact.
         */
        if ((uint64_t)(aSig << (shiftCount & 63)) != 0) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
    } else if (aExp >= 0x43E) {
        /* Magnitude of at least 2^63, an infinity, or a NaN.  Only the
         * encoding 0xC3E0000000000000 is returned as the most negative
         * integer without raising invalid.
         */
        if (float64_val(a) != LIT64( 0xC3E0000000000000 )) {
            float_raise(float_flag_invalid STATUS_VAR);
            if (!aSign) {
                return LIT64( 0x7FFFFFFFFFFFFFFF );
            }
            if (aExp == 0x7FF && aSig != LIT64( 0x0010000000000000 )) {
                return LIT64( 0x7FFFFFFFFFFFFFFF );
            }
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    } else {
        /* Exact: the significand only needs shifting left. */
        z = aSig << shiftCount;
    }

    if (aSign) {
        z = -z;
    }
    return z;
}
2765 158142c2 bellard
2766 158142c2 bellard
/*----------------------------------------------------------------------------
2767 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2768 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2769 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2770 158142c2 bellard
| Arithmetic.
2771 158142c2 bellard
*----------------------------------------------------------------------------*/
2772 158142c2 bellard
2773 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac64;
    uint32_t sig32;

    a = float64_squash_input_denormal(a STATUS_VAR);
    frac64 = extractFloat64Frac( a );
    exp = extractFloat64Exp( a );
    sign = extractFloat64Sign( a );

    if ( exp == 0x7FF ) {
        /* Zero fraction is an infinity; anything else is a NaN. */
        if ( frac64 == 0 ) {
            return packFloat32( sign, 0xFF, 0 );
        }
        return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
    }

    /* Drop 22 low bits, folding any non-zero discarded bits into a sticky bit. */
    shift64RightJamming( frac64, 22, &frac64 );
    sig32 = frac64;
    if ( exp != 0 || sig32 != 0 ) {
        /* Insert the integer bit and rebias the exponent before rounding. */
        sig32 |= 0x40000000;
        exp -= 0x381;
    }
    return roundAndPackFloat32( sign, exp, sig32 STATUS_VAR );

}
2797 158142c2 bellard
2798 60011498 Paul Brook
2799 60011498 Paul Brook
/*----------------------------------------------------------------------------
2800 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2801 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2802 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2803 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2804 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2805 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2806 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2807 60011498 Paul Brook
| significand.
2808 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2809 bb98fe42 Andreas Färber
static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
{
    /* Fields are added, not OR-ed, so an integer bit in zSig carries into
     * the exponent field. */
    uint32_t signField = ((uint32_t)zSign) << 15;
    uint32_t expField = ((uint32_t)zExp) << 10;

    return make_float16(signField + expField + zSig);
}
2814 60011498 Paul Brook
2815 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2816 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2817 bb4d4bb3 Peter Maydell
2818 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
| Returns the result of converting the half-precision floating-point value
| `a' to the single-precision floating-point format.  If `ieee' is nonzero,
| an exponent field of 0x1f encodes an infinity or NaN; otherwise it is
| treated as an ordinary exponent.
*----------------------------------------------------------------------------*/
float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
{
    flag sign;
    int16 exp;
    uint32_t frac;

    sign = extractFloat16Sign(a);
    exp = extractFloat16Exp(a);
    frac = extractFloat16Frac(a);

    if (ieee && exp == 0x1f) {
        if (frac != 0) {
            return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }
        /* Infinity: the fraction is zero here. */
        return packFloat32(sign, 0xff, 0);
    }

    if (exp == 0) {
        int8 normShift;

        if (frac == 0) {
            return packFloat32(sign, 0, 0);
        }

        /* Subnormal input: shift the leading one up to bit 10 and lower the
         * exponent by the same amount.  Once moved to bit 23 below, that bit
         * carries into the exponent field inside packFloat32. */
        normShift = countLeadingZeros32( frac ) - 21;
        frac <<= normShift;
        exp = -normShift;
    }

    /* Rebias the exponent and widen the 10-bit fraction to 23 bits. */
    return packFloat32(sign, exp + 0x70, frac << 13);
}
2847 60011498 Paul Brook
2848 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the half-precision floating-point format.  If `ieee' is nonzero the
| result uses the IEEE encoding; otherwise it uses the alternative encoding
| that has no infinity or NaN.  In the alternative encoding, NaN inputs
| produce a signed zero, and infinite or overflowing inputs produce the
| largest-magnitude value; all of these raise the invalid exception.
|
| Exception flags: a conversion that discards non-zero significand bits
| raises inexact; underflow is raised only when the result is both tiny and
| inexact; IEEE overflow raises overflow and inexact.
*----------------------------------------------------------------------------*/
float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    uint32_t aSig;
    uint32_t mask;
    uint32_t increment;
    int8 roundingMode;
    /* Largest unbiased exponent the destination format can hold. */
    int16 maxExp = ieee ? 15 : 16;
    flag roundingBumpsExp;
    flag isTiny = 0;
    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* Input is a NaN */
            if (!ieee) {
                /* No NaN encoding available: always Invalid, even for a
                 * quiet NaN, and the result is a signed zero. */
                float_raise(float_flag_invalid STATUS_VAR);
                return packFloat16(aSign, 0, 0);
            }
            return commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Infinity */
        if (!ieee) {
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
        return packFloat16(aSign, 0x1f, 0);
    }
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* Decimal point between bits 22 and 23.  The integer bit is added even
     * for a denormal input; such inputs are far below the half-precision
     * range, so they end up on the return-zero path below. */
    aSig |= 0x00800000;
    aExp -= 0x7f;
    /* Mask of the significand bits that do not fit in the result. */
    if (aExp < -14) {
        /* Result will be denormal in half precision. */
        mask = 0x00ffffff;
        if (aExp >= -24) {
            mask >>= 25 + aExp;
        }
    } else {
        mask = 0x00001fff;
    }

    /* Rounding increment.  When no bits are lost (aSig & mask == 0) the
     * increment stays within the masked bits, so it cannot carry and is
     * discarded by the final right shifts. */
    roundingMode = STATUS(float_rounding_mode);
    switch (roundingMode) {
    case float_round_nearest_even:
        increment = (mask + 1) >> 1;
        if ((aSig & mask) == increment) {
            /* Exact tie: round up only if that makes the result even. */
            increment = aSig & (increment << 1);
        }
        break;
    case float_round_up:
        increment = aSign ? 0 : mask;
        break;
    case float_round_down:
        increment = aSign ? mask : 0;
        break;
    default: /* round_to_zero */
        increment = 0;
        break;
    }

    roundingBumpsExp = (aSig + increment >= 0x01000000);

    /* Overflow is decided before any inexact/underflow flag is raised, so
     * the alternative-format overflow raises Invalid alone. */
    if (aExp > maxExp || (aExp == maxExp && roundingBumpsExp)) {
        if (ieee) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        } else {
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }

    if (aExp < -14) {
        /* Tiny if detected before rounding, or if the rounded result is
         * still below the smallest normal (exponent stays under -14). */
        isTiny =
            (STATUS(float_detect_tininess) == float_tininess_before_rounding)
            || (aExp < -15)
            || (!roundingBumpsExp);
    }
    if (aSig & mask) {
        /* Bits are lost: inexact; underflow only if the result is also tiny. */
        float_raise( float_flag_inexact STATUS_VAR );
        if (isTiny) {
            float_raise( float_flag_underflow STATUS_VAR );
        }
    }

    aSig += increment;
    if (roundingBumpsExp) {
        aSig >>= 1;
        aExp++;
    }

    if (aExp < -24) {
        /* Below the smallest denormal; also keeps the shift below in range. */
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
2941 60011498 Paul Brook
2942 158142c2 bellard
#ifdef FLOATX80
2943 158142c2 bellard
2944 158142c2 bellard
/*----------------------------------------------------------------------------
2945 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2946 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
2947 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2948 158142c2 bellard
| Arithmetic.
2949 158142c2 bellard
*----------------------------------------------------------------------------*/
2950 158142c2 bellard
2951 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac;
    uint64_t sig80;

    a = float64_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat64Frac( a );
    exp = extractFloat64Exp( a );
    sign = extractFloat64Sign( a );

    if ( exp == 0x7FF ) {
        if ( frac != 0 ) {
            return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Infinity: the extended format carries an explicit integer bit. */
        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( exp == 0 ) {
        if ( frac == 0 ) {
            return packFloatx80( sign, 0, 0 );
        }
        /* Subnormal input: normalize before widening. */
        normalizeFloat64Subnormal( frac, &exp, &frac );
    }

    /* Make the integer bit explicit and move the significand to the top. */
    sig80 = ( frac | LIT64( 0x0010000000000000 ) )<<11;
    return packFloatx80( sign, exp + 0x3C00, sig80 );

}
2974 158142c2 bellard
2975 158142c2 bellard
#endif
2976 158142c2 bellard
2977 158142c2 bellard
#ifdef FLOAT128
2978 158142c2 bellard
2979 158142c2 bellard
/*----------------------------------------------------------------------------
2980 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2981 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
2982 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2983 158142c2 bellard
| Arithmetic.
2984 158142c2 bellard
*----------------------------------------------------------------------------*/
2985 158142c2 bellard
2986 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac, sigHigh, sigLow;

    a = float64_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat64Frac( a );
    exp = extractFloat64Exp( a );
    sign = extractFloat64Sign( a );

    if ( exp == 0x7FF ) {
        if ( frac != 0 ) {
            return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        return packFloat128( sign, 0x7FFF, 0, 0 );
    }
    if ( exp == 0 ) {
        if ( frac == 0 ) {
            return packFloat128( sign, 0, 0, 0 );
        }
        /* Subnormal input: normalize, then lower the exponent by one before
         * packing. */
        normalizeFloat64Subnormal( frac, &exp, &frac );
        --exp;
    }

    /* Spread the fraction over the two significand words of the result. */
    shift128Right( frac, 0, 4, &sigHigh, &sigLow );
    return packFloat128( sign, exp + 0x3C00, sigHigh, sigLow );

}
3009 158142c2 bellard
3010 158142c2 bellard
#endif
3011 158142c2 bellard
3012 158142c2 bellard
/*----------------------------------------------------------------------------
3013 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
3014 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
3015 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
3016 158142c2 bellard
| Floating-Point Arithmetic.
3017 158142c2 bellard
*----------------------------------------------------------------------------*/
3018 158142c2 bellard
3019 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
{
    flag negative;
    int16 biasedExp;
    uint64_t unitBit, fractionMask;
    int8 mode;
    uint64_t bits;

    a = float64_squash_input_denormal(a STATUS_VAR);
    biasedExp = extractFloat64Exp( a );

    /* Exponent this large: no fraction bits remain, or a is Inf/NaN. */
    if ( biasedExp >= 0x433 ) {
        if ( ( biasedExp == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }

    /* Magnitude below 1: the result is one of +/-0 and +/-1. */
    if ( biasedExp < 0x3FF ) {
        if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) {
            /* +0 or -0 is returned unchanged and exactly. */
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        negative = extractFloat64Sign( a );
        switch ( STATUS(float_rounding_mode) ) {
         case float_round_up:
            return make_float64(
            negative ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
         case float_round_down:
            return make_float64(negative ? LIT64( 0xBFF0000000000000 ) : 0);
         case float_round_nearest_even:
            /* Strictly above one half rounds away from zero to +/-1. */
            if ( ( biasedExp == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( negative, 0x3FF, 0 );
            }
            break;
         default:
            break;
        }
        return packFloat64( negative, 0, 0 );
    }

    /* General case: round directly on the raw bit pattern. */
    unitBit = (uint64_t) 1 << ( 0x433 - biasedExp );
    fractionMask = unitBit - 1;
    bits = float64_val(a);
    mode = STATUS(float_rounding_mode);
    if ( mode == float_round_nearest_even ) {
        bits += unitBit>>1;
        /* Exact tie: clear the units bit so the result is even. */
        if ( ( bits & fractionMask ) == 0 ) {
            bits &= ~ unitBit;
        }
    }
    else if (    ( mode != float_round_to_zero )
              && ( extractFloat64Sign( make_float64(bits) )
                   ^ ( mode == float_round_up ) ) ) {
        /* Directed rounding away from zero for this sign. */
        bits += fractionMask;
    }
    bits &= ~ fractionMask;
    if ( bits != float64_val(a) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float64(bits);

}
3073 158142c2 bellard
3074 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integer using
| round-toward-zero, regardless of the current rounding mode, and returns the
| result as a double-precision floating-point value.  The rounding mode in
| the status is restored before returning.
*----------------------------------------------------------------------------*/
float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    float64 truncated;
    int savedMode;

    /* Temporarily force round-toward-zero around the generic routine. */
    savedMode = STATUS(float_rounding_mode);
    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;

    return truncated;
}
3084 e6e5906b pbrook
3085 158142c2 bellard
/*----------------------------------------------------------------------------
3086 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
3087 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
3088 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
3089 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3090 158142c2 bellard
| Floating-Point Arithmetic.
3091 158142c2 bellard
*----------------------------------------------------------------------------*/
3092 158142c2 bellard
3093 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
{
    int16 expA, expB, expZ;
    uint64_t sigA, sigB, sum;
    int16 expGap;

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    expGap = expA - expB;
    sigA <<= 9;
    sigB <<= 9;

    if ( expGap == 0 ) {
        /* Equal exponents: no alignment shift is needed. */
        if ( expA == 0x7FF ) {
            if ( sigA | sigB ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            return a;
        }
        if ( expA == 0 ) {
            /* Both operands are zero or subnormal. */
            if (STATUS(flush_to_zero)) {
                if (sigA | sigB) {
                    float_raise(float_flag_output_denormal STATUS_VAR);
                }
                return packFloat64(zSign, 0, 0);
            }
            return packFloat64( zSign, 0, ( sigA + sigB )>>9 );
        }
        /* Both hidden bits together contribute the constant term. */
        sum = LIT64( 0x4000000000000000 ) + sigA + sigB;
        return roundAndPackFloat64( zSign, expA, sum STATUS_VAR );
    }

    if ( 0 < expGap ) {
        /* a has the larger exponent: align b to it. */
        if ( expA == 0x7FF ) {
            if ( sigA ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            return a;
        }
        if ( expB == 0 ) {
            --expGap;
        }
        else {
            sigB |= LIT64( 0x2000000000000000 );
        }
        shift64RightJamming( sigB, expGap, &sigB );
        expZ = expA;
    }
    else {
        /* b has the larger exponent: align a to it. */
        if ( expB == 0x7FF ) {
            if ( sigB ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            return packFloat64( zSign, 0x7FF, 0 );
        }
        if ( expA == 0 ) {
            ++expGap;
        }
        else {
            sigA |= LIT64( 0x2000000000000000 );
        }
        shift64RightJamming( sigA, - expGap, &sigA );
        expZ = expB;
    }

    sigA |= LIT64( 0x2000000000000000 );
    sum = sigA + sigB;
    /* Use the doubled sum with a decremented exponent unless doubling
     * would set the top bit. */
    if ( (int64_t) ( sum<<1 ) >= 0 ) {
        sum <<= 1;
        --expZ;
    }
    return roundAndPackFloat64( zSign, expZ, sum STATUS_VAR );

}
3163 158142c2 bellard
3164 158142c2 bellard
/*----------------------------------------------------------------------------
3165 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
3166 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
3167 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3168 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3169 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3170 158142c2 bellard
*----------------------------------------------------------------------------*/
3171 158142c2 bellard
3172 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
{
    int16 expA, expB, expZ;
    uint64_t sigA, sigB, diff;
    int16 expGap;
    flag bIsLarger;

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    expGap = expA - expB;
    sigA <<= 10;
    sigB <<= 10;

    if ( 0 < expGap ) {
        /* a has the larger exponent, hence the larger magnitude. */
        if ( expA == 0x7FF ) {
            if ( sigA ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            return a;
        }
        if ( expB == 0 ) {
            --expGap;
        }
        else {
            sigB |= LIT64( 0x4000000000000000 );
        }
        shift64RightJamming( sigB, expGap, &sigB );
        sigA |= LIT64( 0x4000000000000000 );
        bIsLarger = 0;
    }
    else if ( expGap < 0 ) {
        /* b has the larger exponent, hence the larger magnitude. */
        if ( expB == 0x7FF ) {
            if ( sigB ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            return packFloat64( zSign ^ 1, 0x7FF, 0 );
        }
        if ( expA == 0 ) {
            ++expGap;
        }
        else {
            sigA |= LIT64( 0x4000000000000000 );
        }
        shift64RightJamming( sigA, - expGap, &sigA );
        sigB |= LIT64( 0x4000000000000000 );
        bIsLarger = 1;
    }
    else {
        /* Equal exponents: the significands decide which is larger. */
        if ( expA == 0x7FF ) {
            if ( sigA | sigB ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            /* Infinity minus infinity. */
            float_raise( float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        if ( expA == 0 ) {
            expA = 1;
            expB = 1;
        }
        if ( sigA == sigB ) {
            /* Exact zero: negative only when rounding down. */
            return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
        }
        bIsLarger = ( sigA < sigB );
    }

    if ( bIsLarger ) {
        /* Result takes the opposite sign when b dominates. */
        diff = sigB - sigA;
        expZ = expB;
        zSign ^= 1;
    }
    else {
        diff = sigA - sigB;
        expZ = expA;
    }
    --expZ;
    return normalizeRoundAndPackFloat64( zSign, expZ, diff STATUS_VAR );

}
3238 158142c2 bellard
3239 158142c2 bellard
/*----------------------------------------------------------------------------
3240 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
3241 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
3242 158142c2 bellard
| Binary Floating-Point Arithmetic.
3243 158142c2 bellard
*----------------------------------------------------------------------------*/
3244 158142c2 bellard
3245 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );

    /* Opposite signs turn the addition into a magnitude subtraction. */
    if ( signA != signB ) {
        return subFloat64Sigs( a, b, signA STATUS_VAR );
    }
    return addFloat64Sigs( a, b, signA STATUS_VAR );

}
3261 158142c2 bellard
3262 158142c2 bellard
/*----------------------------------------------------------------------------
3263 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
3264 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3265 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3266 158142c2 bellard
*----------------------------------------------------------------------------*/
3267 158142c2 bellard
3268 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );

    /* Opposite signs turn the subtraction into a magnitude addition. */
    if ( signA != signB ) {
        return addFloat64Sigs( a, b, signA STATUS_VAR );
    }
    return subFloat64Sigs( a, b, signA STATUS_VAR );

}
3284 158142c2 bellard
3285 158142c2 bellard
/*----------------------------------------------------------------------------
3286 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
3287 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3288 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3289 158142c2 bellard
*----------------------------------------------------------------------------*/
3290 158142c2 bellard
3291 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint64_t sigA, sigB, prodHigh, prodLow;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    signB = extractFloat64Sign( b );
    signZ = signA ^ signB;

    if ( expA == 0x7FF ) {
        /* a is an infinity or NaN. */
        if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        if ( ( expB | sigB ) == 0 ) {
            /* Infinity times zero. */
            float_raise( float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return packFloat64( signZ, 0x7FF, 0 );
    }
    if ( expB == 0x7FF ) {
        /* b is an infinity or NaN; a is finite. */
        if ( sigB ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        if ( ( expA | sigA ) == 0 ) {
            /* Zero times infinity. */
            float_raise( float_flag_invalid STATUS_VAR);
            return float64_default_nan;
        }
        return packFloat64( signZ, 0x7FF, 0 );
    }
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigB, &expB, &sigB );
    }

    expZ = expA + expB - 0x3FF;
    /* Add the hidden bits and left-justify both significands. */
    sigA = ( sigA | LIT64( 0x0010000000000000 ) )<<10;
    sigB = ( sigB | LIT64( 0x0010000000000000 ) )<<11;
    mul64To128( sigA, sigB, &prodHigh, &prodLow );
    /* Fold the low word into a sticky bit. */
    prodHigh |= ( prodLow != 0 );
    /* Renormalize by one position if bit 62 of the product is clear. */
    if ( (int64_t) ( prodHigh<<1 ) >= 0 ) {
        prodHigh <<= 1;
        --expZ;
    }
    return roundAndPackFloat64( signZ, expZ, prodHigh STATUS_VAR );

}
3345 158142c2 bellard
3346 158142c2 bellard
/*----------------------------------------------------------------------------
3347 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
3348 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
3349 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3350 158142c2 bellard
*----------------------------------------------------------------------------*/
3351 158142c2 bellard
3352 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint64_t sigA, sigB, quot;
    uint64_t remHi, remLo;
    uint64_t prodHi, prodLo;

    /* Run both operands through the input-denormal squashing helper
     * before decoding them. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );
    signB = extractFloat64Sign( b );
    signZ = signA ^ signB;

    /* Dividend is NaN or infinity. */
    if ( expA == 0x7FF ) {
        if ( sigA ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        if ( expB == 0x7FF ) {
            if ( sigB ) {
                return propagateFloat64NaN( a, b STATUS_VAR );
            }
            /* infinity / infinity */
            float_raise( float_flag_invalid STATUS_VAR );
            return float64_default_nan;
        }
        return packFloat64( signZ, 0x7FF, 0 );
    }

    /* Divisor is NaN or infinity (dividend is finite here). */
    if ( expB == 0x7FF ) {
        if ( sigB ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        return packFloat64( signZ, 0, 0 );
    }

    /* Divisor is zero or subnormal. */
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            /* 0 / 0 is invalid; anything else / 0 is a division by zero
             * yielding a signed infinity. */
            if ( ( expA == 0 ) && ( sigA == 0 ) ) {
                float_raise( float_flag_invalid STATUS_VAR );
                return float64_default_nan;
            }
            float_raise( float_flag_divbyzero STATUS_VAR );
            return packFloat64( signZ, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( sigB, &expB, &sigB );
    }

    /* Dividend is zero or subnormal. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return packFloat64( signZ, 0, 0 );
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }

    expZ = expA - expB + 0x3FD;
    sigA = ( sigA | LIT64( 0x0010000000000000 ) ) << 10;
    sigB = ( sigB | LIT64( 0x0010000000000000 ) ) << 11;

    /* Halve the dividend (and bump the exponent) when the divisor does
     * not exceed twice the dividend. */
    if ( sigB <= ( sigA + sigA ) ) {
        sigA >>= 1;
        ++expZ;
    }

    quot = estimateDiv128To64( sigA, 0, sigB );

    /* When the low nine bits of the estimate are 0, 1 or 2, multiply
     * back, take the 128-bit remainder and step the quotient down until
     * that remainder is no longer negative. */
    if ( ( quot & 0x1FF ) <= 2 ) {
        mul64To128( sigB, quot, &prodHi, &prodLo );
        sub128( sigA, 0, prodHi, prodLo, &remHi, &remLo );
        while ( (int64_t) remHi < 0 ) {
            --quot;
            add128( remHi, remLo, 0, sigB, &remHi, &remLo );
        }
        /* A nonzero low remainder word is recorded in bit 0. */
        if ( remLo != 0 ) {
            quot |= 1;
        }
    }
    return roundAndPackFloat64( signZ, expZ, quot STATUS_VAR );

}
3417 158142c2 bellard
3418 158142c2 bellard
/*----------------------------------------------------------------------------
3419 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3420 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3421 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3422 158142c2 bellard
*----------------------------------------------------------------------------*/
3423 158142c2 bellard
3424 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signZ;
    int16 expA, expB, expGap;
    uint64_t sigA, sigB;
    uint64_t quot, prevSigA;
    int64_t sigSum;

    /* Run both operands through the input-denormal squashing helper
     * before decoding them. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );
    sigB = extractFloat64Frac( b );
    expB = extractFloat64Exp( b );

    /* `a' is NaN or infinity: propagate a NaN, otherwise invalid. */
    if ( expA == 0x7FF ) {
        if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        float_raise( float_flag_invalid STATUS_VAR );
        return float64_default_nan;
    }

    /* `b' is NaN or infinity: with an infinite `b' the result is `a'. */
    if ( expB == 0x7FF ) {
        if ( sigB ) {
            return propagateFloat64NaN( a, b STATUS_VAR );
        }
        return a;
    }

    /* `b' is zero (invalid) or subnormal. */
    if ( expB == 0 ) {
        if ( sigB == 0 ) {
            float_raise( float_flag_invalid STATUS_VAR );
            return float64_default_nan;
        }
        normalizeFloat64Subnormal( sigB, &expB, &sigB );
    }

    /* `a' is zero (returned unchanged) or subnormal. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return a;
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }

    expGap = expA - expB;
    sigA = ( sigA | LIT64( 0x0010000000000000 ) ) << 11;
    sigB = ( sigB | LIT64( 0x0010000000000000 ) ) << 11;

    if ( expGap < 0 ) {
        /* Exponent of `a' is at least two below that of `b':
         * `a' is returned as is. */
        if ( expGap < -1 ) {
            return a;
        }
        sigA >>= 1;
    }

    /* First quotient bit. */
    if ( sigB <= sigA ) {
        quot = 1;
        sigA -= sigB;
    }
    else {
        quot = 0;
    }

    /* Reduce 62 exponent steps per iteration using a deliberately
     * lowered (by 2, floored at 0) quotient estimate. */
    expGap -= 64;
    while ( 0 < expGap ) {
        quot = estimateDiv128To64( sigA, 0, sigB );
        if ( 2 < quot ) {
            quot -= 2;
        }
        else {
            quot = 0;
        }
        sigA = - ( ( sigB >> 2 ) * quot );
        expGap -= 62;
    }
    expGap += 64;

    if ( 0 < expGap ) {
        /* Final partial step covering the remaining expGap bits. */
        quot = estimateDiv128To64( sigA, 0, sigB );
        if ( 2 < quot ) {
            quot -= 2;
        }
        else {
            quot = 0;
        }
        quot >>= 64 - expGap;
        sigB >>= 2;
        sigA = ( ( sigA >> 1 ) << ( expGap - 1 ) ) - sigB * quot;
    }
    else {
        sigA >>= 2;
        sigB >>= 2;
    }

    /* Keep subtracting `b' until the running value turns negative,
     * remembering the last non-negative value. */
    do {
        prevSigA = sigA;
        ++quot;
        sigA -= sigB;
    } while ( 0 <= (int64_t) sigA );

    /* Pick between the last non-negative value and the first negative
     * one using the sign of their sum; an exact tie is decided by the
     * parity of the quotient. */
    sigSum = sigA + prevSigA;
    if ( ( sigSum < 0 ) || ( ( sigSum == 0 ) && ( quot & 1 ) ) ) {
        sigA = prevSigA;
    }

    /* Convert the chosen value to sign/magnitude form. */
    signZ = ( (int64_t) sigA < 0 );
    if ( signZ ) {
        sigA = - sigA;
    }
    return normalizeRoundAndPackFloat64( signA ^ signZ, expB, sigA STATUS_VAR );

}
3503 158142c2 bellard
3504 158142c2 bellard
/*----------------------------------------------------------------------------
3505 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3506 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3507 158142c2 bellard
| Floating-Point Arithmetic.
3508 158142c2 bellard
*----------------------------------------------------------------------------*/
3509 158142c2 bellard
3510 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
{
    flag signA;
    int16 expA, expZ;
    uint64_t sigA, root, twiceRoot;
    uint64_t remHi, remLo, sqHi, sqLo;

    /* Run the operand through the input-denormal squashing helper
     * before decoding it. */
    a = float64_squash_input_denormal( a STATUS_VAR );

    sigA = extractFloat64Frac( a );
    expA = extractFloat64Exp( a );
    signA = extractFloat64Sign( a );

    /* NaN or infinity: NaNs propagate, +inf is returned, -inf is invalid. */
    if ( expA == 0x7FF ) {
        if ( sigA ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        if ( ! signA ) {
            return a;
        }
        float_raise( float_flag_invalid STATUS_VAR );
        return float64_default_nan;
    }

    /* Negative input: -0 is returned unchanged, anything else is invalid. */
    if ( signA ) {
        if ( ( expA == 0 ) && ( sigA == 0 ) ) {
            return a;
        }
        float_raise( float_flag_invalid STATUS_VAR );
        return float64_default_nan;
    }

    /* Positive zero or subnormal. */
    if ( expA == 0 ) {
        if ( sigA == 0 ) {
            return float64_zero;
        }
        normalizeFloat64Subnormal( sigA, &expA, &sigA );
    }

    expZ = ( ( expA - 0x3FF ) >> 1 ) + 0x3FE;
    sigA |= LIT64( 0x0010000000000000 );

    /* Start from the 32-bit estimate, then widen it with one 128/64
     * division step. */
    root = estimateSqrt32( expA, sigA >> 21 );
    sigA <<= 9 - ( expA & 1 );
    root = estimateDiv128To64( sigA, 0, root << 32 ) + ( root << 30 );

    /* When the low nine bits of the estimate are 5 or less, square it,
     * take the 128-bit remainder and step the root down until that
     * remainder is no longer negative. */
    if ( ( root & 0x1FF ) <= 5 ) {
        twiceRoot = root << 1;
        mul64To128( root, root, &sqHi, &sqLo );
        sub128( sigA, 0, sqHi, sqLo, &remHi, &remLo );
        while ( (int64_t) remHi < 0 ) {
            --root;
            twiceRoot -= 2;
            add128( remHi, remLo, root >> 63, twiceRoot | 1, &remHi, &remLo );
        }
        /* A nonzero remainder is recorded in bit 0. */
        if ( ( remHi | remLo ) != 0 ) {
            root |= 1;
        }
    }
    return roundAndPackFloat64( 0, expZ, root STATUS_VAR );

}
3555 158142c2 bellard
3556 158142c2 bellard
/*----------------------------------------------------------------------------
3557 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3558 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3559 374dfc33 aurel32
| Floating-Point Arithmetic.
3560 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3561 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
{
    flag aSign, zSign;
    int16 aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;

    /* Run the operand through the input-denormal squashing helper
     * before decoding it. */
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* NaNs are handled before the sign test, so that a NaN whose sign
     * bit happens to be set is propagated like any other NaN instead of
     * being reported as the logarithm of a negative number. */
    if ( ( aExp == 0x7FF ) && aSig ) {
        return propagateFloat64NaN( a, float64_zero STATUS_VAR );
    }

    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* log2(+0) and log2(-0) are both -infinity; this is a pole,
             * so the divide-by-zero flag is raised as well. */
            float_raise( float_flag_divbyzero STATUS_VAR);
            return packFloat64( 1, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }

    /* Any remaining negative input (including -infinity) is invalid. */
    if ( aSign ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return float64_default_nan;
    }

    /* Only +infinity is left with an all-ones exponent: log2(+inf) = +inf. */
    if ( aExp == 0x7FF ) {
        return a;
    }

    /* The unbiased exponent becomes the integer part of the result,
     * placed above a 52-bit fraction field. */
    aExp -= 0x3FF;
    aSig |= LIT64( 0x0010000000000000 );
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;

    /* Produce the 52 fraction bits one at a time, most significant
     * first: square the significand, and whenever the square carries
     * into bit 53 halve it and set the current result bit. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Switch from two's complement to sign/magnitude before packing. */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
}
3602 374dfc33 aurel32
3603 374dfc33 aurel32
/*----------------------------------------------------------------------------
3604 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3605 b689362d Aurelien Jarno
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3606 b689362d Aurelien Jarno
| if either operand is a NaN.  Otherwise, the comparison is performed
3607 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3608 158142c2 bellard
*----------------------------------------------------------------------------*/
3609 158142c2 bellard
3610 b689362d Aurelien Jarno
int float64_eq( float64 a, float64 b STATUS_PARAM )
{
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Any NaN operand raises invalid and compares unequal. */
        float_raise( float_flag_invalid STATUS_VAR );
        return 0;
    }

    bitsA = float64_val( a );
    bitsB = float64_val( b );
    if ( bitsA == bitsB ) {
        return 1;
    }
    /* Differing bit patterns are still equal when both are zeros:
     * shifting out the sign bit leaves nothing in either operand. */
    return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) == 0;

}
3627 158142c2 bellard
3628 158142c2 bellard
/*----------------------------------------------------------------------------
3629 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3630 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  The invalid
3631 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
3632 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3633 158142c2 bellard
*----------------------------------------------------------------------------*/
3634 158142c2 bellard
3635 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Any NaN operand raises invalid; the relation does not hold. */
        float_raise( float_flag_invalid STATUS_VAR );
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    bitsA = float64_val( a );
    bitsB = float64_val( b );

    if ( signA != signB ) {
        /* Opposite signs: true when `a' is the negative one, or when
         * both operands are zeros of either sign. */
        if ( signA ) {
            return 1;
        }
        return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) == 0;
    }

    /* Same sign: identical patterns are equal; otherwise the unsigned
     * ordering of the bit patterns is flipped for negative operands. */
    if ( bitsA == bitsB ) {
        return 1;
    }
    return ( signA ^ ( bitsA < bitsB ) ) != 0;

}
3656 158142c2 bellard
3657 158142c2 bellard
/*----------------------------------------------------------------------------
3658 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3659 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
3660 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
3661 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3662 158142c2 bellard
*----------------------------------------------------------------------------*/
3663 158142c2 bellard
3664 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Any NaN operand raises invalid; the relation does not hold. */
        float_raise( float_flag_invalid STATUS_VAR );
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    bitsA = float64_val( a );
    bitsB = float64_val( b );

    if ( signA != signB ) {
        /* Opposite signs: true only when `a' is the negative one and
         * the operands are not both zeros. */
        if ( ! signA ) {
            return 0;
        }
        return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) != 0;
    }

    /* Same sign: identical patterns are not "less than"; otherwise the
     * unsigned ordering of the bit patterns is flipped for negatives. */
    if ( bitsA == bitsB ) {
        return 0;
    }
    return ( signA ^ ( bitsA < bitsB ) ) != 0;

}
3685 158142c2 bellard
3686 158142c2 bellard
/*----------------------------------------------------------------------------
3687 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3688 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
3689 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
3690 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
3691 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3692 67b7861d Aurelien Jarno
3693 67b7861d Aurelien Jarno
int float64_unordered( float64 a, float64 b STATUS_PARAM )
{
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );

    /* Neither operand is a NaN: the pair is ordered. */
    if ( ! nanA && ! nanB ) {
        return 0;
    }

    /* At least one NaN: raise invalid and report "unordered". */
    float_raise( float_flag_invalid STATUS_VAR );
    return 1;
}
3706 67b7861d Aurelien Jarno
3707 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3708 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3709 f5a64251 Aurelien Jarno
| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3710 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
3711 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
3712 158142c2 bellard
*----------------------------------------------------------------------------*/
3713 158142c2 bellard
3714 b689362d Aurelien Jarno
int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
{
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Only a signaling NaN raises invalid here. */
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR );
        }
        return 0;
    }

    bitsA = float64_val( a );
    bitsB = float64_val( b );
    if ( bitsA == bitsB ) {
        return 1;
    }
    /* Differing bit patterns are still equal when both are zeros:
     * shifting out the sign bit leaves nothing in either operand. */
    return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) == 0;

}
3733 158142c2 bellard
3734 158142c2 bellard
/*----------------------------------------------------------------------------
3735 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
3736 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3737 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
3738 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3739 158142c2 bellard
*----------------------------------------------------------------------------*/
3740 158142c2 bellard
3741 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Only a signaling NaN raises invalid here. */
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR );
        }
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    bitsA = float64_val( a );
    bitsB = float64_val( b );

    if ( signA != signB ) {
        /* Opposite signs: true when `a' is the negative one, or when
         * both operands are zeros of either sign. */
        if ( signA ) {
            return 1;
        }
        return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) == 0;
    }

    /* Same sign: identical patterns are equal; otherwise the unsigned
     * ordering of the bit patterns is flipped for negative operands. */
    if ( bitsA == bitsB ) {
        return 1;
    }
    return ( signA ^ ( bitsA < bitsB ) ) != 0;

}
3764 158142c2 bellard
3765 158142c2 bellard
/*----------------------------------------------------------------------------
3766 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
3767 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3768 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3769 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3770 158142c2 bellard
*----------------------------------------------------------------------------*/
3771 158142c2 bellard
3772 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
{
    flag signA, signB;
    uint64_t bitsA, bitsB;
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );
    if ( nanA || nanB ) {
        /* Only a signaling NaN raises invalid here. */
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid STATUS_VAR );
        }
        return 0;
    }

    signA = extractFloat64Sign( a );
    signB = extractFloat64Sign( b );
    bitsA = float64_val( a );
    bitsB = float64_val( b );

    if ( signA != signB ) {
        /* Opposite signs: true only when `a' is the negative one and
         * the operands are not both zeros. */
        if ( ! signA ) {
            return 0;
        }
        return ( (uint64_t) ( ( bitsA | bitsB ) << 1 ) ) != 0;
    }

    /* Same sign: identical patterns are not "less than"; otherwise the
     * unsigned ordering of the bit patterns is flipped for negatives. */
    if ( bitsA == bitsB ) {
        return 0;
    }
    return ( signA ^ ( bitsA < bitsB ) ) != 0;

}
3795 158142c2 bellard
3796 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
3797 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
3798 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
3799 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
3800 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
3801 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
3802 67b7861d Aurelien Jarno
3803 67b7861d Aurelien Jarno
int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
{
    int nanA, nanB;

    /* Run both operands through the input-denormal squashing helper. */
    a = float64_squash_input_denormal( a STATUS_VAR );
    b = float64_squash_input_denormal( b STATUS_VAR );

    /* An all-ones exponent with a nonzero fraction marks a NaN. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 );

    /* Neither operand is a NaN: the pair is ordered. */
    if ( ! nanA && ! nanB ) {
        return 0;
    }

    /* Unordered; only a signaling NaN raises invalid here. */
    if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
        float_raise( float_flag_invalid STATUS_VAR );
    }
    return 1;
}
3818 67b7861d Aurelien Jarno
3819 158142c2 bellard
#ifdef FLOATX80
3820 158142c2 bellard
3821 158142c2 bellard
/*----------------------------------------------------------------------------
3822 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3823 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3824 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3825 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3826 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
3827 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
3828 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3829 158142c2 bellard
*----------------------------------------------------------------------------*/
3830 158142c2 bellard
3831 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 shift;

    /* A NaN is converted as if it were positive, so that it ends up as the
     * largest positive integer. */
    if ( biasedExp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) ) {
            sign = 0;
        }
    }

    /* Never shift by less than one bit. */
    shift = 0x4037 - biasedExp;
    if ( shift < 1 ) {
        shift = 1;
    }
    shift64RightJamming( sig, shift, &sig );

    /* Rounding and the final range check are done by the shared helper. */
    return roundAndPackInt32( sign, sig STATUS_VAR );
}
3847 158142c2 bellard
3848 158142c2 bellard
/*----------------------------------------------------------------------------
3849 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3850 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
3851 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3852 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3853 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3854 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3855 158142c2 bellard
| sign as `a' is returned.
3856 158142c2 bellard
*----------------------------------------------------------------------------*/
3857 158142c2 bellard
3858 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t intPart;
    int32 shift;
    int32 result;

    if ( biasedExp > 0x401E ) {
        /* Exponent too large for the 32-bit result (includes infinities and
         * NaNs).  A NaN saturates to the positive limit. */
        if ( ( biasedExp == 0x7FFF ) && (uint64_t) ( sig<<1 ) ) {
            sign = 0;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        return sign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }
    if ( biasedExp < 0x3FFF ) {
        /* Magnitude below one: truncates to zero, inexact unless the input
         * is itself zero. */
        if ( biasedExp || sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Drop the fraction bits, then apply the sign. */
    shift = 0x403E - biasedExp;
    intPart = sig >> shift;
    result = intPart;
    if ( sign ) {
        result = - result;
    }

    /* If the sign of the narrowed result disagrees with the input sign, the
     * value did not fit. */
    if ( ( result < 0 ) ^ sign ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return sign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }

    /* Any bit lost by the right shift makes the conversion inexact. */
    if ( ( intPart<<shift ) != sig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;
}
3892 158142c2 bellard
3893 158142c2 bellard
/*----------------------------------------------------------------------------
3894 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3895 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3896 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3897 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
3898 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
3899 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
3900 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
3901 158142c2 bellard
*----------------------------------------------------------------------------*/
3902 158142c2 bellard
3903 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 shift = 0x403E - biasedExp;
    uint64_t sigExtra;

    if ( shift < 0 ) {
        /* Exponent above 0x403E: out of range.  Negative inputs saturate to
         * the most negative integer unless the exponent is all-ones and the
         * significand differs from 0x8000000000000000; everything else
         * saturates to the largest positive integer. */
        float_raise( float_flag_invalid STATUS_VAR);
        if (    sign
             && ! (    ( biasedExp == 0x7FFF )
                    && ( sig != LIT64( 0x8000000000000000 ) ) )
           ) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    if ( shift == 0 ) {
        /* No shift needed, so no bits are shifted out. */
        sigExtra = 0;
    }
    else {
        /* Shift right, collecting the shifted-out bits in sigExtra. */
        shift64ExtraRightJamming( sig, 0, shift, &sig, &sigExtra );
    }
    return roundAndPackInt64( sign, sig, sigExtra STATUS_VAR );
}
3932 158142c2 bellard
3933 158142c2 bellard
/*----------------------------------------------------------------------------
3934 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3935 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
3936 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3937 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
3938 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
3939 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
3940 158142c2 bellard
| sign as `a' is returned.
3941 158142c2 bellard
*----------------------------------------------------------------------------*/
3942 158142c2 bellard
3943 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 shift = biasedExp - 0x403E;
    int64 result;

    if ( shift >= 0 ) {
        /* Look only at the bits below the top significand bit. */
        sig &= LIT64( 0x7FFFFFFFFFFFFFFF );
        if ( ( a.high == 0xC03E ) && ( sig == 0 ) ) {
            /* High word 0xC03E with none of those bits set: returned as the
             * most negative integer without raising invalid. */
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! sign || ( ( biasedExp == 0x7FFF ) && sig ) ) {
            /* Positive overflow, or a NaN. */
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }
    if ( biasedExp < 0x3FFF ) {
        /* Magnitude below one: truncates to zero, inexact unless the input
         * is itself zero. */
        if ( biasedExp | sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* shift is negative here, so -shift is the right-shift amount. */
    result = sig>>( - shift );
    /* The bits removed by that shift decide whether the result is exact. */
    if ( (uint64_t) ( sig<<( shift & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;
}
3976 158142c2 bellard
3977 158142c2 bellard
/*----------------------------------------------------------------------------
3978 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
3979 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
3980 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
3981 158142c2 bellard
| Floating-Point Arithmetic.
3982 158142c2 bellard
*----------------------------------------------------------------------------*/
3983 158142c2 bellard
3984 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );

    if ( biasedExp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) == 0 ) {
            /* Infinity keeps its sign. */
            return packFloat32( sign, 0xFF, 0 );
        }
        /* NaN: convert through the common NaN representation. */
        return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
    }

    /* Narrow the significand by 33 bits with the jamming shift helper. */
    shift64RightJamming( sig, 33, &sig );
    /* The exponent is rebased for every input except an exact zero. */
    if ( biasedExp || sig ) {
        biasedExp -= 0x3F81;
    }
    return roundAndPackFloat32( sign, biasedExp, sig STATUS_VAR );
}
4004 158142c2 bellard
4005 158142c2 bellard
/*----------------------------------------------------------------------------
4006 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4007 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
4008 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4009 158142c2 bellard
| Floating-Point Arithmetic.
4010 158142c2 bellard
*----------------------------------------------------------------------------*/
4011 158142c2 bellard
4012 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t narrowedSig;

    if ( biasedExp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) == 0 ) {
            /* Infinity keeps its sign. */
            return packFloat64( sign, 0x7FF, 0 );
        }
        /* NaN: convert through the common NaN representation. */
        return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
    }

    /* Shift the significand right by one bit with the jamming helper. */
    shift64RightJamming( sig, 1, &narrowedSig );
    /* The exponent is rebased for every input except an exact zero; the
     * test uses the unshifted significand. */
    if ( biasedExp || sig ) {
        biasedExp -= 0x3C01;
    }
    return roundAndPackFloat64( sign, biasedExp, narrowedSig STATUS_VAR );
}
4032 158142c2 bellard
4033 158142c2 bellard
#ifdef FLOAT128
4034 158142c2 bellard
4035 158142c2 bellard
/*----------------------------------------------------------------------------
4036 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4037 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
4038 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4039 158142c2 bellard
| Floating-Point Arithmetic.
4040 158142c2 bellard
*----------------------------------------------------------------------------*/
4041 158142c2 bellard
4042 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int16 biasedExp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t sigHigh, sigLow;

    if ( biasedExp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) ) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
    }

    /* Shift the top significand bit out, then move the remaining bits right
     * by 16 into the two result words.  The exponent field is reused
     * unchanged. */
    shift128Right( sig<<1, 0, 16, &sigHigh, &sigLow );
    return packFloat128( sign, biasedExp, sigHigh, sigLow );
}
4058 158142c2 bellard
4059 158142c2 bellard
#endif
4060 158142c2 bellard
4061 158142c2 bellard
/*----------------------------------------------------------------------------
4062 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
4063 158142c2 bellard
| and returns the result as an extended double-precision floating-point
4064 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
4065 158142c2 bellard
| Binary Floating-Point Arithmetic.
4066 158142c2 bellard
*----------------------------------------------------------------------------*/
4067 158142c2 bellard
4068 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag aSign;
    int32 aExp;
    uint64_t lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;

    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* The rounding mask computed further down is 1<<( 0x403E - aExp ),
         * so from this exponent upward there is nothing below the units
         * bit to round away.  Only NaNs need processing. */
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp < 0x3FFF ) {
        /* Magnitude below one.  Only a true zero is returned unchanged.
         * The whole significand is tested, including the explicit integer
         * bit, so that a pseudo-denormal (exponent 0, integer bit set) is
         * rounded and flagged inexact instead of being mistaken for zero. */
        if (    ( aExp == 0 )
             && ( extractFloatx80Frac( a ) == 0 ) ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        aSign = extractFloatx80Sign( a );
        switch ( STATUS(float_rounding_mode) ) {
         case float_round_nearest_even:
            /* Strictly more than one half rounds away from zero to +/-1;
             * exactly one half falls through to the signed zero below. */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        /* Remaining modes, and the nearest-even case that did not round
         * away, give a zero carrying the input sign. */
        return packFloatx80( aSign, 0, 0 );
    }
    /* lastBitMask selects the units bit of the significand; roundBitsMask
     * covers every bit below it. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = STATUS(float_rounding_mode);
    if ( roundingMode == float_round_nearest_even ) {
        /* Add one half; on an exact tie clear the units bit to land on the
         * even neighbour. */
        z.low += lastBitMask>>1;
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude only when the direction
         * points away from zero for this sign. */
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z.low += roundBitsMask;
        }
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        /* The addition carried out of the significand: step the exponent
         * and restore the top significand bit. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
    return z;

}
4133 158142c2 bellard
4134 158142c2 bellard
/*----------------------------------------------------------------------------
4135 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
4136 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
4137 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
4138 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4139 158142c2 bellard
| Floating-Point Arithmetic.
4140 158142c2 bellard
*----------------------------------------------------------------------------*/
4141 158142c2 bellard
4142 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* b's exponent field is zero: align by one bit less. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent. */
        if ( bExp == 0x7FFF ) {
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        /* a's exponent field is zero: align by one bit less (expDiff is
         * negative here). */
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponent fields. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if (    ( ( aSig | bSig ) & LIT64( 0x8000000000000000 ) )
                 && ( zSig0 < aSig ) ) {
                /* At least one operand has its top significand bit set
                 * although its exponent field is zero (a pseudo-denormal),
                 * and the 64-bit sum wrapped.  Take the shift-right path,
                 * which puts the lost carry back as the top bit, starting
                 * from exponent 1. */
                zExp = 1;
                goto shiftRight1;
            }
            if ( zSig0 == 0 ) {
                /* Both operands are zero.  Return the signed zero directly
                 * rather than passing a zero significand to the subnormal
                 * normalization helper (which presumably shifts by the
                 * leading-zero count, i.e. 64 here; the helper is defined
                 * elsewhere -- TODO confirm). */
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set: the sum is taken as is.  Otherwise fall through and
     * shift right by one, forcing the top bit on. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
4199 158142c2 bellard
4200 158142c2 bellard
/*----------------------------------------------------------------------------
4201 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
4202 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
4203 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4204 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4205 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4206 158142c2 bellard
*----------------------------------------------------------------------------*/
4207 158142c2 bellard
4208 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4209 158142c2 bellard
{
4210 158142c2 bellard
    int32 aExp, bExp, zExp;
4211 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4212 158142c2 bellard
    int32 expDiff;
4213 158142c2 bellard
    floatx80 z;
4214 158142c2 bellard
4215 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4216 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4217 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4218 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4219 158142c2 bellard
    expDiff = aExp - bExp;
    /* Dispatch on which operand has the larger exponent field; equal
     * exponent fields fall through to the code below. */
4220 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
4221 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal, all-ones exponent fields: a NaN in either operand is
     * propagated; otherwise invalid is raised and the default NaN is
     * returned. */
4222 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4223 bb98fe42 Andreas Färber
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
4224 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4225 158142c2 bellard
        }
4226 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4227 158142c2 bellard
        z.low = floatx80_default_nan_low;
4228 158142c2 bellard
        z.high = floatx80_default_nan_high;
4229 158142c2 bellard
        return z;
4230 158142c2 bellard
    }
    /* Both exponent fields are zero here: use 1 for both so that the zExp
     * picked up at aBigger/bBigger is 1 rather than 0. */
4231 158142c2 bellard
    if ( aExp == 0 ) {
4232 158142c2 bellard
        aExp = 1;
4233 158142c2 bellard
        bExp = 1;
4234 158142c2 bellard
    }
    /* No alignment shift was needed, so there are no extra low bits. */
4235 158142c2 bellard
    zSig1 = 0;
    /* Compare significands to choose the subtraction order. */
4236 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
4237 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
    /* Significands equal: the result is zero, with the sign bit set only
     * when the rounding mode is round-down. */
4238 158142c2 bellard
    return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
4239 158142c2 bellard
 bExpBigger:
    /* b has the larger exponent field.  An infinite b gives an infinity
     * whose sign is the opposite of zSign. */
4240 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4241 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4242 158142c2 bellard
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
4243 158142c2 bellard
    }
    /* a's exponent field is zero: align by one bit less (expDiff is
     * negative here). */
4244 158142c2 bellard
    if ( aExp == 0 ) ++expDiff;
4245 158142c2 bellard
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4246 158142c2 bellard
 bBigger:
    /* 128-bit difference b - a; the result sign is flipped because the
     * operands are subtracted in reverse order. */
4247 158142c2 bellard
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4248 158142c2 bellard
    zExp = bExp;
4249 158142c2 bellard
    zSign ^= 1;
4250 158142c2 bellard
    goto normalizeRoundAndPack;
4251 158142c2 bellard
 aExpBigger:
    /* a has the larger exponent field.  An infinite a is returned as is. */
4252 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4253 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4254 158142c2 bellard
        return a;
4255 158142c2 bellard
    }
    /* b's exponent field is zero: align by one bit less. */
4256 158142c2 bellard
    if ( bExp == 0 ) --expDiff;
4257 158142c2 bellard
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4258 158142c2 bellard
 aBigger:
    /* 128-bit difference a - b; zSign is kept. */
4259 158142c2 bellard
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4260 158142c2 bellard
    zExp = aExp;
4261 158142c2 bellard
 normalizeRoundAndPack:
    /* Hand the 128-bit difference to the normalizing round-and-pack helper
     * at the configured floatx80 rounding precision. */
4262 158142c2 bellard
    return
4263 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4264 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4265 158142c2 bellard
4266 158142c2 bellard
}
4267 158142c2 bellard
4268 158142c2 bellard
/*----------------------------------------------------------------------------
4269 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
4270 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4271 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4272 158142c2 bellard
*----------------------------------------------------------------------------*/
4273 158142c2 bellard
4274 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign( a );
    flag bSign = extractFloatx80Sign( b );

    /* Opposite signs: the sum is a difference of magnitudes. */
    if ( aSign != bSign ) {
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
    }
    /* Same sign: add the magnitudes and keep that sign. */
    return addFloatx80Sigs( a, b, aSign STATUS_VAR );
}
4288 158142c2 bellard
4289 158142c2 bellard
/*----------------------------------------------------------------------------
4290 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
4291 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4292 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4293 158142c2 bellard
*----------------------------------------------------------------------------*/
4294 158142c2 bellard
4295 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign( a );
    flag bSign = extractFloatx80Sign( b );

    /* Opposite signs: the difference is a sum of magnitudes. */
    if ( aSign != bSign ) {
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
    }
    /* Same sign: subtract the magnitudes. */
    return subFloatx80Sigs( a, b, aSign STATUS_VAR );
}
4309 158142c2 bellard
4310 158142c2 bellard
/*----------------------------------------------------------------------------
4311 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
4312 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4313 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4314 158142c2 bellard
*----------------------------------------------------------------------------*/
4315 158142c2 bellard
4316 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
{
    uint64_t aSig = extractFloatx80Frac( a );
    int32 aExp = extractFloatx80Exp( a );
    flag aSign = extractFloatx80Sign( a );
    uint64_t bSig = extractFloatx80Frac( b );
    int32 bExp = extractFloatx80Exp( b );
    flag bSign = extractFloatx80Sign( b );
    flag zSign = aSign ^ bSign;
    int32 zExp;
    uint64_t zSig0, zSig1;
    floatx80 z;

    /* At least one operand is an infinity or a NaN. */
    if ( ( aExp == 0x7FFF ) || ( bExp == 0x7FFF ) ) {
        int aIsNaN = ( aExp == 0x7FFF ) && ( (uint64_t) ( aSig<<1 ) != 0 );
        int bIsNaN = ( bExp == 0x7FFF ) && ( (uint64_t) ( bSig<<1 ) != 0 );
        int otherIsZero;

        if ( aIsNaN || bIsNaN ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        /* An infinity is involved: look at the operand it is multiplied
         * with (b when a is infinite, otherwise a). */
        if ( aExp == 0x7FFF ) {
            otherIsZero = ( ( bExp | bSig ) == 0 );
        }
        else {
            otherIsZero = ( ( aExp | aSig ) == 0 );
        }
        if ( otherIsZero ) {
            /* Infinity times zero: invalid, default NaN. */
            float_raise( float_flag_invalid STATUS_VAR);
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    /* Zero operands give a signed zero; other inputs with a zero exponent
     * field are normalized first. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            return packFloatx80( zSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            return packFloatx80( zSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }

    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* If the top bit of the 128-bit product is clear, shift left by one
     * and compensate in the exponent. */
    if ( (int64_t) zSig0 > 0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                 zSign, zExp, zSig0, zSig1 STATUS_VAR );
}
4368 158142c2 bellard
4369 158142c2 bellard
/*----------------------------------------------------------------------------
4370 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
4371 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
4372 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4373 158142c2 bellard
*----------------------------------------------------------------------------*/
4374 158142c2 bellard
4375 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4376 158142c2 bellard
{
4377 158142c2 bellard
    flag aSign, bSign, zSign;
4378 158142c2 bellard
    int32 aExp, bExp, zExp;
4379 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4380 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, term0, term1, term2;
4381 158142c2 bellard
    floatx80 z;
4382 158142c2 bellard
4383 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4384 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4385 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4386 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4387 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4388 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4389 158142c2 bellard
    zSign = aSign ^ bSign;
4390 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4391 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4392 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4393 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4394 158142c2 bellard
            goto invalid;
4395 158142c2 bellard
        }
4396 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4397 158142c2 bellard
    }
4398 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4399 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4400 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
4401 158142c2 bellard
    }
4402 158142c2 bellard
    if ( bExp == 0 ) {
4403 158142c2 bellard
        if ( bSig == 0 ) {
4404 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
4405 158142c2 bellard
 invalid:
4406 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4407 158142c2 bellard
                z.low = floatx80_default_nan_low;
4408 158142c2 bellard
                z.high = floatx80_default_nan_high;
4409 158142c2 bellard
                return z;
4410 158142c2 bellard
            }
4411 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4412 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4413 158142c2 bellard
        }
4414 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4415 158142c2 bellard
    }
4416 158142c2 bellard
    if ( aExp == 0 ) {
4417 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4418 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4419 158142c2 bellard
    }
4420 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
4421 158142c2 bellard
    rem1 = 0;
4422 158142c2 bellard
    if ( bSig <= aSig ) {
4423 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4424 158142c2 bellard
        ++zExp;
4425 158142c2 bellard
    }
4426 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4427 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4428 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4429 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4430 158142c2 bellard
        --zSig0;
4431 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4432 158142c2 bellard
    }
4433 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4434 bb98fe42 Andreas Färber
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4435 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4436 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4437 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4438 158142c2 bellard
            --zSig1;
4439 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4440 158142c2 bellard
        }
4441 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4442 158142c2 bellard
    }
4443 158142c2 bellard
    return
4444 158142c2 bellard
        roundAndPackFloatx80(
4445 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4446 158142c2 bellard
4447 158142c2 bellard
}
4448 158142c2 bellard
4449 158142c2 bellard
/*----------------------------------------------------------------------------
4450 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4451 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4452 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4453 158142c2 bellard
*----------------------------------------------------------------------------*/
4454 158142c2 bellard
4455 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4456 158142c2 bellard
{
4457 ed086f3d Blue Swirl
    flag aSign, zSign;
4458 158142c2 bellard
    int32 aExp, bExp, expDiff;
4459 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig;
4460 bb98fe42 Andreas Färber
    uint64_t q, term0, term1, alternateASig0, alternateASig1;
4461 158142c2 bellard
    floatx80 z;
4462 158142c2 bellard
4463 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4464 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4465 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4466 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4467 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4468 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4469 bb98fe42 Andreas Färber
        if (    (uint64_t) ( aSig0<<1 )
4470 bb98fe42 Andreas Färber
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4471 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4472 158142c2 bellard
        }
4473 158142c2 bellard
        goto invalid;
4474 158142c2 bellard
    }
4475 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4476 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4477 158142c2 bellard
        return a;
4478 158142c2 bellard
    }
4479 158142c2 bellard
    if ( bExp == 0 ) {
4480 158142c2 bellard
        if ( bSig == 0 ) {
4481 158142c2 bellard
 invalid:
4482 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4483 158142c2 bellard
            z.low = floatx80_default_nan_low;
4484 158142c2 bellard
            z.high = floatx80_default_nan_high;
4485 158142c2 bellard
            return z;
4486 158142c2 bellard
        }
4487 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4488 158142c2 bellard
    }
4489 158142c2 bellard
    if ( aExp == 0 ) {
4490 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
4491 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4492 158142c2 bellard
    }
4493 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
4494 158142c2 bellard
    zSign = aSign;
4495 158142c2 bellard
    expDiff = aExp - bExp;
4496 158142c2 bellard
    aSig1 = 0;
4497 158142c2 bellard
    if ( expDiff < 0 ) {
4498 158142c2 bellard
        if ( expDiff < -1 ) return a;
4499 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4500 158142c2 bellard
        expDiff = 0;
4501 158142c2 bellard
    }
4502 158142c2 bellard
    q = ( bSig <= aSig0 );
4503 158142c2 bellard
    if ( q ) aSig0 -= bSig;
4504 158142c2 bellard
    expDiff -= 64;
4505 158142c2 bellard
    while ( 0 < expDiff ) {
4506 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4507 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4508 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
4509 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4510 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4511 158142c2 bellard
        expDiff -= 62;
4512 158142c2 bellard
    }
4513 158142c2 bellard
    expDiff += 64;
4514 158142c2 bellard
    if ( 0 < expDiff ) {
4515 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4516 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4517 158142c2 bellard
        q >>= 64 - expDiff;
4518 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4519 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4520 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4521 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4522 158142c2 bellard
            ++q;
4523 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4524 158142c2 bellard
        }
4525 158142c2 bellard
    }
4526 158142c2 bellard
    else {
4527 158142c2 bellard
        term1 = 0;
4528 158142c2 bellard
        term0 = bSig;
4529 158142c2 bellard
    }
4530 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4531 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4532 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4533 158142c2 bellard
              && ( q & 1 ) )
4534 158142c2 bellard
       ) {
4535 158142c2 bellard
        aSig0 = alternateASig0;
4536 158142c2 bellard
        aSig1 = alternateASig1;
4537 158142c2 bellard
        zSign = ! zSign;
4538 158142c2 bellard
    }
4539 158142c2 bellard
    return
4540 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4541 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4542 158142c2 bellard
4543 158142c2 bellard
}
4544 158142c2 bellard
4545 158142c2 bellard
/*----------------------------------------------------------------------------
4546 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4547 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4548 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4549 158142c2 bellard
*----------------------------------------------------------------------------*/
4550 158142c2 bellard
4551 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4552 158142c2 bellard
{
4553 158142c2 bellard
    flag aSign;
4554 158142c2 bellard
    int32 aExp, zExp;
4555 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4556 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4557 158142c2 bellard
    floatx80 z;
4558 158142c2 bellard
4559 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4560 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4561 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4562 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4563 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4564 158142c2 bellard
        if ( ! aSign ) return a;
4565 158142c2 bellard
        goto invalid;
4566 158142c2 bellard
    }
4567 158142c2 bellard
    if ( aSign ) {
4568 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4569 158142c2 bellard
 invalid:
4570 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4571 158142c2 bellard
        z.low = floatx80_default_nan_low;
4572 158142c2 bellard
        z.high = floatx80_default_nan_high;
4573 158142c2 bellard
        return z;
4574 158142c2 bellard
    }
4575 158142c2 bellard
    if ( aExp == 0 ) {
4576 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4577 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4578 158142c2 bellard
    }
4579 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4580 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4581 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4582 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4583 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4584 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4585 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4586 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4587 158142c2 bellard
        --zSig0;
4588 158142c2 bellard
        doubleZSig0 -= 2;
4589 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4590 158142c2 bellard
    }
4591 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4592 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4593 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4594 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4595 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4596 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4597 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4598 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4599 158142c2 bellard
            --zSig1;
4600 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4601 158142c2 bellard
            term3 |= 1;
4602 158142c2 bellard
            term2 |= doubleZSig0;
4603 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4604 158142c2 bellard
        }
4605 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4606 158142c2 bellard
    }
4607 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4608 158142c2 bellard
    zSig0 |= doubleZSig0;
4609 158142c2 bellard
    return
4610 158142c2 bellard
        roundAndPackFloatx80(
4611 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4612 158142c2 bellard
4613 158142c2 bellard
}
4614 158142c2 bellard
4615 158142c2 bellard
/*----------------------------------------------------------------------------
4616 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is equal
4617 b689362d Aurelien Jarno
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
4618 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
4619 b689362d Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4620 158142c2 bellard
*----------------------------------------------------------------------------*/
4621 158142c2 bellard
4622 b689362d Aurelien Jarno
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4623 158142c2 bellard
{
4624 158142c2 bellard
4625 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4626 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4627 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4628 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4629 158142c2 bellard
       ) {
4630 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4631 158142c2 bellard
        return 0;
4632 158142c2 bellard
    }
4633 158142c2 bellard
    return
4634 158142c2 bellard
           ( a.low == b.low )
4635 158142c2 bellard
        && (    ( a.high == b.high )
4636 158142c2 bellard
             || (    ( a.low == 0 )
4637 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
4638 158142c2 bellard
           );
4639 158142c2 bellard
4640 158142c2 bellard
}
4641 158142c2 bellard
4642 158142c2 bellard
/*----------------------------------------------------------------------------
4643 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4644 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
4645 f5a64251 Aurelien Jarno
| invalid exception is raised if either operand is a NaN.  The comparison is
4646 f5a64251 Aurelien Jarno
| performed according to the IEC/IEEE Standard for Binary Floating-Point
4647 f5a64251 Aurelien Jarno
| Arithmetic.
4648 158142c2 bellard
*----------------------------------------------------------------------------*/
4649 158142c2 bellard
4650 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4651 158142c2 bellard
{
4652 158142c2 bellard
    flag aSign, bSign;
4653 158142c2 bellard
4654 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4655 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4656 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4657 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4658 158142c2 bellard
       ) {
4659 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4660 158142c2 bellard
        return 0;
4661 158142c2 bellard
    }
4662 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4663 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4664 158142c2 bellard
    if ( aSign != bSign ) {
4665 158142c2 bellard
        return
4666 158142c2 bellard
               aSign
4667 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4668 158142c2 bellard
                 == 0 );
4669 158142c2 bellard
    }
4670 158142c2 bellard
    return
4671 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4672 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4673 158142c2 bellard
4674 158142c2 bellard
}
4675 158142c2 bellard
4676 158142c2 bellard
/*----------------------------------------------------------------------------
4677 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
4678 f5a64251 Aurelien Jarno
| less than the corresponding value `b', and 0 otherwise.  The invalid
4679 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
4680 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4681 158142c2 bellard
*----------------------------------------------------------------------------*/
4682 158142c2 bellard
4683 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4684 158142c2 bellard
{
4685 158142c2 bellard
    flag aSign, bSign;
4686 158142c2 bellard
4687 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4688 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4689 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4690 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4691 158142c2 bellard
       ) {
4692 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4693 158142c2 bellard
        return 0;
4694 158142c2 bellard
    }
4695 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4696 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4697 158142c2 bellard
    if ( aSign != bSign ) {
4698 158142c2 bellard
        return
4699 158142c2 bellard
               aSign
4700 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4701 158142c2 bellard
                 != 0 );
4702 158142c2 bellard
    }
4703 158142c2 bellard
    return
4704 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4705 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4706 158142c2 bellard
4707 158142c2 bellard
}
4708 158142c2 bellard
4709 158142c2 bellard
/*----------------------------------------------------------------------------
4710 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4711 f5a64251 Aurelien Jarno
| cannot be compared, and 0 otherwise.  The invalid exception is raised if
4712 f5a64251 Aurelien Jarno
| either operand is a NaN.   The comparison is performed according to the
4713 f5a64251 Aurelien Jarno
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4714 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4715 67b7861d Aurelien Jarno
int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
4716 67b7861d Aurelien Jarno
{
4717 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4718 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4719 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4720 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4721 67b7861d Aurelien Jarno
       ) {
4722 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4723 67b7861d Aurelien Jarno
        return 1;
4724 67b7861d Aurelien Jarno
    }
4725 67b7861d Aurelien Jarno
    return 0;
4726 67b7861d Aurelien Jarno
}
4727 67b7861d Aurelien Jarno
4728 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4729 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is
4730 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
4731 f5a64251 Aurelien Jarno
| cause an exception.  The comparison is performed according to the IEC/IEEE
4732 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
4733 158142c2 bellard
*----------------------------------------------------------------------------*/
4734 158142c2 bellard
4735 b689362d Aurelien Jarno
int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4736 158142c2 bellard
{
4737 158142c2 bellard
4738 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4739 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4740 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4741 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4742 158142c2 bellard
       ) {
4743 b689362d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4744 b689362d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4745 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4746 b689362d Aurelien Jarno
        }
4747 158142c2 bellard
        return 0;
4748 158142c2 bellard
    }
4749 158142c2 bellard
    return
4750 158142c2 bellard
           ( a.low == b.low )
4751 158142c2 bellard
        && (    ( a.high == b.high )
4752 158142c2 bellard
             || (    ( a.low == 0 )
4753 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
4754 158142c2 bellard
           );
4755 158142c2 bellard
4756 158142c2 bellard
}
4757 158142c2 bellard
4758 158142c2 bellard
/*----------------------------------------------------------------------------
4759 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4760 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4761 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
4762 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4763 158142c2 bellard
*----------------------------------------------------------------------------*/
4764 158142c2 bellard
4765 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4766 158142c2 bellard
{
4767 158142c2 bellard
    flag aSign, bSign;
4768 158142c2 bellard
4769 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4770 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4771 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4772 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4773 158142c2 bellard
       ) {
4774 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4775 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4776 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4777 158142c2 bellard
        }
4778 158142c2 bellard
        return 0;
4779 158142c2 bellard
    }
4780 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4781 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4782 158142c2 bellard
    if ( aSign != bSign ) {
4783 158142c2 bellard
        return
4784 158142c2 bellard
               aSign
4785 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4786 158142c2 bellard
                 == 0 );
4787 158142c2 bellard
    }
4788 158142c2 bellard
    return
4789 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
4790 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
4791 158142c2 bellard
4792 158142c2 bellard
}
4793 158142c2 bellard
4794 158142c2 bellard
/*----------------------------------------------------------------------------
4795 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
4796 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4797 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
4798 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4799 158142c2 bellard
*----------------------------------------------------------------------------*/
4800 158142c2 bellard
4801 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4802 158142c2 bellard
{
4803 158142c2 bellard
    flag aSign, bSign;
4804 158142c2 bellard
4805 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4806 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4807 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4808 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4809 158142c2 bellard
       ) {
4810 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
4811 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
4812 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4813 158142c2 bellard
        }
4814 158142c2 bellard
        return 0;
4815 158142c2 bellard
    }
4816 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4817 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4818 158142c2 bellard
    if ( aSign != bSign ) {
4819 158142c2 bellard
        return
4820 158142c2 bellard
               aSign
4821 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4822 158142c2 bellard
                 != 0 );
4823 158142c2 bellard
    }
4824 158142c2 bellard
    return
4825 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
4826 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
4827 158142c2 bellard
4828 158142c2 bellard
}
4829 158142c2 bellard
4830 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4831 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
4832 67b7861d Aurelien Jarno
| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
4833 67b7861d Aurelien Jarno
| The comparison is performed according to the IEC/IEEE Standard for Binary
4834 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
4835 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4836 67b7861d Aurelien Jarno
int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4837 67b7861d Aurelien Jarno
{
4838 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4839 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
4840 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4841 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
4842 67b7861d Aurelien Jarno
       ) {
4843 67b7861d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
4844 67b7861d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
4845 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4846 67b7861d Aurelien Jarno
        }
4847 67b7861d Aurelien Jarno
        return 1;
4848 67b7861d Aurelien Jarno
    }
4849 67b7861d Aurelien Jarno
    return 0;
4850 67b7861d Aurelien Jarno
}
4851 67b7861d Aurelien Jarno
4852 158142c2 bellard
#endif
4853 158142c2 bellard
4854 158142c2 bellard
#ifdef FLOAT128
4855 158142c2 bellard
4856 158142c2 bellard
/*----------------------------------------------------------------------------
4857 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4858 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4859 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4860 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4861 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4862 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4863 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4864 158142c2 bellard
*----------------------------------------------------------------------------*/
4865 158142c2 bellard
4866 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
4867 158142c2 bellard
{
4868 158142c2 bellard
    flag aSign;
4869 158142c2 bellard
    int32 aExp, shiftCount;
4870 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4871 158142c2 bellard
4872 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4873 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4874 158142c2 bellard
    aExp = extractFloat128Exp( a );
4875 158142c2 bellard
    aSign = extractFloat128Sign( a );
4876 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4877 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4878 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4879 158142c2 bellard
    shiftCount = 0x4028 - aExp;
4880 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4881 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4882 158142c2 bellard
4883 158142c2 bellard
}
4884 158142c2 bellard
4885 158142c2 bellard
/*----------------------------------------------------------------------------
4886 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4887 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
4888 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4889 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
4890 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4891 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
4892 158142c2 bellard
| returned.
4893 158142c2 bellard
*----------------------------------------------------------------------------*/
4894 158142c2 bellard
4895 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4896 158142c2 bellard
{
4897 158142c2 bellard
    flag aSign;
4898 158142c2 bellard
    int32 aExp, shiftCount;
4899 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, savedASig;
4900 158142c2 bellard
    int32 z;
4901 158142c2 bellard
4902 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4903 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4904 158142c2 bellard
    aExp = extractFloat128Exp( a );
4905 158142c2 bellard
    aSign = extractFloat128Sign( a );
4906 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
4907 158142c2 bellard
    if ( 0x401E < aExp ) {
4908 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4909 158142c2 bellard
        goto invalid;
4910 158142c2 bellard
    }
4911 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
4912 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4913 158142c2 bellard
        return 0;
4914 158142c2 bellard
    }
4915 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
4916 158142c2 bellard
    shiftCount = 0x402F - aExp;
4917 158142c2 bellard
    savedASig = aSig0;
4918 158142c2 bellard
    aSig0 >>= shiftCount;
4919 158142c2 bellard
    z = aSig0;
4920 158142c2 bellard
    if ( aSign ) z = - z;
4921 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
4922 158142c2 bellard
 invalid:
4923 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4924 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4925 158142c2 bellard
    }
4926 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
4927 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
4928 158142c2 bellard
    }
4929 158142c2 bellard
    return z;
4930 158142c2 bellard
4931 158142c2 bellard
}
4932 158142c2 bellard
4933 158142c2 bellard
/*----------------------------------------------------------------------------
4934 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4935 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4936 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4937 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
4938 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
4939 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
4940 158142c2 bellard
| largest integer with the same sign as `a' is returned.
4941 158142c2 bellard
*----------------------------------------------------------------------------*/
4942 158142c2 bellard
4943 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
4944 158142c2 bellard
{
4945 158142c2 bellard
    flag aSign;
4946 158142c2 bellard
    int32 aExp, shiftCount;
4947 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4948 158142c2 bellard
4949 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4950 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4951 158142c2 bellard
    aExp = extractFloat128Exp( a );
4952 158142c2 bellard
    aSign = extractFloat128Sign( a );
4953 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4954 158142c2 bellard
    shiftCount = 0x402F - aExp;
4955 158142c2 bellard
    if ( shiftCount <= 0 ) {
4956 158142c2 bellard
        if ( 0x403E < aExp ) {
4957 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4958 158142c2 bellard
            if (    ! aSign
4959 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
4960 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4961 158142c2 bellard
                    )
4962 158142c2 bellard
               ) {
4963 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
4964 158142c2 bellard
            }
4965 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
4966 158142c2 bellard
        }
4967 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4968 158142c2 bellard
    }
4969 158142c2 bellard
    else {
4970 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4971 158142c2 bellard
    }
4972 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4973 158142c2 bellard
4974 158142c2 bellard
}
4975 158142c2 bellard
4976 158142c2 bellard
/*----------------------------------------------------------------------------
4977 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
4978 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
4979 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4980 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
4981 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4982 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
4983 158142c2 bellard
| returned.
4984 158142c2 bellard
*----------------------------------------------------------------------------*/
4985 158142c2 bellard
4986 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4987 158142c2 bellard
{
4988 158142c2 bellard
    flag aSign;
4989 158142c2 bellard
    int32 aExp, shiftCount;
4990 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
4991 158142c2 bellard
    int64 z;
4992 158142c2 bellard
4993 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
4994 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
4995 158142c2 bellard
    aExp = extractFloat128Exp( a );
4996 158142c2 bellard
    aSign = extractFloat128Sign( a );
4997 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4998 158142c2 bellard
    shiftCount = aExp - 0x402F;
4999 158142c2 bellard
    if ( 0 < shiftCount ) {
5000 158142c2 bellard
        if ( 0x403E <= aExp ) {
5001 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5002 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
5003 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5004 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5005 158142c2 bellard
            }
5006 158142c2 bellard
            else {
5007 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5008 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5009 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
5010 158142c2 bellard
                }
5011 158142c2 bellard
            }
5012 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
5013 158142c2 bellard
        }
5014 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
5015 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
5016 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5017 158142c2 bellard
        }
5018 158142c2 bellard
    }
5019 158142c2 bellard
    else {
5020 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5021 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
5022 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
5023 158142c2 bellard
            }
5024 158142c2 bellard
            return 0;
5025 158142c2 bellard
        }
5026 158142c2 bellard
        z = aSig0>>( - shiftCount );
5027 158142c2 bellard
        if (    aSig1
5028 bb98fe42 Andreas Färber
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5029 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5030 158142c2 bellard
        }
5031 158142c2 bellard
    }
5032 158142c2 bellard
    if ( aSign ) z = - z;
5033 158142c2 bellard
    return z;
5034 158142c2 bellard
5035 158142c2 bellard
}
5036 158142c2 bellard
5037 158142c2 bellard
/*----------------------------------------------------------------------------
5038 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5039 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
5040 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5041 158142c2 bellard
| Arithmetic.
5042 158142c2 bellard
*----------------------------------------------------------------------------*/
5043 158142c2 bellard
5044 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
5045 158142c2 bellard
{
5046 158142c2 bellard
    flag aSign;
5047 158142c2 bellard
    int32 aExp;
5048 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5049 bb98fe42 Andreas Färber
    uint32_t zSig;
5050 158142c2 bellard
5051 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5052 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5053 158142c2 bellard
    aExp = extractFloat128Exp( a );
5054 158142c2 bellard
    aSign = extractFloat128Sign( a );
5055 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5056 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5057 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5058 158142c2 bellard
        }
5059 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
5060 158142c2 bellard
    }
5061 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5062 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
5063 158142c2 bellard
    zSig = aSig0;
5064 158142c2 bellard
    if ( aExp || zSig ) {
5065 158142c2 bellard
        zSig |= 0x40000000;
5066 158142c2 bellard
        aExp -= 0x3F81;
5067 158142c2 bellard
    }
5068 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5069 158142c2 bellard
5070 158142c2 bellard
}
5071 158142c2 bellard
5072 158142c2 bellard
/*----------------------------------------------------------------------------
5073 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5074 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
5075 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5076 158142c2 bellard
| Arithmetic.
5077 158142c2 bellard
*----------------------------------------------------------------------------*/
5078 158142c2 bellard
5079 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
5080 158142c2 bellard
{
5081 158142c2 bellard
    flag aSign;
5082 158142c2 bellard
    int32 aExp;
5083 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5084 158142c2 bellard
5085 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5086 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5087 158142c2 bellard
    aExp = extractFloat128Exp( a );
5088 158142c2 bellard
    aSign = extractFloat128Sign( a );
5089 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5090 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5091 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5092 158142c2 bellard
        }
5093 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
5094 158142c2 bellard
    }
5095 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5096 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5097 158142c2 bellard
    if ( aExp || aSig0 ) {
5098 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5099 158142c2 bellard
        aExp -= 0x3C01;
5100 158142c2 bellard
    }
5101 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5102 158142c2 bellard
5103 158142c2 bellard
}
5104 158142c2 bellard
5105 158142c2 bellard
#ifdef FLOATX80
5106 158142c2 bellard
5107 158142c2 bellard
/*----------------------------------------------------------------------------
5108 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5109 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
5110 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
5111 158142c2 bellard
| Floating-Point Arithmetic.
5112 158142c2 bellard
*----------------------------------------------------------------------------*/
5113 158142c2 bellard
5114 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5115 158142c2 bellard
{
5116 158142c2 bellard
    flag aSign;
5117 158142c2 bellard
    int32 aExp;
5118 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5119 158142c2 bellard
5120 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5121 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5122 158142c2 bellard
    aExp = extractFloat128Exp( a );
5123 158142c2 bellard
    aSign = extractFloat128Sign( a );
5124 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5125 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5126 bcd4d9af Christophe Lyon
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5127 158142c2 bellard
        }
5128 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5129 158142c2 bellard
    }
5130 158142c2 bellard
    if ( aExp == 0 ) {
5131 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5132 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5133 158142c2 bellard
    }
5134 158142c2 bellard
    else {
5135 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
5136 158142c2 bellard
    }
5137 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5138 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5139 158142c2 bellard
5140 158142c2 bellard
}
5141 158142c2 bellard
5142 158142c2 bellard
#endif
5143 158142c2 bellard
5144 158142c2 bellard
/*----------------------------------------------------------------------------
5145 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
5146 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
5147 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
5148 158142c2 bellard
| Floating-Point Arithmetic.
5149 158142c2 bellard
*----------------------------------------------------------------------------*/
5150 158142c2 bellard
5151 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
5152 158142c2 bellard
{
5153 158142c2 bellard
    flag aSign;
5154 158142c2 bellard
    int32 aExp;
5155 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
5156 158142c2 bellard
    int8 roundingMode;
5157 158142c2 bellard
    float128 z;
5158 158142c2 bellard
5159 158142c2 bellard
    aExp = extractFloat128Exp( a );
5160 158142c2 bellard
    if ( 0x402F <= aExp ) {
5161 158142c2 bellard
        if ( 0x406F <= aExp ) {
5162 158142c2 bellard
            if (    ( aExp == 0x7FFF )
5163 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5164 158142c2 bellard
               ) {
5165 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
5166 158142c2 bellard
            }
5167 158142c2 bellard
            return a;
5168 158142c2 bellard
        }
5169 158142c2 bellard
        lastBitMask = 1;
5170 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5171 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5172 158142c2 bellard
        z = a;
5173 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5174 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5175 158142c2 bellard
            if ( lastBitMask ) {
5176 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5177 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5178 158142c2 bellard
            }
5179 158142c2 bellard
            else {
5180 bb98fe42 Andreas Färber
                if ( (int64_t) z.low < 0 ) {
5181 158142c2 bellard
                    ++z.high;
5182 bb98fe42 Andreas Färber
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5183 158142c2 bellard
                }
5184 158142c2 bellard
            }
5185 158142c2 bellard
        }
5186 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5187 158142c2 bellard
            if (   extractFloat128Sign( z )
5188 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5189 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5190 158142c2 bellard
            }
5191 158142c2 bellard
        }
5192 158142c2 bellard
        z.low &= ~ roundBitsMask;
5193 158142c2 bellard
    }
5194 158142c2 bellard
    else {
5195 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5196 bb98fe42 Andreas Färber
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
5197 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5198 158142c2 bellard
            aSign = extractFloat128Sign( a );
5199 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
5200 158142c2 bellard
             case float_round_nearest_even:
5201 158142c2 bellard
                if (    ( aExp == 0x3FFE )
5202 158142c2 bellard
                     && (   extractFloat128Frac0( a )
5203 158142c2 bellard
                          | extractFloat128Frac1( a ) )
5204 158142c2 bellard
                   ) {
5205 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
5206 158142c2 bellard
                }
5207 158142c2 bellard
                break;
5208 158142c2 bellard
             case float_round_down:
5209 158142c2 bellard
                return
5210 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5211 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
5212 158142c2 bellard
             case float_round_up:
5213 158142c2 bellard
                return
5214 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
5215 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
5216 158142c2 bellard
            }
5217 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
5218 158142c2 bellard
        }
5219 158142c2 bellard
        lastBitMask = 1;
5220 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
5221 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5222 158142c2 bellard
        z.low = 0;
5223 158142c2 bellard
        z.high = a.high;
5224 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5225 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5226 158142c2 bellard
            z.high += lastBitMask>>1;
5227 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5228 158142c2 bellard
                z.high &= ~ lastBitMask;
5229 158142c2 bellard
            }
5230 158142c2 bellard
        }
5231 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5232 158142c2 bellard
            if (   extractFloat128Sign( z )
5233 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5234 158142c2 bellard
                z.high |= ( a.low != 0 );
5235 158142c2 bellard
                z.high += roundBitsMask;
5236 158142c2 bellard
            }
5237 158142c2 bellard
        }
5238 158142c2 bellard
        z.high &= ~ roundBitsMask;
5239 158142c2 bellard
    }
5240 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5241 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5242 158142c2 bellard
    }
5243 158142c2 bellard
    return z;
5244 158142c2 bellard
5245 158142c2 bellard
}
5246 158142c2 bellard
5247 158142c2 bellard
/*----------------------------------------------------------------------------
5248 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
5249 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
5250 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
5251 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
5252 158142c2 bellard
| Floating-Point Arithmetic.
5253 158142c2 bellard
*----------------------------------------------------------------------------*/
5254 158142c2 bellard
5255 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5256 158142c2 bellard
{
5257 158142c2 bellard
    int32 aExp, bExp, zExp;
5258 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5259 158142c2 bellard
    int32 expDiff;
5260 158142c2 bellard
5261 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5262 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5263 158142c2 bellard
    aExp = extractFloat128Exp( a );
5264 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5265 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5266 158142c2 bellard
    bExp = extractFloat128Exp( b );
5267 158142c2 bellard
    expDiff = aExp - bExp;
5268 158142c2 bellard
    if ( 0 < expDiff ) {
5269 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5270 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5271 158142c2 bellard
            return a;
5272 158142c2 bellard
        }
5273 158142c2 bellard
        if ( bExp == 0 ) {
5274 158142c2 bellard
            --expDiff;
5275 158142c2 bellard
        }
5276 158142c2 bellard
        else {
5277 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
5278 158142c2 bellard
        }
5279 158142c2 bellard
        shift128ExtraRightJamming(
5280 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5281 158142c2 bellard
        zExp = aExp;
5282 158142c2 bellard
    }
5283 158142c2 bellard
    else if ( expDiff < 0 ) {
5284 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5285 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5286 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5287 158142c2 bellard
        }
5288 158142c2 bellard
        if ( aExp == 0 ) {
5289 158142c2 bellard
            ++expDiff;
5290 158142c2 bellard
        }
5291 158142c2 bellard
        else {
5292 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
5293 158142c2 bellard
        }
5294 158142c2 bellard
        shift128ExtraRightJamming(
5295 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5296 158142c2 bellard
        zExp = bExp;
5297 158142c2 bellard
    }
5298 158142c2 bellard
    else {
5299 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5300 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5301 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
5302 158142c2 bellard
            }
5303 158142c2 bellard
            return a;
5304 158142c2 bellard
        }
5305 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5306 fe76d976 pbrook
        if ( aExp == 0 ) {
5307 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
5308 e6afc87f Peter Maydell
                if (zSig0 | zSig1) {
5309 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
5310 e6afc87f Peter Maydell
                }
5311 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
5312 e6afc87f Peter Maydell
            }
5313 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
5314 fe76d976 pbrook
        }
5315 158142c2 bellard
        zSig2 = 0;
5316 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
5317 158142c2 bellard
        zExp = aExp;
5318 158142c2 bellard
        goto shiftRight1;
5319 158142c2 bellard
    }
5320 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5321 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5322 158142c2 bellard
    --zExp;
5323 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5324 158142c2 bellard
    ++zExp;
5325 158142c2 bellard
 shiftRight1:
5326 158142c2 bellard
    shift128ExtraRightJamming(
5327 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5328 158142c2 bellard
 roundAndPack:
5329 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5330 158142c2 bellard
5331 158142c2 bellard
}
5332 158142c2 bellard
5333 158142c2 bellard
/*----------------------------------------------------------------------------
5334 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
5335 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
5336 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
5337 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5338 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5339 158142c2 bellard
*----------------------------------------------------------------------------*/
5340 158142c2 bellard
5341 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5342 158142c2 bellard
{
5343 158142c2 bellard
    int32 aExp, bExp, zExp;
5344 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5345 158142c2 bellard
    int32 expDiff;
5346 158142c2 bellard
    float128 z;
5347 158142c2 bellard
5348 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5349 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5350 158142c2 bellard
    aExp = extractFloat128Exp( a );
5351 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5352 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5353 158142c2 bellard
    bExp = extractFloat128Exp( b );
5354 158142c2 bellard
    expDiff = aExp - bExp;
5355 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5356 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5357 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
5358 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
5359 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5360 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5361 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5362 158142c2 bellard
        }
5363 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5364 158142c2 bellard
        z.low = float128_default_nan_low;
5365 158142c2 bellard
        z.high = float128_default_nan_high;
5366 158142c2 bellard
        return z;
5367 158142c2 bellard
    }
5368 158142c2 bellard
    if ( aExp == 0 ) {
5369 158142c2 bellard
        aExp = 1;
5370 158142c2 bellard
        bExp = 1;
5371 158142c2 bellard
    }
5372 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
5373 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
5374 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
5375 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
5376 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5377 158142c2 bellard
 bExpBigger:
5378 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5379 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5380 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5381 158142c2 bellard
    }
5382 158142c2 bellard
    if ( aExp == 0 ) {
5383 158142c2 bellard
        ++expDiff;
5384 158142c2 bellard
    }
5385 158142c2 bellard
    else {
5386 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5387 158142c2 bellard
    }
5388 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5389 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
5390 158142c2 bellard
 bBigger:
5391 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5392 158142c2 bellard
    zExp = bExp;
5393 158142c2 bellard
    zSign ^= 1;
5394 158142c2 bellard
    goto normalizeRoundAndPack;
5395 158142c2 bellard
 aExpBigger:
5396 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5397 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5398 158142c2 bellard
        return a;
5399 158142c2 bellard
    }
5400 158142c2 bellard
    if ( bExp == 0 ) {
5401 158142c2 bellard
        --expDiff;
5402 158142c2 bellard
    }
5403 158142c2 bellard
    else {
5404 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
5405 158142c2 bellard
    }
5406 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5407 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
5408 158142c2 bellard
 aBigger:
5409 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5410 158142c2 bellard
    zExp = aExp;
5411 158142c2 bellard
 normalizeRoundAndPack:
5412 158142c2 bellard
    --zExp;
5413 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5414 158142c2 bellard
5415 158142c2 bellard
}
5416 158142c2 bellard
5417 158142c2 bellard
/*----------------------------------------------------------------------------
5418 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
5419 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
5420 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5421 158142c2 bellard
*----------------------------------------------------------------------------*/
5422 158142c2 bellard
5423 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
5424 158142c2 bellard
{
5425 158142c2 bellard
    flag aSign, bSign;
5426 158142c2 bellard
5427 158142c2 bellard
    aSign = extractFloat128Sign( a );
5428 158142c2 bellard
    bSign = extractFloat128Sign( b );
5429 158142c2 bellard
    if ( aSign == bSign ) {
5430 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5431 158142c2 bellard
    }
5432 158142c2 bellard
    else {
5433 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5434 158142c2 bellard
    }
5435 158142c2 bellard
5436 158142c2 bellard
}
5437 158142c2 bellard
5438 158142c2 bellard
/*----------------------------------------------------------------------------
5439 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
5440 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5441 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5442 158142c2 bellard
*----------------------------------------------------------------------------*/
5443 158142c2 bellard
5444 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5445 158142c2 bellard
{
5446 158142c2 bellard
    flag aSign, bSign;
5447 158142c2 bellard
5448 158142c2 bellard
    aSign = extractFloat128Sign( a );
5449 158142c2 bellard
    bSign = extractFloat128Sign( b );
5450 158142c2 bellard
    if ( aSign == bSign ) {
5451 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5452 158142c2 bellard
    }
5453 158142c2 bellard
    else {
5454 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5455 158142c2 bellard
    }
5456 158142c2 bellard
5457 158142c2 bellard
}
5458 158142c2 bellard
5459 158142c2 bellard
/*----------------------------------------------------------------------------
5460 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
5461 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5462 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5463 158142c2 bellard
*----------------------------------------------------------------------------*/
5464 158142c2 bellard
5465 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5466 158142c2 bellard
{
5467 158142c2 bellard
    flag aSign, bSign, zSign;
5468 158142c2 bellard
    int32 aExp, bExp, zExp;
5469 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5470 158142c2 bellard
    float128 z;
5471 158142c2 bellard
5472 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5473 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5474 158142c2 bellard
    aExp = extractFloat128Exp( a );
5475 158142c2 bellard
    aSign = extractFloat128Sign( a );
5476 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5477 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5478 158142c2 bellard
    bExp = extractFloat128Exp( b );
5479 158142c2 bellard
    bSign = extractFloat128Sign( b );
5480 158142c2 bellard
    zSign = aSign ^ bSign;
5481 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5482 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5483 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5484 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5485 158142c2 bellard
        }
5486 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5487 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5488 158142c2 bellard
    }
5489 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5490 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5491 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5492 158142c2 bellard
 invalid:
5493 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5494 158142c2 bellard
            z.low = float128_default_nan_low;
5495 158142c2 bellard
            z.high = float128_default_nan_high;
5496 158142c2 bellard
            return z;
5497 158142c2 bellard
        }
5498 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5499 158142c2 bellard
    }
5500 158142c2 bellard
    if ( aExp == 0 ) {
5501 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5502 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5503 158142c2 bellard
    }
5504 158142c2 bellard
    if ( bExp == 0 ) {
5505 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5506 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5507 158142c2 bellard
    }
5508 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
5509 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5510 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5511 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5512 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5513 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
5514 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5515 158142c2 bellard
        shift128ExtraRightJamming(
5516 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5517 158142c2 bellard
        ++zExp;
5518 158142c2 bellard
    }
5519 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5520 158142c2 bellard
5521 158142c2 bellard
}
5522 158142c2 bellard
5523 158142c2 bellard
/*----------------------------------------------------------------------------
5524 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5525 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5526 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5527 158142c2 bellard
*----------------------------------------------------------------------------*/
5528 158142c2 bellard
5529 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5530 158142c2 bellard
{
5531 158142c2 bellard
    flag aSign, bSign, zSign;
5532 158142c2 bellard
    int32 aExp, bExp, zExp;
5533 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5534 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5535 158142c2 bellard
    float128 z;
5536 158142c2 bellard
5537 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5538 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5539 158142c2 bellard
    aExp = extractFloat128Exp( a );
5540 158142c2 bellard
    aSign = extractFloat128Sign( a );
5541 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5542 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5543 158142c2 bellard
    bExp = extractFloat128Exp( b );
5544 158142c2 bellard
    bSign = extractFloat128Sign( b );
5545 158142c2 bellard
    zSign = aSign ^ bSign;
5546 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5547 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5548 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5549 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5550 158142c2 bellard
            goto invalid;
5551 158142c2 bellard
        }
5552 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5553 158142c2 bellard
    }
5554 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5555 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5556 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5557 158142c2 bellard
    }
5558 158142c2 bellard
    if ( bExp == 0 ) {
5559 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5560 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5561 158142c2 bellard
 invalid:
5562 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5563 158142c2 bellard
                z.low = float128_default_nan_low;
5564 158142c2 bellard
                z.high = float128_default_nan_high;
5565 158142c2 bellard
                return z;
5566 158142c2 bellard
            }
5567 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
5568 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5569 158142c2 bellard
        }
5570 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5571 158142c2 bellard
    }
5572 158142c2 bellard
    if ( aExp == 0 ) {
5573 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5574 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5575 158142c2 bellard
    }
5576 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
5577 158142c2 bellard
    shortShift128Left(
5578 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5579 158142c2 bellard
    shortShift128Left(
5580 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5581 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5582 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5583 158142c2 bellard
        ++zExp;
5584 158142c2 bellard
    }
5585 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5586 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5587 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5588 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5589 158142c2 bellard
        --zSig0;
5590 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5591 158142c2 bellard
    }
5592 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5593 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5594 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5595 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5596 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5597 158142c2 bellard
            --zSig1;
5598 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5599 158142c2 bellard
        }
5600 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5601 158142c2 bellard
    }
5602 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5603 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5604 158142c2 bellard
5605 158142c2 bellard
}
5606 158142c2 bellard
5607 158142c2 bellard
/*----------------------------------------------------------------------------
5608 158142c2 bellard
| Returns the remainder of the quadruple-precision floating-point value `a'
5609 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
5610 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5611 158142c2 bellard
*----------------------------------------------------------------------------*/
5612 158142c2 bellard
5613 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5614 158142c2 bellard
{
5615 ed086f3d Blue Swirl
    flag aSign, zSign;
5616 158142c2 bellard
    int32 aExp, bExp, expDiff;
5617 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5618 bb98fe42 Andreas Färber
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5619 bb98fe42 Andreas Färber
    int64_t sigMean0;
5620 158142c2 bellard
    float128 z;
5621 158142c2 bellard
5622 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5623 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5624 158142c2 bellard
    aExp = extractFloat128Exp( a );
5625 158142c2 bellard
    aSign = extractFloat128Sign( a );
5626 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5627 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5628 158142c2 bellard
    bExp = extractFloat128Exp( b );
5629 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5630 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5631 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5632 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5633 158142c2 bellard
        }
5634 158142c2 bellard
        goto invalid;
5635 158142c2 bellard
    }
5636 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5637 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5638 158142c2 bellard
        return a;
5639 158142c2 bellard
    }
5640 158142c2 bellard
    if ( bExp == 0 ) {
5641 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5642 158142c2 bellard
 invalid:
5643 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5644 158142c2 bellard
            z.low = float128_default_nan_low;
5645 158142c2 bellard
            z.high = float128_default_nan_high;
5646 158142c2 bellard
            return z;
5647 158142c2 bellard
        }
5648 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5649 158142c2 bellard
    }
5650 158142c2 bellard
    if ( aExp == 0 ) {
5651 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5652 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5653 158142c2 bellard
    }
5654 158142c2 bellard
    expDiff = aExp - bExp;
5655 158142c2 bellard
    if ( expDiff < -1 ) return a;
5656 158142c2 bellard
    shortShift128Left(
5657 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
5658 158142c2 bellard
        aSig1,
5659 158142c2 bellard
        15 - ( expDiff < 0 ),
5660 158142c2 bellard
        &aSig0,
5661 158142c2 bellard
        &aSig1
5662 158142c2 bellard
    );
5663 158142c2 bellard
    shortShift128Left(
5664 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5665 158142c2 bellard
    q = le128( bSig0, bSig1, aSig0, aSig1 );
5666 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5667 158142c2 bellard
    expDiff -= 64;
5668 158142c2 bellard
    while ( 0 < expDiff ) {
5669 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5670 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5671 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5672 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5673 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5674 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5675 158142c2 bellard
        expDiff -= 61;
5676 158142c2 bellard
    }
5677 158142c2 bellard
    if ( -64 < expDiff ) {
5678 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5679 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
5680 158142c2 bellard
        q >>= - expDiff;
5681 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5682 158142c2 bellard
        expDiff += 52;
5683 158142c2 bellard
        if ( expDiff < 0 ) {
5684 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5685 158142c2 bellard
        }
5686 158142c2 bellard
        else {
5687 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5688 158142c2 bellard
        }
5689 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5690 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5691 158142c2 bellard
    }
5692 158142c2 bellard
    else {
5693 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5694 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5695 158142c2 bellard
    }
5696 158142c2 bellard
    do {
5697 158142c2 bellard
        alternateASig0 = aSig0;
5698 158142c2 bellard
        alternateASig1 = aSig1;
5699 158142c2 bellard
        ++q;
5700 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5701 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig0 );
5702 158142c2 bellard
    add128(
5703 bb98fe42 Andreas Färber
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
5704 158142c2 bellard
    if (    ( sigMean0 < 0 )
5705 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5706 158142c2 bellard
        aSig0 = alternateASig0;
5707 158142c2 bellard
        aSig1 = alternateASig1;
5708 158142c2 bellard
    }
5709 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig0 < 0 );
5710 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5711 158142c2 bellard
    return
5712 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5713 158142c2 bellard
5714 158142c2 bellard
}
5715 158142c2 bellard
5716 158142c2 bellard
/*----------------------------------------------------------------------------
5717 158142c2 bellard
| Returns the square root of the quadruple-precision floating-point value `a'.
5718 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
5719 158142c2 bellard
| Floating-Point Arithmetic.
5720 158142c2 bellard
*----------------------------------------------------------------------------*/
5721 158142c2 bellard
5722 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
5723 158142c2 bellard
{
5724 158142c2 bellard
    flag aSign;
5725 158142c2 bellard
    int32 aExp, zExp;
5726 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5727 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5728 158142c2 bellard
    float128 z;
5729 158142c2 bellard
5730 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5731 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5732 158142c2 bellard
    aExp = extractFloat128Exp( a );
5733 158142c2 bellard
    aSign = extractFloat128Sign( a );
5734 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5735 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5736 158142c2 bellard
        if ( ! aSign ) return a;
5737 158142c2 bellard
        goto invalid;
5738 158142c2 bellard
    }
5739 158142c2 bellard
    if ( aSign ) {
5740 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5741 158142c2 bellard
 invalid:
5742 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5743 158142c2 bellard
        z.low = float128_default_nan_low;
5744 158142c2 bellard
        z.high = float128_default_nan_high;
5745 158142c2 bellard
        return z;
5746 158142c2 bellard
    }
5747 158142c2 bellard
    if ( aExp == 0 ) {
5748 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5749 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5750 158142c2 bellard
    }
5751 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5752 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5753 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5754 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5755 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5756 158142c2 bellard
    doubleZSig0 = zSig0<<1;
5757 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
5758 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5759 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5760 158142c2 bellard
        --zSig0;
5761 158142c2 bellard
        doubleZSig0 -= 2;
5762 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5763 158142c2 bellard
    }
5764 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5765 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5766 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
5767 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5768 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5769 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
5770 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5771 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5772 158142c2 bellard
            --zSig1;
5773 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5774 158142c2 bellard
            term3 |= 1;
5775 158142c2 bellard
            term2 |= doubleZSig0;
5776 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5777 158142c2 bellard
        }
5778 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5779 158142c2 bellard
    }
5780 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5781 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5782 158142c2 bellard
5783 158142c2 bellard
}
5784 158142c2 bellard
5785 158142c2 bellard
/*----------------------------------------------------------------------------
5786 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5787 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5788 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5789 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5790 158142c2 bellard
*----------------------------------------------------------------------------*/
5791 158142c2 bellard
5792 b689362d Aurelien Jarno
int float128_eq( float128 a, float128 b STATUS_PARAM )
5793 158142c2 bellard
{
5794 158142c2 bellard
5795 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5796 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5797 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5798 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5799 158142c2 bellard
       ) {
5800 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5801 158142c2 bellard
        return 0;
5802 158142c2 bellard
    }
5803 158142c2 bellard
    return
5804 158142c2 bellard
           ( a.low == b.low )
5805 158142c2 bellard
        && (    ( a.high == b.high )
5806 158142c2 bellard
             || (    ( a.low == 0 )
5807 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5808 158142c2 bellard
           );
5809 158142c2 bellard
5810 158142c2 bellard
}
5811 158142c2 bellard
5812 158142c2 bellard
/*----------------------------------------------------------------------------
5813 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5814 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
5815 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
5816 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5817 158142c2 bellard
*----------------------------------------------------------------------------*/
5818 158142c2 bellard
5819 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
5820 158142c2 bellard
{
5821 158142c2 bellard
    flag aSign, bSign;
5822 158142c2 bellard
5823 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5824 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5825 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5826 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5827 158142c2 bellard
       ) {
5828 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5829 158142c2 bellard
        return 0;
5830 158142c2 bellard
    }
5831 158142c2 bellard
    aSign = extractFloat128Sign( a );
5832 158142c2 bellard
    bSign = extractFloat128Sign( b );
5833 158142c2 bellard
    if ( aSign != bSign ) {
5834 158142c2 bellard
        return
5835 158142c2 bellard
               aSign
5836 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5837 158142c2 bellard
                 == 0 );
5838 158142c2 bellard
    }
5839 158142c2 bellard
    return
5840 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5841 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5842 158142c2 bellard
5843 158142c2 bellard
}
5844 158142c2 bellard
5845 158142c2 bellard
/*----------------------------------------------------------------------------
5846 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5847 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
5848 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
5849 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5850 158142c2 bellard
*----------------------------------------------------------------------------*/
5851 158142c2 bellard
5852 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
5853 158142c2 bellard
{
5854 158142c2 bellard
    flag aSign, bSign;
5855 158142c2 bellard
5856 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5857 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5858 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5859 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5860 158142c2 bellard
       ) {
5861 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5862 158142c2 bellard
        return 0;
5863 158142c2 bellard
    }
5864 158142c2 bellard
    aSign = extractFloat128Sign( a );
5865 158142c2 bellard
    bSign = extractFloat128Sign( b );
5866 158142c2 bellard
    if ( aSign != bSign ) {
5867 158142c2 bellard
        return
5868 158142c2 bellard
               aSign
5869 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5870 158142c2 bellard
                 != 0 );
5871 158142c2 bellard
    }
5872 158142c2 bellard
    return
5873 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5874 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5875 158142c2 bellard
5876 158142c2 bellard
}
5877 158142c2 bellard
5878 158142c2 bellard
/*----------------------------------------------------------------------------
5879 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
5880 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
5881 f5a64251 Aurelien Jarno
| operand is a NaN. The comparison is performed according to the IEC/IEEE
5882 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
5883 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5884 67b7861d Aurelien Jarno
5885 67b7861d Aurelien Jarno
int float128_unordered( float128 a, float128 b STATUS_PARAM )
5886 67b7861d Aurelien Jarno
{
5887 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5888 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5889 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5890 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5891 67b7861d Aurelien Jarno
       ) {
5892 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5893 67b7861d Aurelien Jarno
        return 1;
5894 67b7861d Aurelien Jarno
    }
5895 67b7861d Aurelien Jarno
    return 0;
5896 67b7861d Aurelien Jarno
}
5897 67b7861d Aurelien Jarno
5898 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5899 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
5900 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5901 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
5902 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
5903 158142c2 bellard
*----------------------------------------------------------------------------*/
5904 158142c2 bellard
5905 b689362d Aurelien Jarno
int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
5906 158142c2 bellard
{
5907 158142c2 bellard
5908 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5909 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5910 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5911 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5912 158142c2 bellard
       ) {
5913 b689362d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
5914 b689362d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
5915 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5916 b689362d Aurelien Jarno
        }
5917 158142c2 bellard
        return 0;
5918 158142c2 bellard
    }
5919 158142c2 bellard
    return
5920 158142c2 bellard
           ( a.low == b.low )
5921 158142c2 bellard
        && (    ( a.high == b.high )
5922 158142c2 bellard
             || (    ( a.low == 0 )
5923 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5924 158142c2 bellard
           );
5925 158142c2 bellard
5926 158142c2 bellard
}
5927 158142c2 bellard
5928 158142c2 bellard
/*----------------------------------------------------------------------------
5929 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5930 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5931 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
5932 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5933 158142c2 bellard
*----------------------------------------------------------------------------*/
5934 158142c2 bellard
5935 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5936 158142c2 bellard
{
5937 158142c2 bellard
    flag aSign, bSign;
5938 158142c2 bellard
5939 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5940 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5941 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5942 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5943 158142c2 bellard
       ) {
5944 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5945 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5946 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5947 158142c2 bellard
        }
5948 158142c2 bellard
        return 0;
5949 158142c2 bellard
    }
5950 158142c2 bellard
    aSign = extractFloat128Sign( a );
5951 158142c2 bellard
    bSign = extractFloat128Sign( b );
5952 158142c2 bellard
    if ( aSign != bSign ) {
5953 158142c2 bellard
        return
5954 158142c2 bellard
               aSign
5955 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5956 158142c2 bellard
                 == 0 );
5957 158142c2 bellard
    }
5958 158142c2 bellard
    return
5959 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5960 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5961 158142c2 bellard
5962 158142c2 bellard
}
5963 158142c2 bellard
5964 158142c2 bellard
/*----------------------------------------------------------------------------
5965 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
5966 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5967 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5968 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5969 158142c2 bellard
*----------------------------------------------------------------------------*/
5970 158142c2 bellard
5971 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5972 158142c2 bellard
{
5973 158142c2 bellard
    flag aSign, bSign;
5974 158142c2 bellard
5975 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5976 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5977 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5978 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5979 158142c2 bellard
       ) {
5980 158142c2 bellard
        if (    float128_is_signaling_nan( a )
5981 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
5982 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5983 158142c2 bellard
        }
5984 158142c2 bellard
        return 0;
5985 158142c2 bellard
    }
5986 158142c2 bellard
    aSign = extractFloat128Sign( a );
5987 158142c2 bellard
    bSign = extractFloat128Sign( b );
5988 158142c2 bellard
    if ( aSign != bSign ) {
5989 158142c2 bellard
        return
5990 158142c2 bellard
               aSign
5991 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5992 158142c2 bellard
                 != 0 );
5993 158142c2 bellard
    }
5994 158142c2 bellard
    return
5995 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5996 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5997 158142c2 bellard
5998 158142c2 bellard
}
5999 158142c2 bellard
6000 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
6001 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6002 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
6003 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
6004 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
6005 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
6006 67b7861d Aurelien Jarno
6007 67b7861d Aurelien Jarno
int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6008 67b7861d Aurelien Jarno
{
6009 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6010 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6011 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6012 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6013 67b7861d Aurelien Jarno
       ) {
6014 67b7861d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
6015 67b7861d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
6016 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6017 67b7861d Aurelien Jarno
        }
6018 67b7861d Aurelien Jarno
        return 1;
6019 67b7861d Aurelien Jarno
    }
6020 67b7861d Aurelien Jarno
    return 0;
6021 67b7861d Aurelien Jarno
}
6022 67b7861d Aurelien Jarno
6023 158142c2 bellard
#endif
6024 158142c2 bellard
6025 1d6bda35 bellard
/* misc functions */
6026 1d6bda35 bellard
/* Converts the unsigned integer `a' to single precision by handing it to
   int64_to_float32. */
float32 uint32_to_float32( unsigned int a STATUS_PARAM )
{
    float32 converted;

    converted = int64_to_float32(a STATUS_VAR);
    return converted;
}
6030 1d6bda35 bellard
6031 1d6bda35 bellard
/* Converts the unsigned integer `a' to double precision by handing it to
   int64_to_float64. */
float64 uint32_to_float64( unsigned int a STATUS_PARAM )
{
    float64 converted;

    converted = int64_to_float64(a STATUS_VAR);
    return converted;
}
6035 1d6bda35 bellard
6036 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer.  The value is first converted
   with float32_to_int64 and then saturated: a negative result yields 0 and a
   result above 0xffffffff yields 0xffffffff, each raising the invalid
   exception. */
unsigned int float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6053 1d6bda35 bellard
6054 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer.  The value is first converted
   with float32_to_int64_round_to_zero and then saturated: a negative result
   yields 0 and a result above 0xffffffff yields 0xffffffff, each raising the
   invalid exception. */
unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6071 1d6bda35 bellard
6072 cbcef455 Peter Maydell
/* Converts `a' to an unsigned 16-bit range value (returned as unsigned int).
   The value is first converted with float32_to_int64_round_to_zero and then
   saturated: a negative result yields 0 and a result above 0xffff yields
   0xffff, each raising the invalid exception. */
unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffff;
    }
    return wide;
}
6089 cbcef455 Peter Maydell
6090 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer.  The value is first converted
   with float64_to_int64 and then saturated: a negative result yields 0 and a
   result above 0xffffffff yields 0xffffffff, each raising the invalid
   exception. */
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6107 1d6bda35 bellard
6108 1d6bda35 bellard
/* Converts `a' to an unsigned 32-bit integer.  The value is first converted
   with float64_to_int64_round_to_zero and then saturated: a negative result
   yields 0 and a result above 0xffffffff yields 0xffffffff, each raising the
   invalid exception. */
unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffffffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffffffff;
    }
    return wide;
}
6125 1d6bda35 bellard
6126 cbcef455 Peter Maydell
/* Converts `a' to an unsigned 16-bit range value (returned as unsigned int).
   The value is first converted with float64_to_int64_round_to_zero and then
   saturated: a negative result yields 0 and a result above 0xffff yields
   0xffff, each raising the invalid exception. */
unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide < 0) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    if (wide > 0xffff) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0xffff;
    }
    return wide;
}
6143 cbcef455 Peter Maydell
6144 f090c9d4 pbrook
/* FIXME: This looks broken.  */
6145 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6146 75d62a58 j_mayer
{
6147 75d62a58 j_mayer
    int64_t v;
6148 75d62a58 j_mayer
6149 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6150 f090c9d4 pbrook
    v += float64_val(a);
6151 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
6152 75d62a58 j_mayer
6153 75d62a58 j_mayer
    return v - INT64_MIN;
6154 75d62a58 j_mayer
}
6155 75d62a58 j_mayer
6156 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the 64-bit unsigned integer format, rounding toward zero.
|
| Values of magnitude below 2^63 are converted by
| float64_to_int64_round_to_zero; if the result is negative the invalid
| exception is raised and 0 is returned.  Values in [2^63, 2^64) are already
| integers and are converted exactly.  NaNs and positive values of 2^64 or
| more (including +infinity) raise the invalid exception and return the
| largest unsigned 64-bit integer; negative values of magnitude 2^63 or more
| raise the invalid exception and return 0.  This mirrors the saturation done
| by float64_to_uint32_round_to_zero.
*----------------------------------------------------------------------------*/
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    flag aSign;
    int16_t aExp;
    uint64_t aSig;
    int64_t v;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( 0x43E <= aExp ) {
        /* |a| >= 2^63, infinity or NaN: too large for the signed converter. */
        if ( ( aExp == 0x7FF ) && aSig ) {
            /* NaN */
            float_raise( float_flag_invalid STATUS_VAR);
            return LIT64( 0xFFFFFFFFFFFFFFFF );
        }
        if ( aSign ) {
            float_raise( float_flag_invalid STATUS_VAR);
            return 0;
        }
        if ( aExp == 0x43E ) {
            /* 2^63 <= a < 2^64: the 53-bit significand (with its implicit
               leading one) shifted left by 11 is the exact integer value. */
            return ( aSig | LIT64( 0x0010000000000000 ) )<<11;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        return LIT64( 0xFFFFFFFFFFFFFFFF );
    }

    /* |a| < 2^63: the signed conversion cannot overflow, so only a negative
       result needs to be rejected. */
    v = float64_to_int64_round_to_zero( a STATUS_VAR );
    if ( v < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    return v;
}
6166 75d62a58 j_mayer
6167 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
6168 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
6169 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
6170 1d6bda35 bellard
{                                                                            \
6171 1d6bda35 bellard
    flag aSign, bSign;                                                       \
6172 bb98fe42 Andreas Färber
    uint ## s ## _t av, bv;                                                  \
6173 37d18660 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
6174 37d18660 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
6175 1d6bda35 bellard
                                                                             \
6176 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
6177 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
6178 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
6179 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
6180 1d6bda35 bellard
        if (!is_quiet ||                                                     \
6181 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
6182 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
6183 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
6184 1d6bda35 bellard
        }                                                                    \
6185 1d6bda35 bellard
        return float_relation_unordered;                                     \
6186 1d6bda35 bellard
    }                                                                        \
6187 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
6188 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
6189 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
6190 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
6191 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
6192 bb98fe42 Andreas Färber
        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
6193 1d6bda35 bellard
            /* zero case */                                                  \
6194 1d6bda35 bellard
            return float_relation_equal;                                     \
6195 1d6bda35 bellard
        } else {                                                             \
6196 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
6197 1d6bda35 bellard
        }                                                                    \
6198 1d6bda35 bellard
    } else {                                                                 \
6199 f090c9d4 pbrook
        if (av == bv) {                                                      \
6200 1d6bda35 bellard
            return float_relation_equal;                                     \
6201 1d6bda35 bellard
        } else {                                                             \
6202 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
6203 1d6bda35 bellard
        }                                                                    \
6204 1d6bda35 bellard
    }                                                                        \
6205 1d6bda35 bellard
}                                                                            \
6206 1d6bda35 bellard
                                                                             \
6207 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
6208 1d6bda35 bellard
{                                                                            \
6209 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
6210 1d6bda35 bellard
}                                                                            \
6211 1d6bda35 bellard
                                                                             \
6212 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
6213 1d6bda35 bellard
{                                                                            \
6214 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
6215 1d6bda35 bellard
}
6216 1d6bda35 bellard
6217 1d6bda35 bellard
COMPARE(32, 0xff)
6218 1d6bda35 bellard
COMPARE(64, 0x7ff)
6219 9ee6e8bb pbrook
6220 f6714d36 Aurelien Jarno
INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6221 f6714d36 Aurelien Jarno
                                      int is_quiet STATUS_PARAM )
6222 f6714d36 Aurelien Jarno
{
6223 f6714d36 Aurelien Jarno
    flag aSign, bSign;
6224 f6714d36 Aurelien Jarno
6225 f6714d36 Aurelien Jarno
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6226 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( a )<<1 ) ) ||
6227 f6714d36 Aurelien Jarno
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6228 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( b )<<1 ) )) {
6229 f6714d36 Aurelien Jarno
        if (!is_quiet ||
6230 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( a ) ||
6231 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( b ) ) {
6232 f6714d36 Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6233 f6714d36 Aurelien Jarno
        }
6234 f6714d36 Aurelien Jarno
        return float_relation_unordered;
6235 f6714d36 Aurelien Jarno
    }
6236 f6714d36 Aurelien Jarno
    aSign = extractFloatx80Sign( a );
6237 f6714d36 Aurelien Jarno
    bSign = extractFloatx80Sign( b );
6238 f6714d36 Aurelien Jarno
    if ( aSign != bSign ) {
6239 f6714d36 Aurelien Jarno
6240 f6714d36 Aurelien Jarno
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6241 f6714d36 Aurelien Jarno
             ( ( a.low | b.low ) == 0 ) ) {
6242 f6714d36 Aurelien Jarno
            /* zero case */
6243 f6714d36 Aurelien Jarno
            return float_relation_equal;
6244 f6714d36 Aurelien Jarno
        } else {
6245 f6714d36 Aurelien Jarno
            return 1 - (2 * aSign);
6246 f6714d36 Aurelien Jarno
        }
6247 f6714d36 Aurelien Jarno
    } else {
6248 f6714d36 Aurelien Jarno
        if (a.low == b.low && a.high == b.high) {
6249 f6714d36 Aurelien Jarno
            return float_relation_equal;
6250 f6714d36 Aurelien Jarno
        } else {
6251 f6714d36 Aurelien Jarno
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6252 f6714d36 Aurelien Jarno
        }
6253 f6714d36 Aurelien Jarno
    }
6254 f6714d36 Aurelien Jarno
}
6255 f6714d36 Aurelien Jarno
6256 f6714d36 Aurelien Jarno
/* Signaling comparison: calls floatx80_compare_internal with is_quiet = 0,
   so any NaN operand raises the invalid exception. */
int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation;

    relation = floatx80_compare_internal(a, b, 0 STATUS_VAR);
    return relation;
}
6260 f6714d36 Aurelien Jarno
6261 f6714d36 Aurelien Jarno
/* Quiet comparison: calls floatx80_compare_internal with is_quiet = 1, so
   only signaling NaN operands raise the invalid exception. */
int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation;

    relation = floatx80_compare_internal(a, b, 1 STATUS_VAR);
    return relation;
}
6265 f6714d36 Aurelien Jarno
6266 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
6267 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
6268 1f587329 blueswir1
{
6269 1f587329 blueswir1
    flag aSign, bSign;
6270 1f587329 blueswir1
6271 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6272 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6273 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6274 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6275 1f587329 blueswir1
        if (!is_quiet ||
6276 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
6277 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
6278 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
6279 1f587329 blueswir1
        }
6280 1f587329 blueswir1
        return float_relation_unordered;
6281 1f587329 blueswir1
    }
6282 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
6283 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
6284 1f587329 blueswir1
    if ( aSign != bSign ) {
6285 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6286 1f587329 blueswir1
            /* zero case */
6287 1f587329 blueswir1
            return float_relation_equal;
6288 1f587329 blueswir1
        } else {
6289 1f587329 blueswir1
            return 1 - (2 * aSign);
6290 1f587329 blueswir1
        }
6291 1f587329 blueswir1
    } else {
6292 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
6293 1f587329 blueswir1
            return float_relation_equal;
6294 1f587329 blueswir1
        } else {
6295 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6296 1f587329 blueswir1
        }
6297 1f587329 blueswir1
    }
6298 1f587329 blueswir1
}
6299 1f587329 blueswir1
6300 1f587329 blueswir1
/* Signaling comparison: calls float128_compare_internal with is_quiet = 0,
   so any NaN operand raises the invalid exception. */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    int relation;

    relation = float128_compare_internal(a, b, 0 STATUS_VAR);
    return relation;
}
6304 1f587329 blueswir1
6305 1f587329 blueswir1
/* Quiet comparison: calls float128_compare_internal with is_quiet = 1, so
   only signaling NaN operands raise the invalid exception. */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    int relation;

    relation = float128_compare_internal(a, b, 1 STATUS_VAR);
    return relation;
}
6309 1f587329 blueswir1
6310 274f1b04 Peter Maydell
/* min() and max() functions. These can't be implemented as
6311 274f1b04 Peter Maydell
 * 'compare and pick one input' because that would mishandle
6312 274f1b04 Peter Maydell
 * NaNs and +0 vs -0.
6313 274f1b04 Peter Maydell
 */
6314 274f1b04 Peter Maydell
#define MINMAX(s, nan_exp)                                              \
6315 274f1b04 Peter Maydell
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
6316 274f1b04 Peter Maydell
                                        int ismin STATUS_PARAM )        \
6317 274f1b04 Peter Maydell
{                                                                       \
6318 274f1b04 Peter Maydell
    flag aSign, bSign;                                                  \
6319 274f1b04 Peter Maydell
    uint ## s ## _t av, bv;                                             \
6320 274f1b04 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
6321 274f1b04 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
6322 274f1b04 Peter Maydell
    if (float ## s ## _is_any_nan(a) ||                                 \
6323 274f1b04 Peter Maydell
        float ## s ## _is_any_nan(b)) {                                 \
6324 274f1b04 Peter Maydell
        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
6325 274f1b04 Peter Maydell
    }                                                                   \
6326 274f1b04 Peter Maydell
    aSign = extractFloat ## s ## Sign(a);                               \
6327 274f1b04 Peter Maydell
    bSign = extractFloat ## s ## Sign(b);                               \
6328 274f1b04 Peter Maydell
    av = float ## s ## _val(a);                                         \
6329 274f1b04 Peter Maydell
    bv = float ## s ## _val(b);                                         \
6330 274f1b04 Peter Maydell
    if (aSign != bSign) {                                               \
6331 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6332 274f1b04 Peter Maydell
            return aSign ? a : b;                                       \
6333 274f1b04 Peter Maydell
        } else {                                                        \
6334 274f1b04 Peter Maydell
            return aSign ? b : a;                                       \
6335 274f1b04 Peter Maydell
        }                                                               \
6336 274f1b04 Peter Maydell
    } else {                                                            \
6337 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6338 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? a : b;                         \
6339 274f1b04 Peter Maydell
        } else {                                                        \
6340 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? b : a;                         \
6341 274f1b04 Peter Maydell
        }                                                               \
6342 274f1b04 Peter Maydell
    }                                                                   \
6343 274f1b04 Peter Maydell
}                                                                       \
6344 274f1b04 Peter Maydell
                                                                        \
6345 274f1b04 Peter Maydell
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
6346 274f1b04 Peter Maydell
{                                                                       \
6347 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
6348 274f1b04 Peter Maydell
}                                                                       \
6349 274f1b04 Peter Maydell
                                                                        \
6350 274f1b04 Peter Maydell
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
6351 274f1b04 Peter Maydell
{                                                                       \
6352 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
6353 274f1b04 Peter Maydell
}
6354 274f1b04 Peter Maydell
6355 274f1b04 Peter Maydell
MINMAX(32, 0xff)
6356 274f1b04 Peter Maydell
MINMAX(64, 0x7ff)
6357 274f1b04 Peter Maydell
6358 274f1b04 Peter Maydell
6359 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
6360 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6361 9ee6e8bb pbrook
{
6362 9ee6e8bb pbrook
    flag aSign;
6363 326b9e98 Aurelien Jarno
    int16_t aExp;
6364 bb98fe42 Andreas Färber
    uint32_t aSig;
6365 9ee6e8bb pbrook
6366 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
6367 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
6368 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
6369 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
6370 9ee6e8bb pbrook
6371 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
6372 326b9e98 Aurelien Jarno
        if ( aSig ) {
6373 326b9e98 Aurelien Jarno
            return propagateFloat32NaN( a, a STATUS_VAR );
6374 326b9e98 Aurelien Jarno
        }
6375 9ee6e8bb pbrook
        return a;
6376 9ee6e8bb pbrook
    }
6377 69397542 pbrook
    if ( aExp != 0 )
6378 69397542 pbrook
        aSig |= 0x00800000;
6379 69397542 pbrook
    else if ( aSig == 0 )
6380 69397542 pbrook
        return a;
6381 69397542 pbrook
6382 326b9e98 Aurelien Jarno
    if (n > 0x200) {
6383 326b9e98 Aurelien Jarno
        n = 0x200;
6384 326b9e98 Aurelien Jarno
    } else if (n < -0x200) {
6385 326b9e98 Aurelien Jarno
        n = -0x200;
6386 326b9e98 Aurelien Jarno
    }
6387 326b9e98 Aurelien Jarno
6388 69397542 pbrook
    aExp += n - 1;
6389 69397542 pbrook
    aSig <<= 7;
6390 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
6391 9ee6e8bb pbrook
}
6392 9ee6e8bb pbrook
6393 9ee6e8bb pbrook
/*----------------------------------------------------------------------------
| Returns the double-precision value `a' multiplied by 2 raised to the power
| `n'.  A NaN operand is passed through propagateFloat64NaN; infinities and
| zeros are returned as they are (after input-denormal squashing).  `n' is
| clamped to [-0x1000, 0x1000], which keeps the int16_t exponent arithmetic
| from overflowing.
*----------------------------------------------------------------------------*/
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16_t aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        if ( aSig ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit leading one explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        return a;
    } else {
        /* Denormal: there is no implicit leading one, and the fraction is
           scaled as if the exponent field were 1.  Without this adjustment
           the result would be half the correct value. */
        aExp++;
    }

    if (n > 0x1000) {
        n = 0x1000;
    } else if (n < -0x1000) {
        n = -0x1000;
    }

    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
6425 9ee6e8bb pbrook
6426 9ee6e8bb pbrook
#ifdef FLOATX80
6427 9ee6e8bb pbrook
/*----------------------------------------------------------------------------
| Returns the extended double-precision floating-point value `a' with `n'
| added to its exponent, i.e. `a' scaled by 2 raised to the power `n'.  The
| scaled value is handed to `normalizeRoundAndPackFloatx80' using the current
| `floatx80_rounding_precision'.  NaN inputs go through
| `propagateFloatx80NaN'; infinities and zeros are returned unchanged.
*----------------------------------------------------------------------------*/
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* Maximum exponent field: NaN if any bit below the explicit integer
         * bit is set, otherwise infinity, which is returned as is. */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Zero stays zero whatever the scale factor. */
            return a;
        }
        /* Exponent field 0 with a non-zero significand (denormal or
         * pseudo-denormal) has the same scale as exponent field 1.  Adjust
         * before scaling, otherwise the result is half of what it should
         * be. */
        aExp++;
    }

    /* Clamp n to a bound wider than the 15-bit exponent field so the
     * addition below stays comfortably inside int32_t. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
6457 9ee6e8bb pbrook
#endif
6458 9ee6e8bb pbrook
6459 9ee6e8bb pbrook
#ifdef FLOAT128
6460 9ee6e8bb pbrook
/*----------------------------------------------------------------------------
| Returns the quadruple-precision floating-point value `a' with `n' added to
| its exponent, i.e. `a' scaled by 2 raised to the power `n'.  The scaled
| value is handed to `normalizeRoundAndPackFloat128' for normalization,
| rounding and packing.  NaN inputs go through `propagateFloat128NaN';
| infinities and zeros are returned unchanged.
*----------------------------------------------------------------------------*/
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* Maximum exponent field: NaN if the fraction is non-zero,
         * otherwise infinity, which scaling leaves untouched. */
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        /* Zero stays zero whatever the scale factor. */
        return a;
    } else {
        /* Subnormal: the exponent field is 0 but the value carries the
         * same scale as exponent field 1 (there is just no implicit bit).
         * Without this adjustment the result is half of what it should be. */
        aExp++;
    }

    /* Clamp n to a bound wider than the 15-bit exponent field so the
     * addition below stays comfortably inside int32_t. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The "- 1" is paired with the significand placement expected by the
     * normalize/round/pack helper, as in the original code. */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );
}
6492 9ee6e8bb pbrook
#endif