Statistics
| Branch: | Revision:

root / fpu / softfloat.c @ 60e1b2a6

History | View | Annotate | Download (243.2 kB)

1 8d725fac Andreas Färber
/*
2 8d725fac Andreas Färber
 * QEMU float support
3 8d725fac Andreas Färber
 *
4 8d725fac Andreas Färber
 * Derived from SoftFloat.
5 8d725fac Andreas Färber
 */
6 158142c2 bellard
7 158142c2 bellard
/*============================================================================
8 158142c2 bellard

9 158142c2 bellard
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10 158142c2 bellard
Package, Release 2b.
11 158142c2 bellard

12 158142c2 bellard
Written by John R. Hauser.  This work was made possible in part by the
13 158142c2 bellard
International Computer Science Institute, located at Suite 600, 1947 Center
14 158142c2 bellard
Street, Berkeley, California 94704.  Funding was partially provided by the
15 158142c2 bellard
National Science Foundation under grant MIP-9311980.  The original version
16 158142c2 bellard
of this code was written as part of a project to build a fixed-point vector
17 158142c2 bellard
processor in collaboration with the University of California at Berkeley,
18 158142c2 bellard
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
19 158142c2 bellard
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20 158142c2 bellard
arithmetic/SoftFloat.html'.
21 158142c2 bellard

22 158142c2 bellard
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
23 158142c2 bellard
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24 158142c2 bellard
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25 158142c2 bellard
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26 158142c2 bellard
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27 158142c2 bellard
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28 158142c2 bellard
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29 158142c2 bellard
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30 158142c2 bellard

31 158142c2 bellard
Derivative works are acceptable, even for commercial purposes, so long as
32 158142c2 bellard
(1) the source code for the derivative work includes prominent notice that
33 158142c2 bellard
the work is derivative, and (2) the source code includes prominent notice with
34 158142c2 bellard
these four paragraphs for those parts of this code that are retained.
35 158142c2 bellard

36 158142c2 bellard
=============================================================================*/
37 158142c2 bellard
38 2ac8bd03 Peter Maydell
/* softfloat (and in particular the code in softfloat-specialize.h) is
39 2ac8bd03 Peter Maydell
 * target-dependent and needs the TARGET_* macros.
40 2ac8bd03 Peter Maydell
 */
41 2ac8bd03 Peter Maydell
#include "config.h"
42 2ac8bd03 Peter Maydell
43 158142c2 bellard
#include "softfloat.h"
44 158142c2 bellard
45 158142c2 bellard
/*----------------------------------------------------------------------------
46 158142c2 bellard
| Primitive arithmetic functions, including multi-word arithmetic, and
47 158142c2 bellard
| division and square root approximations.  (Can be specialized to target if
48 158142c2 bellard
| desired.)
49 158142c2 bellard
*----------------------------------------------------------------------------*/
50 158142c2 bellard
#include "softfloat-macros.h"
51 158142c2 bellard
52 158142c2 bellard
/*----------------------------------------------------------------------------
53 158142c2 bellard
| Functions and definitions to determine:  (1) whether tininess for underflow
54 158142c2 bellard
| is detected before or after rounding by default, (2) what (if anything)
55 158142c2 bellard
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
56 158142c2 bellard
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
57 158142c2 bellard
| are propagated from function inputs to output.  These details are target-
58 158142c2 bellard
| specific.
59 158142c2 bellard
*----------------------------------------------------------------------------*/
60 158142c2 bellard
#include "softfloat-specialize.h"
61 158142c2 bellard
62 158142c2 bellard
void set_float_rounding_mode(int val STATUS_PARAM)
{
    /* Record `val' as the rounding mode in the floating-point status. */
    STATUS(float_rounding_mode) = val;
}
66 158142c2 bellard
67 1d6bda35 bellard
void set_float_exception_flags(int val STATUS_PARAM)
{
    /* Overwrite (not OR into) the accumulated exception flags with `val'. */
    STATUS(float_exception_flags) = val;
}
71 1d6bda35 bellard
72 158142c2 bellard
void set_floatx80_rounding_precision(int val STATUS_PARAM)
{
    /* Record `val' as the floatx80 rounding precision in the status. */
    STATUS(floatx80_rounding_precision) = val;
}
76 158142c2 bellard
77 158142c2 bellard
/*----------------------------------------------------------------------------
78 bb4d4bb3 Peter Maydell
| Returns the fraction bits of the half-precision floating-point value `a'.
79 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
80 bb4d4bb3 Peter Maydell
81 bb4d4bb3 Peter Maydell
INLINE uint32_t extractFloat16Frac(float16 a)
{
    /* The fraction field is the low 10 bits of the raw encoding. */
    uint32_t raw = float16_val(a);

    return raw & 0x3ff;
}
85 bb4d4bb3 Peter Maydell
86 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
87 bb4d4bb3 Peter Maydell
| Returns the exponent bits of the half-precision floating-point value `a'.
88 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
89 bb4d4bb3 Peter Maydell
90 bb4d4bb3 Peter Maydell
INLINE int16 extractFloat16Exp(float16 a)
{
    /* The exponent field is the 5 bits directly above the 10 fraction bits. */
    uint32_t raw = float16_val(a);

    return (raw >> 10) & 0x1f;
}
94 bb4d4bb3 Peter Maydell
95 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
96 bb4d4bb3 Peter Maydell
| Returns the sign bit of the half-precision floating-point value `a'.
97 bb4d4bb3 Peter Maydell
*----------------------------------------------------------------------------*/
98 bb4d4bb3 Peter Maydell
99 bb4d4bb3 Peter Maydell
INLINE flag extractFloat16Sign(float16 a)
{
    /* Bit 15 of the raw encoding is the sign. */
    uint32_t raw = float16_val(a);

    return raw >> 15;
}
103 bb4d4bb3 Peter Maydell
104 bb4d4bb3 Peter Maydell
/*----------------------------------------------------------------------------
105 158142c2 bellard
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
106 158142c2 bellard
| and 7, and returns the properly rounded 32-bit integer corresponding to the
107 158142c2 bellard
| input.  If `zSign' is 1, the input is negated before being converted to an
108 158142c2 bellard
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
109 158142c2 bellard
| is simply rounded to an integer, with the inexact exception raised if the
110 158142c2 bellard
| input cannot be represented exactly as an integer.  However, if the fixed-
111 158142c2 bellard
| point input is too large, the invalid exception is raised and the largest
112 158142c2 bellard
| positive or negative integer is returned.
113 158142c2 bellard
*----------------------------------------------------------------------------*/
114 158142c2 bellard
115 bb98fe42 Andreas Färber
static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    int32 z;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Choose the value added to the 7 bits below the binary point before
     * they are shifted out: half for nearest-even, nothing when the mode
     * truncates the magnitude, just under one when it rounds it away from
     * zero. */
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ) {
        increment = ( mode == float_round_up ) ? 0 : 0x7F;
    }
    else {
        increment = ( mode == float_round_down ) ? 0 : 0x7F;
    }

    lowBits = absZ & 0x7F;
    absZ = ( absZ + increment )>>7;
    /* Exact halfway case under nearest-even: clear bit 0 so the result
     * is even. */
    if ( nearestEven && ( lowBits == 0x40 ) ) {
        absZ &= ~ (uint64_t) 1;
    }

    z = absZ;
    if ( zSign ) {
        z = - z;
    }
    /* Out of range if the magnitude needs more than 32 bits, or if the
     * sign of the narrowed result disagrees with the requested sign. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        if ( zSign ) {
            return (int32_t) 0x80000000;
        }
        return 0x7FFFFFFF;
    }
    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

}
152 158142c2 bellard
153 158142c2 bellard
/*----------------------------------------------------------------------------
154 158142c2 bellard
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
155 158142c2 bellard
| `absZ1', with binary point between bits 63 and 64 (between the input words),
156 158142c2 bellard
| and returns the properly rounded 64-bit integer corresponding to the input.
157 158142c2 bellard
| If `zSign' is 1, the input is negated before being converted to an integer.
158 158142c2 bellard
| Ordinarily, the fixed-point input is simply rounded to an integer, with
159 158142c2 bellard
| the inexact exception raised if the input cannot be represented exactly as
160 158142c2 bellard
| an integer.  However, if the fixed-point input is too large, the invalid
161 158142c2 bellard
| exception is raised and the largest positive or negative integer is
162 158142c2 bellard
| returned.
163 158142c2 bellard
*----------------------------------------------------------------------------*/
164 158142c2 bellard
165 bb98fe42 Andreas Färber
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
{
    int8 mode;
    flag nearestEven, roundUp;
    flag outOfRange = 0;
    int64 z = 0;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Decide whether the integer part `absZ0' is bumped by one, based on
     * the fraction word `absZ1' and the rounding mode. */
    if ( nearestEven ) {
        /* Top fraction bit set means the fraction is at least one half. */
        roundUp = ( (int64_t) absZ1 < 0 );
    }
    else if ( mode == float_round_to_zero ) {
        roundUp = 0;
    }
    else if ( zSign ) {
        roundUp = ( mode == float_round_down ) && absZ1;
    }
    else {
        roundUp = ( mode == float_round_up ) && absZ1;
    }

    if ( roundUp ) {
        ++absZ0;
        if ( absZ0 == 0 ) {
            /* The increment wrapped the 64-bit magnitude. */
            outOfRange = 1;
        }
        else if ( nearestEven && ( (uint64_t) ( absZ1<<1 ) == 0 ) ) {
            /* Exact halfway case: clear bit 0 so the result is even. */
            absZ0 &= ~ (uint64_t) 1;
        }
    }

    if ( ! outOfRange ) {
        z = absZ0;
        if ( zSign ) {
            z = - z;
        }
        /* A nonzero result whose sign disagrees with `zSign' did not fit. */
        outOfRange = ( z && ( ( z < 0 ) ^ zSign ) );
    }

    if ( outOfRange ) {
        float_raise( float_flag_invalid STATUS_VAR);
        if ( zSign ) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }
    if ( absZ1 ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

}
205 158142c2 bellard
206 158142c2 bellard
/*----------------------------------------------------------------------------
207 158142c2 bellard
| Returns the fraction bits of the single-precision floating-point value `a'.
208 158142c2 bellard
*----------------------------------------------------------------------------*/
209 158142c2 bellard
210 bb98fe42 Andreas Färber
INLINE uint32_t extractFloat32Frac( float32 a )
{
    /* The fraction field is the low 23 bits of the raw encoding. */
    uint32_t raw = float32_val(a);

    return raw & 0x007FFFFF;

}
216 158142c2 bellard
217 158142c2 bellard
/*----------------------------------------------------------------------------
218 158142c2 bellard
| Returns the exponent bits of the single-precision floating-point value `a'.
219 158142c2 bellard
*----------------------------------------------------------------------------*/
220 158142c2 bellard
221 158142c2 bellard
INLINE int16 extractFloat32Exp( float32 a )
{
    /* The exponent field is the 8 bits directly above the 23 fraction bits. */
    uint32_t raw = float32_val(a);

    return ( raw>>23 ) & 0xFF;

}
227 158142c2 bellard
228 158142c2 bellard
/*----------------------------------------------------------------------------
229 158142c2 bellard
| Returns the sign bit of the single-precision floating-point value `a'.
230 158142c2 bellard
*----------------------------------------------------------------------------*/
231 158142c2 bellard
232 158142c2 bellard
INLINE flag extractFloat32Sign( float32 a )
{
    /* Bit 31 of the raw encoding is the sign. */
    uint32_t raw = float32_val(a);

    return raw>>31;

}
238 158142c2 bellard
239 158142c2 bellard
/*----------------------------------------------------------------------------
240 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
241 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
242 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
243 37d18660 Peter Maydell
static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
{
    /* Nothing to do unless input flushing is enabled. */
    if (!STATUS(flush_inputs_to_zero)) {
        return a;
    }
    /* Only a zero exponent field with a nonzero fraction is squashed. */
    if (extractFloat32Exp(a) != 0 || extractFloat32Frac(a) == 0) {
        return a;
    }
    float_raise(float_flag_input_denormal STATUS_VAR);
    /* Keep only the sign bit, giving a zero of the same sign. */
    return make_float32(float32_val(a) & 0x80000000);
}
253 37d18660 Peter Maydell
254 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
255 158142c2 bellard
| Normalizes the subnormal single-precision floating-point value represented
256 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
257 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
258 158142c2 bellard
| `zSigPtr', respectively.
259 158142c2 bellard
*----------------------------------------------------------------------------*/
260 158142c2 bellard
261 158142c2 bellard
static void
262 bb98fe42 Andreas Färber
 normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
263 158142c2 bellard
{
264 158142c2 bellard
    int8 shiftCount;
265 158142c2 bellard
266 158142c2 bellard
    shiftCount = countLeadingZeros32( aSig ) - 8;
267 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
268 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
269 158142c2 bellard
270 158142c2 bellard
}
271 158142c2 bellard
272 158142c2 bellard
/*----------------------------------------------------------------------------
273 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
274 158142c2 bellard
| single-precision floating-point value, returning the result.  After being
275 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
276 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
277 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
278 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
279 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
280 158142c2 bellard
| significand.
281 158142c2 bellard
*----------------------------------------------------------------------------*/
282 158142c2 bellard
283 bb98fe42 Andreas Färber
INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
{
    /* The fields are added, not OR-ed, so any bits of `zSig' at or above
     * bit 23 carry into the exponent field. */
    uint32_t signField = ( (uint32_t) zSign )<<31;
    uint32_t expField = ( (uint32_t) zExp )<<23;

    return make_float32( signField + expField + zSig );

}
290 158142c2 bellard
291 158142c2 bellard
/*----------------------------------------------------------------------------
292 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
293 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
294 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
295 158142c2 bellard
| value is simply rounded and packed into the single-precision format, with
296 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
297 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
298 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
299 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
300 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
301 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal single-
302 158142c2 bellard
| precision floating-point number.
303 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 30
304 158142c2 bellard
| and 29, which is 7 bits to the left of the usual location.  This shifted
305 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
306 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
307 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
308 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
309 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
310 158142c2 bellard
| Binary Floating-Point Arithmetic.
311 158142c2 bellard
*----------------------------------------------------------------------------*/
312 158142c2 bellard
313 bb98fe42 Andreas Färber
static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
{
    int8 mode;
    flag nearestEven;
    int8 increment, lowBits;
    flag tiny;

    mode = STATUS(float_rounding_mode);
    nearestEven = ( mode == float_round_nearest_even );

    /* Choose the value added to the 7 extra low-order bits of `zSig'
     * before they are shifted out. */
    if ( nearestEven ) {
        increment = 0x40;
    }
    else if ( mode == float_round_to_zero ) {
        increment = 0;
    }
    else if ( zSign ) {
        increment = ( mode == float_round_up ) ? 0 : 0x7F;
    }
    else {
        increment = ( mode == float_round_down ) ? 0 : 0x7F;
    }

    lowBits = zSig & 0x7F;
    /* The unsigned cast lets one comparison catch both large exponents
     * and negative ones (which become large when viewed as uint16_t). */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + increment ) < 0 ) )
           ) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            /* With a zero increment the significand argument is all ones;
             * added to exponent 0xFF in packFloat32 this yields the largest
             * finite value.  Otherwise the result is an infinity. */
            return packFloat32( zSign, 0xFF, - ( increment == 0 ));
        }
        if ( zExp < 0 ) {
            if (STATUS(flush_to_zero)) {
                float_raise(float_flag_output_denormal STATUS_VAR);
                return packFloat32(zSign, 0, 0);
            }
            tiny =
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + increment < 0x80000000 );
            /* Denormalize; the rounding bits must be recomputed from the
             * shifted significand. */
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            lowBits = zSig & 0x7F;
            if ( tiny && lowBits ) {
                float_raise( float_flag_underflow STATUS_VAR);
            }
        }
    }
    if ( lowBits ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    zSig = ( zSig + increment )>>7;
    /* Exact halfway case under nearest-even: clear bit 0 so the result
     * is even. */
    if ( nearestEven && ( lowBits == 0x40 ) ) {
        zSig &= ~ (uint32_t) 1;
    }
    if ( zSig == 0 ) {
        zExp = 0;
    }
    return packFloat32( zSign, zExp, zSig );

}
368 158142c2 bellard
369 158142c2 bellard
/*----------------------------------------------------------------------------
370 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
371 158142c2 bellard
| and significand `zSig', and returns the proper single-precision floating-
372 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
373 158142c2 bellard
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
374 158142c2 bellard
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
375 158142c2 bellard
| floating-point exponent.
376 158142c2 bellard
*----------------------------------------------------------------------------*/
377 158142c2 bellard
378 158142c2 bellard
static float32
379 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
380 158142c2 bellard
{
381 158142c2 bellard
    int8 shiftCount;
382 158142c2 bellard
383 158142c2 bellard
    shiftCount = countLeadingZeros32( zSig ) - 1;
384 158142c2 bellard
    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
385 158142c2 bellard
386 158142c2 bellard
}
387 158142c2 bellard
388 158142c2 bellard
/*----------------------------------------------------------------------------
389 158142c2 bellard
| Returns the fraction bits of the double-precision floating-point value `a'.
390 158142c2 bellard
*----------------------------------------------------------------------------*/
391 158142c2 bellard
392 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat64Frac( float64 a )
{
    /* The fraction field is the low 52 bits of the raw encoding. */
    uint64_t raw = float64_val(a);

    return raw & LIT64( 0x000FFFFFFFFFFFFF );

}
398 158142c2 bellard
399 158142c2 bellard
/*----------------------------------------------------------------------------
400 158142c2 bellard
| Returns the exponent bits of the double-precision floating-point value `a'.
401 158142c2 bellard
*----------------------------------------------------------------------------*/
402 158142c2 bellard
403 158142c2 bellard
INLINE int16 extractFloat64Exp( float64 a )
{
    /* The exponent field is the 11 bits directly above the 52 fraction
     * bits. */
    uint64_t raw = float64_val(a);

    return ( raw>>52 ) & 0x7FF;

}
409 158142c2 bellard
410 158142c2 bellard
/*----------------------------------------------------------------------------
411 158142c2 bellard
| Returns the sign bit of the double-precision floating-point value `a'.
412 158142c2 bellard
*----------------------------------------------------------------------------*/
413 158142c2 bellard
414 158142c2 bellard
INLINE flag extractFloat64Sign( float64 a )
{
    /* Bit 63 of the raw encoding is the sign. */
    uint64_t raw = float64_val(a);

    return raw>>63;

}
420 158142c2 bellard
421 158142c2 bellard
/*----------------------------------------------------------------------------
422 37d18660 Peter Maydell
| If `a' is denormal and we are in flush-to-zero mode then set the
423 37d18660 Peter Maydell
| input-denormal exception and return zero. Otherwise just return the value.
424 37d18660 Peter Maydell
*----------------------------------------------------------------------------*/
425 37d18660 Peter Maydell
static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
{
    /* Nothing to do unless input flushing is enabled. */
    if (!STATUS(flush_inputs_to_zero)) {
        return a;
    }
    /* Only a zero exponent field with a nonzero fraction is squashed. */
    if (extractFloat64Exp(a) != 0 || extractFloat64Frac(a) == 0) {
        return a;
    }
    float_raise(float_flag_input_denormal STATUS_VAR);
    /* Keep only the sign bit, giving a zero of the same sign. */
    return make_float64(float64_val(a) & (1ULL << 63));
}
435 37d18660 Peter Maydell
436 37d18660 Peter Maydell
/*----------------------------------------------------------------------------
437 158142c2 bellard
| Normalizes the subnormal double-precision floating-point value represented
438 158142c2 bellard
| by the denormalized significand `aSig'.  The normalized exponent and
439 158142c2 bellard
| significand are stored at the locations pointed to by `zExpPtr' and
440 158142c2 bellard
| `zSigPtr', respectively.
441 158142c2 bellard
*----------------------------------------------------------------------------*/
442 158142c2 bellard
443 158142c2 bellard
static void
444 bb98fe42 Andreas Färber
 normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
445 158142c2 bellard
{
446 158142c2 bellard
    int8 shiftCount;
447 158142c2 bellard
448 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig ) - 11;
449 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
450 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
451 158142c2 bellard
452 158142c2 bellard
}
453 158142c2 bellard
454 158142c2 bellard
/*----------------------------------------------------------------------------
455 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
456 158142c2 bellard
| double-precision floating-point value, returning the result.  After being
457 158142c2 bellard
| shifted into the proper positions, the three fields are simply added
458 158142c2 bellard
| together to form the result.  This means that any integer portion of `zSig'
459 158142c2 bellard
| will be added into the exponent.  Since a properly normalized significand
460 158142c2 bellard
| will have an integer portion equal to 1, the `zExp' input should be 1 less
461 158142c2 bellard
| than the desired result exponent whenever `zSig' is a complete, normalized
462 158142c2 bellard
| significand.
463 158142c2 bellard
*----------------------------------------------------------------------------*/
464 158142c2 bellard
465 bb98fe42 Andreas Färber
INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
{
    /* The fields are added, not OR-ed, so any bits of `zSig' at or above
     * bit 52 carry into the exponent field. */
    uint64_t signField = ( (uint64_t) zSign )<<63;
    uint64_t expField = ( (uint64_t) zExp )<<52;

    return make_float64( signField + expField + zSig );

}
472 158142c2 bellard
473 158142c2 bellard
/*----------------------------------------------------------------------------
474 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
475 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
476 158142c2 bellard
| point value corresponding to the abstract input.  Ordinarily, the abstract
477 158142c2 bellard
| value is simply rounded and packed into the double-precision format, with
478 158142c2 bellard
| the inexact exception raised if the abstract input cannot be represented
479 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
480 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
481 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded
482 158142c2 bellard
| to a subnormal number, and the underflow and inexact exceptions are raised
483 158142c2 bellard
| if the abstract input cannot be represented exactly as a subnormal double-
484 158142c2 bellard
| precision floating-point number.
485 158142c2 bellard
|     The input significand `zSig' has its binary point between bits 62
486 158142c2 bellard
| and 61, which is 10 bits to the left of the usual location.  This shifted
487 158142c2 bellard
| significand must be normalized or smaller.  If `zSig' is not normalized,
488 158142c2 bellard
| `zExp' must be 0; in that case, the result returned is a subnormal number,
489 158142c2 bellard
| and it must not require rounding.  In the usual case that `zSig' is
490 158142c2 bellard
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
491 158142c2 bellard
| The handling of underflow and overflow follows the IEC/IEEE Standard for
492 158142c2 bellard
| Binary Floating-Point Arithmetic.
493 158142c2 bellard
*----------------------------------------------------------------------------*/
494 158142c2 bellard
495 bb98fe42 Andreas Färber
static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
496 158142c2 bellard
{
497 158142c2 bellard
    int8 roundingMode;
498 158142c2 bellard
    flag roundNearestEven;
499 158142c2 bellard
    int16 roundIncrement, roundBits;
500 158142c2 bellard
    flag isTiny;
501 158142c2 bellard
502 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
503 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
504 158142c2 bellard
    roundIncrement = 0x200;
505 158142c2 bellard
    if ( ! roundNearestEven ) {
506 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
507 158142c2 bellard
            roundIncrement = 0;
508 158142c2 bellard
        }
509 158142c2 bellard
        else {
510 158142c2 bellard
            roundIncrement = 0x3FF;
511 158142c2 bellard
            if ( zSign ) {
512 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
513 158142c2 bellard
            }
514 158142c2 bellard
            else {
515 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
516 158142c2 bellard
            }
517 158142c2 bellard
        }
518 158142c2 bellard
    }
519 158142c2 bellard
    roundBits = zSig & 0x3FF;
520 bb98fe42 Andreas Färber
    if ( 0x7FD <= (uint16_t) zExp ) {
521 158142c2 bellard
        if (    ( 0x7FD < zExp )
522 158142c2 bellard
             || (    ( zExp == 0x7FD )
523 bb98fe42 Andreas Färber
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
524 158142c2 bellard
           ) {
525 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
526 f090c9d4 pbrook
            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
527 158142c2 bellard
        }
528 158142c2 bellard
        if ( zExp < 0 ) {
529 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
530 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
531 e6afc87f Peter Maydell
                return packFloat64(zSign, 0, 0);
532 e6afc87f Peter Maydell
            }
533 158142c2 bellard
            isTiny =
534 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
535 158142c2 bellard
                || ( zExp < -1 )
536 158142c2 bellard
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
537 158142c2 bellard
            shift64RightJamming( zSig, - zExp, &zSig );
538 158142c2 bellard
            zExp = 0;
539 158142c2 bellard
            roundBits = zSig & 0x3FF;
540 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
541 158142c2 bellard
        }
542 158142c2 bellard
    }
543 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
544 158142c2 bellard
    zSig = ( zSig + roundIncrement )>>10;
545 158142c2 bellard
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
546 158142c2 bellard
    if ( zSig == 0 ) zExp = 0;
547 158142c2 bellard
    return packFloat64( zSign, zExp, zSig );
548 158142c2 bellard
549 158142c2 bellard
}
550 158142c2 bellard
551 158142c2 bellard
/*----------------------------------------------------------------------------
552 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
553 158142c2 bellard
| and significand `zSig', and returns the proper double-precision floating-
554 158142c2 bellard
| point value corresponding to the abstract input.  This routine is just like
555 158142c2 bellard
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
556 158142c2 bellard
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
557 158142c2 bellard
| floating-point exponent.
558 158142c2 bellard
*----------------------------------------------------------------------------*/
559 158142c2 bellard
560 158142c2 bellard
static float64
561 bb98fe42 Andreas Färber
 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
562 158142c2 bellard
{
563 158142c2 bellard
    int8 shiftCount;
564 158142c2 bellard
565 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig ) - 1;
566 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
567 158142c2 bellard
568 158142c2 bellard
}
569 158142c2 bellard
570 158142c2 bellard
/*----------------------------------------------------------------------------
571 158142c2 bellard
| Returns the fraction bits of the extended double-precision floating-point
572 158142c2 bellard
| value `a'.
573 158142c2 bellard
*----------------------------------------------------------------------------*/
574 158142c2 bellard
575 bb98fe42 Andreas Färber
INLINE uint64_t extractFloatx80Frac( floatx80 a )
576 158142c2 bellard
{
577 158142c2 bellard
578 158142c2 bellard
    return a.low;
579 158142c2 bellard
580 158142c2 bellard
}
581 158142c2 bellard
582 158142c2 bellard
/*----------------------------------------------------------------------------
583 158142c2 bellard
| Returns the exponent bits of the extended double-precision floating-point
584 158142c2 bellard
| value `a'.
585 158142c2 bellard
*----------------------------------------------------------------------------*/
586 158142c2 bellard
587 158142c2 bellard
INLINE int32 extractFloatx80Exp( floatx80 a )
588 158142c2 bellard
{
589 158142c2 bellard
590 158142c2 bellard
    return a.high & 0x7FFF;
591 158142c2 bellard
592 158142c2 bellard
}
593 158142c2 bellard
594 158142c2 bellard
/*----------------------------------------------------------------------------
595 158142c2 bellard
| Returns the sign bit of the extended double-precision floating-point value
596 158142c2 bellard
| `a'.
597 158142c2 bellard
*----------------------------------------------------------------------------*/
598 158142c2 bellard
599 158142c2 bellard
INLINE flag extractFloatx80Sign( floatx80 a )
600 158142c2 bellard
{
601 158142c2 bellard
602 158142c2 bellard
    return a.high>>15;
603 158142c2 bellard
604 158142c2 bellard
}
605 158142c2 bellard
606 158142c2 bellard
/*----------------------------------------------------------------------------
607 158142c2 bellard
| Normalizes the subnormal extended double-precision floating-point value
608 158142c2 bellard
| represented by the denormalized significand `aSig'.  The normalized exponent
609 158142c2 bellard
| and significand are stored at the locations pointed to by `zExpPtr' and
610 158142c2 bellard
| `zSigPtr', respectively.
611 158142c2 bellard
*----------------------------------------------------------------------------*/
612 158142c2 bellard
613 158142c2 bellard
static void
614 bb98fe42 Andreas Färber
 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
615 158142c2 bellard
{
616 158142c2 bellard
    int8 shiftCount;
617 158142c2 bellard
618 158142c2 bellard
    shiftCount = countLeadingZeros64( aSig );
619 158142c2 bellard
    *zSigPtr = aSig<<shiftCount;
620 158142c2 bellard
    *zExpPtr = 1 - shiftCount;
621 158142c2 bellard
622 158142c2 bellard
}
623 158142c2 bellard
624 158142c2 bellard
/*----------------------------------------------------------------------------
625 158142c2 bellard
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
626 158142c2 bellard
| extended double-precision floating-point value, returning the result.
627 158142c2 bellard
*----------------------------------------------------------------------------*/
628 158142c2 bellard
629 bb98fe42 Andreas Färber
INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
630 158142c2 bellard
{
631 158142c2 bellard
    floatx80 z;
632 158142c2 bellard
633 158142c2 bellard
    z.low = zSig;
634 bb98fe42 Andreas Färber
    z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
635 158142c2 bellard
    return z;
636 158142c2 bellard
637 158142c2 bellard
}
638 158142c2 bellard
639 158142c2 bellard
/*----------------------------------------------------------------------------
640 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
641 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
642 158142c2 bellard
| and returns the proper extended double-precision floating-point value
643 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
644 158142c2 bellard
| rounded and packed into the extended double-precision format, with the
645 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
646 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
647 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
648 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
649 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
650 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal extended
651 158142c2 bellard
| double-precision floating-point number.
652 158142c2 bellard
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
653 158142c2 bellard
| number of bits as single or double precision, respectively.  Otherwise, the
654 158142c2 bellard
| result is rounded to the full precision of the extended double-precision
655 158142c2 bellard
| format.
656 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
657 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
658 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  The
659 158142c2 bellard
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
660 158142c2 bellard
| Floating-Point Arithmetic.
661 158142c2 bellard
*----------------------------------------------------------------------------*/
662 158142c2 bellard
663 158142c2 bellard
static floatx80
664 158142c2 bellard
 roundAndPackFloatx80(
665 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
666 158142c2 bellard
 STATUS_PARAM)
667 158142c2 bellard
{
668 158142c2 bellard
    int8 roundingMode;
669 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
670 158142c2 bellard
    int64 roundIncrement, roundMask, roundBits;
671 158142c2 bellard
672 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
673 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
674 158142c2 bellard
    if ( roundingPrecision == 80 ) goto precision80;
675 158142c2 bellard
    if ( roundingPrecision == 64 ) {
676 158142c2 bellard
        roundIncrement = LIT64( 0x0000000000000400 );
677 158142c2 bellard
        roundMask = LIT64( 0x00000000000007FF );
678 158142c2 bellard
    }
679 158142c2 bellard
    else if ( roundingPrecision == 32 ) {
680 158142c2 bellard
        roundIncrement = LIT64( 0x0000008000000000 );
681 158142c2 bellard
        roundMask = LIT64( 0x000000FFFFFFFFFF );
682 158142c2 bellard
    }
683 158142c2 bellard
    else {
684 158142c2 bellard
        goto precision80;
685 158142c2 bellard
    }
686 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
687 158142c2 bellard
    if ( ! roundNearestEven ) {
688 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
689 158142c2 bellard
            roundIncrement = 0;
690 158142c2 bellard
        }
691 158142c2 bellard
        else {
692 158142c2 bellard
            roundIncrement = roundMask;
693 158142c2 bellard
            if ( zSign ) {
694 158142c2 bellard
                if ( roundingMode == float_round_up ) roundIncrement = 0;
695 158142c2 bellard
            }
696 158142c2 bellard
            else {
697 158142c2 bellard
                if ( roundingMode == float_round_down ) roundIncrement = 0;
698 158142c2 bellard
            }
699 158142c2 bellard
        }
700 158142c2 bellard
    }
701 158142c2 bellard
    roundBits = zSig0 & roundMask;
702 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
703 158142c2 bellard
        if (    ( 0x7FFE < zExp )
704 158142c2 bellard
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
705 158142c2 bellard
           ) {
706 158142c2 bellard
            goto overflow;
707 158142c2 bellard
        }
708 158142c2 bellard
        if ( zExp <= 0 ) {
709 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
710 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
711 e6afc87f Peter Maydell
                return packFloatx80(zSign, 0, 0);
712 e6afc87f Peter Maydell
            }
713 158142c2 bellard
            isTiny =
714 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
715 158142c2 bellard
                || ( zExp < 0 )
716 158142c2 bellard
                || ( zSig0 <= zSig0 + roundIncrement );
717 158142c2 bellard
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
718 158142c2 bellard
            zExp = 0;
719 158142c2 bellard
            roundBits = zSig0 & roundMask;
720 158142c2 bellard
            if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
721 158142c2 bellard
            if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
722 158142c2 bellard
            zSig0 += roundIncrement;
723 bb98fe42 Andreas Färber
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
724 158142c2 bellard
            roundIncrement = roundMask + 1;
725 158142c2 bellard
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
726 158142c2 bellard
                roundMask |= roundIncrement;
727 158142c2 bellard
            }
728 158142c2 bellard
            zSig0 &= ~ roundMask;
729 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
730 158142c2 bellard
        }
731 158142c2 bellard
    }
732 158142c2 bellard
    if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
733 158142c2 bellard
    zSig0 += roundIncrement;
734 158142c2 bellard
    if ( zSig0 < roundIncrement ) {
735 158142c2 bellard
        ++zExp;
736 158142c2 bellard
        zSig0 = LIT64( 0x8000000000000000 );
737 158142c2 bellard
    }
738 158142c2 bellard
    roundIncrement = roundMask + 1;
739 158142c2 bellard
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
740 158142c2 bellard
        roundMask |= roundIncrement;
741 158142c2 bellard
    }
742 158142c2 bellard
    zSig0 &= ~ roundMask;
743 158142c2 bellard
    if ( zSig0 == 0 ) zExp = 0;
744 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
745 158142c2 bellard
 precision80:
746 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig1 < 0 );
747 158142c2 bellard
    if ( ! roundNearestEven ) {
748 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
749 158142c2 bellard
            increment = 0;
750 158142c2 bellard
        }
751 158142c2 bellard
        else {
752 158142c2 bellard
            if ( zSign ) {
753 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig1;
754 158142c2 bellard
            }
755 158142c2 bellard
            else {
756 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig1;
757 158142c2 bellard
            }
758 158142c2 bellard
        }
759 158142c2 bellard
    }
760 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
761 158142c2 bellard
        if (    ( 0x7FFE < zExp )
762 158142c2 bellard
             || (    ( zExp == 0x7FFE )
763 158142c2 bellard
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
764 158142c2 bellard
                  && increment
765 158142c2 bellard
                )
766 158142c2 bellard
           ) {
767 158142c2 bellard
            roundMask = 0;
768 158142c2 bellard
 overflow:
769 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
770 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
771 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
772 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
773 158142c2 bellard
               ) {
774 158142c2 bellard
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
775 158142c2 bellard
            }
776 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
777 158142c2 bellard
        }
778 158142c2 bellard
        if ( zExp <= 0 ) {
779 158142c2 bellard
            isTiny =
780 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
781 158142c2 bellard
                || ( zExp < 0 )
782 158142c2 bellard
                || ! increment
783 158142c2 bellard
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
784 158142c2 bellard
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
785 158142c2 bellard
            zExp = 0;
786 158142c2 bellard
            if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
787 158142c2 bellard
            if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
788 158142c2 bellard
            if ( roundNearestEven ) {
789 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig1 < 0 );
790 158142c2 bellard
            }
791 158142c2 bellard
            else {
792 158142c2 bellard
                if ( zSign ) {
793 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig1;
794 158142c2 bellard
                }
795 158142c2 bellard
                else {
796 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig1;
797 158142c2 bellard
                }
798 158142c2 bellard
            }
799 158142c2 bellard
            if ( increment ) {
800 158142c2 bellard
                ++zSig0;
801 158142c2 bellard
                zSig0 &=
802 bb98fe42 Andreas Färber
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
803 bb98fe42 Andreas Färber
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
804 158142c2 bellard
            }
805 158142c2 bellard
            return packFloatx80( zSign, zExp, zSig0 );
806 158142c2 bellard
        }
807 158142c2 bellard
    }
808 158142c2 bellard
    if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
809 158142c2 bellard
    if ( increment ) {
810 158142c2 bellard
        ++zSig0;
811 158142c2 bellard
        if ( zSig0 == 0 ) {
812 158142c2 bellard
            ++zExp;
813 158142c2 bellard
            zSig0 = LIT64( 0x8000000000000000 );
814 158142c2 bellard
        }
815 158142c2 bellard
        else {
816 bb98fe42 Andreas Färber
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
817 158142c2 bellard
        }
818 158142c2 bellard
    }
819 158142c2 bellard
    else {
820 158142c2 bellard
        if ( zSig0 == 0 ) zExp = 0;
821 158142c2 bellard
    }
822 158142c2 bellard
    return packFloatx80( zSign, zExp, zSig0 );
823 158142c2 bellard
824 158142c2 bellard
}
825 158142c2 bellard
826 158142c2 bellard
/*----------------------------------------------------------------------------
827 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent
828 158142c2 bellard
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
829 158142c2 bellard
| and returns the proper extended double-precision floating-point value
830 158142c2 bellard
| corresponding to the abstract input.  This routine is just like
831 158142c2 bellard
| `roundAndPackFloatx80' except that the input significand does not have to be
832 158142c2 bellard
| normalized.
833 158142c2 bellard
*----------------------------------------------------------------------------*/
834 158142c2 bellard
835 158142c2 bellard
static floatx80
836 158142c2 bellard
 normalizeRoundAndPackFloatx80(
837 bb98fe42 Andreas Färber
     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
838 158142c2 bellard
 STATUS_PARAM)
839 158142c2 bellard
{
840 158142c2 bellard
    int8 shiftCount;
841 158142c2 bellard
842 158142c2 bellard
    if ( zSig0 == 0 ) {
843 158142c2 bellard
        zSig0 = zSig1;
844 158142c2 bellard
        zSig1 = 0;
845 158142c2 bellard
        zExp -= 64;
846 158142c2 bellard
    }
847 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 );
848 158142c2 bellard
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
849 158142c2 bellard
    zExp -= shiftCount;
850 158142c2 bellard
    return
851 158142c2 bellard
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
852 158142c2 bellard
853 158142c2 bellard
}
854 158142c2 bellard
855 158142c2 bellard
/*----------------------------------------------------------------------------
856 158142c2 bellard
| Returns the least-significant 64 fraction bits of the quadruple-precision
857 158142c2 bellard
| floating-point value `a'.
858 158142c2 bellard
*----------------------------------------------------------------------------*/
859 158142c2 bellard
860 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac1( float128 a )
861 158142c2 bellard
{
862 158142c2 bellard
863 158142c2 bellard
    return a.low;
864 158142c2 bellard
865 158142c2 bellard
}
866 158142c2 bellard
867 158142c2 bellard
/*----------------------------------------------------------------------------
868 158142c2 bellard
| Returns the most-significant 48 fraction bits of the quadruple-precision
869 158142c2 bellard
| floating-point value `a'.
870 158142c2 bellard
*----------------------------------------------------------------------------*/
871 158142c2 bellard
872 bb98fe42 Andreas Färber
INLINE uint64_t extractFloat128Frac0( float128 a )
873 158142c2 bellard
{
874 158142c2 bellard
875 158142c2 bellard
    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
876 158142c2 bellard
877 158142c2 bellard
}
878 158142c2 bellard
879 158142c2 bellard
/*----------------------------------------------------------------------------
880 158142c2 bellard
| Returns the exponent bits of the quadruple-precision floating-point value
881 158142c2 bellard
| `a'.
882 158142c2 bellard
*----------------------------------------------------------------------------*/
883 158142c2 bellard
884 158142c2 bellard
INLINE int32 extractFloat128Exp( float128 a )
885 158142c2 bellard
{
886 158142c2 bellard
887 158142c2 bellard
    return ( a.high>>48 ) & 0x7FFF;
888 158142c2 bellard
889 158142c2 bellard
}
890 158142c2 bellard
891 158142c2 bellard
/*----------------------------------------------------------------------------
892 158142c2 bellard
| Returns the sign bit of the quadruple-precision floating-point value `a'.
893 158142c2 bellard
*----------------------------------------------------------------------------*/
894 158142c2 bellard
895 158142c2 bellard
INLINE flag extractFloat128Sign( float128 a )
896 158142c2 bellard
{
897 158142c2 bellard
898 158142c2 bellard
    return a.high>>63;
899 158142c2 bellard
900 158142c2 bellard
}
901 158142c2 bellard
902 158142c2 bellard
/*----------------------------------------------------------------------------
903 158142c2 bellard
| Normalizes the subnormal quadruple-precision floating-point value
904 158142c2 bellard
| represented by the denormalized significand formed by the concatenation of
905 158142c2 bellard
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
906 158142c2 bellard
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
907 158142c2 bellard
| significand are stored at the location pointed to by `zSig0Ptr', and the
908 158142c2 bellard
| least significant 64 bits of the normalized significand are stored at the
909 158142c2 bellard
| location pointed to by `zSig1Ptr'.
910 158142c2 bellard
*----------------------------------------------------------------------------*/
911 158142c2 bellard
912 158142c2 bellard
static void
913 158142c2 bellard
 normalizeFloat128Subnormal(
914 bb98fe42 Andreas Färber
     uint64_t aSig0,
915 bb98fe42 Andreas Färber
     uint64_t aSig1,
916 158142c2 bellard
     int32 *zExpPtr,
917 bb98fe42 Andreas Färber
     uint64_t *zSig0Ptr,
918 bb98fe42 Andreas Färber
     uint64_t *zSig1Ptr
919 158142c2 bellard
 )
920 158142c2 bellard
{
921 158142c2 bellard
    int8 shiftCount;
922 158142c2 bellard
923 158142c2 bellard
    if ( aSig0 == 0 ) {
924 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig1 ) - 15;
925 158142c2 bellard
        if ( shiftCount < 0 ) {
926 158142c2 bellard
            *zSig0Ptr = aSig1>>( - shiftCount );
927 158142c2 bellard
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
928 158142c2 bellard
        }
929 158142c2 bellard
        else {
930 158142c2 bellard
            *zSig0Ptr = aSig1<<shiftCount;
931 158142c2 bellard
            *zSig1Ptr = 0;
932 158142c2 bellard
        }
933 158142c2 bellard
        *zExpPtr = - shiftCount - 63;
934 158142c2 bellard
    }
935 158142c2 bellard
    else {
936 158142c2 bellard
        shiftCount = countLeadingZeros64( aSig0 ) - 15;
937 158142c2 bellard
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
938 158142c2 bellard
        *zExpPtr = 1 - shiftCount;
939 158142c2 bellard
    }
940 158142c2 bellard
941 158142c2 bellard
}
942 158142c2 bellard
943 158142c2 bellard
/*----------------------------------------------------------------------------
944 158142c2 bellard
| Packs the sign `zSign', the exponent `zExp', and the significand formed
945 158142c2 bellard
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
946 158142c2 bellard
| floating-point value, returning the result.  After being shifted into the
947 158142c2 bellard
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
948 158142c2 bellard
| added together to form the most significant 32 bits of the result.  This
949 158142c2 bellard
| means that any integer portion of `zSig0' will be added into the exponent.
950 158142c2 bellard
| Since a properly normalized significand will have an integer portion equal
951 158142c2 bellard
| to 1, the `zExp' input should be 1 less than the desired result exponent
952 158142c2 bellard
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
953 158142c2 bellard
| significand.
954 158142c2 bellard
*----------------------------------------------------------------------------*/
955 158142c2 bellard
956 158142c2 bellard
INLINE float128
957 bb98fe42 Andreas Färber
 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
958 158142c2 bellard
{
959 158142c2 bellard
    float128 z;
960 158142c2 bellard
961 158142c2 bellard
    z.low = zSig1;
962 bb98fe42 Andreas Färber
    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
963 158142c2 bellard
    return z;
964 158142c2 bellard
965 158142c2 bellard
}
966 158142c2 bellard
967 158142c2 bellard
/*----------------------------------------------------------------------------
968 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
969 158142c2 bellard
| and extended significand formed by the concatenation of `zSig0', `zSig1',
970 158142c2 bellard
| and `zSig2', and returns the proper quadruple-precision floating-point value
971 158142c2 bellard
| corresponding to the abstract input.  Ordinarily, the abstract value is
972 158142c2 bellard
| simply rounded and packed into the quadruple-precision format, with the
973 158142c2 bellard
| inexact exception raised if the abstract input cannot be represented
974 158142c2 bellard
| exactly.  However, if the abstract value is too large, the overflow and
975 158142c2 bellard
| inexact exceptions are raised and an infinity or maximal finite value is
976 158142c2 bellard
| returned.  If the abstract value is too small, the input value is rounded to
977 158142c2 bellard
| a subnormal number, and the underflow and inexact exceptions are raised if
978 158142c2 bellard
| the abstract input cannot be represented exactly as a subnormal quadruple-
979 158142c2 bellard
| precision floating-point number.
980 158142c2 bellard
|     The input significand must be normalized or smaller.  If the input
981 158142c2 bellard
| significand is not normalized, `zExp' must be 0; in that case, the result
982 158142c2 bellard
| returned is a subnormal number, and it must not require rounding.  In the
983 158142c2 bellard
| usual case that the input significand is normalized, `zExp' must be 1 less
984 158142c2 bellard
| than the ``true'' floating-point exponent.  The handling of underflow and
985 158142c2 bellard
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
986 158142c2 bellard
*----------------------------------------------------------------------------*/
987 158142c2 bellard
988 158142c2 bellard
static float128
989 158142c2 bellard
 roundAndPackFloat128(
990 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
991 158142c2 bellard
{
992 158142c2 bellard
    int8 roundingMode;
993 158142c2 bellard
    flag roundNearestEven, increment, isTiny;
994 158142c2 bellard
995 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
996 158142c2 bellard
    roundNearestEven = ( roundingMode == float_round_nearest_even );
997 bb98fe42 Andreas Färber
    increment = ( (int64_t) zSig2 < 0 );
998 158142c2 bellard
    if ( ! roundNearestEven ) {
999 158142c2 bellard
        if ( roundingMode == float_round_to_zero ) {
1000 158142c2 bellard
            increment = 0;
1001 158142c2 bellard
        }
1002 158142c2 bellard
        else {
1003 158142c2 bellard
            if ( zSign ) {
1004 158142c2 bellard
                increment = ( roundingMode == float_round_down ) && zSig2;
1005 158142c2 bellard
            }
1006 158142c2 bellard
            else {
1007 158142c2 bellard
                increment = ( roundingMode == float_round_up ) && zSig2;
1008 158142c2 bellard
            }
1009 158142c2 bellard
        }
1010 158142c2 bellard
    }
1011 bb98fe42 Andreas Färber
    if ( 0x7FFD <= (uint32_t) zExp ) {
1012 158142c2 bellard
        if (    ( 0x7FFD < zExp )
1013 158142c2 bellard
             || (    ( zExp == 0x7FFD )
1014 158142c2 bellard
                  && eq128(
1015 158142c2 bellard
                         LIT64( 0x0001FFFFFFFFFFFF ),
1016 158142c2 bellard
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1017 158142c2 bellard
                         zSig0,
1018 158142c2 bellard
                         zSig1
1019 158142c2 bellard
                     )
1020 158142c2 bellard
                  && increment
1021 158142c2 bellard
                )
1022 158142c2 bellard
           ) {
1023 158142c2 bellard
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1024 158142c2 bellard
            if (    ( roundingMode == float_round_to_zero )
1025 158142c2 bellard
                 || ( zSign && ( roundingMode == float_round_up ) )
1026 158142c2 bellard
                 || ( ! zSign && ( roundingMode == float_round_down ) )
1027 158142c2 bellard
               ) {
1028 158142c2 bellard
                return
1029 158142c2 bellard
                    packFloat128(
1030 158142c2 bellard
                        zSign,
1031 158142c2 bellard
                        0x7FFE,
1032 158142c2 bellard
                        LIT64( 0x0000FFFFFFFFFFFF ),
1033 158142c2 bellard
                        LIT64( 0xFFFFFFFFFFFFFFFF )
1034 158142c2 bellard
                    );
1035 158142c2 bellard
            }
1036 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
1037 158142c2 bellard
        }
1038 158142c2 bellard
        if ( zExp < 0 ) {
1039 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
1040 e6afc87f Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
1041 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
1042 e6afc87f Peter Maydell
            }
1043 158142c2 bellard
            isTiny =
1044 158142c2 bellard
                   ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1045 158142c2 bellard
                || ( zExp < -1 )
1046 158142c2 bellard
                || ! increment
1047 158142c2 bellard
                || lt128(
1048 158142c2 bellard
                       zSig0,
1049 158142c2 bellard
                       zSig1,
1050 158142c2 bellard
                       LIT64( 0x0001FFFFFFFFFFFF ),
1051 158142c2 bellard
                       LIT64( 0xFFFFFFFFFFFFFFFF )
1052 158142c2 bellard
                   );
1053 158142c2 bellard
            shift128ExtraRightJamming(
1054 158142c2 bellard
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1055 158142c2 bellard
            zExp = 0;
1056 158142c2 bellard
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1057 158142c2 bellard
            if ( roundNearestEven ) {
1058 bb98fe42 Andreas Färber
                increment = ( (int64_t) zSig2 < 0 );
1059 158142c2 bellard
            }
1060 158142c2 bellard
            else {
1061 158142c2 bellard
                if ( zSign ) {
1062 158142c2 bellard
                    increment = ( roundingMode == float_round_down ) && zSig2;
1063 158142c2 bellard
                }
1064 158142c2 bellard
                else {
1065 158142c2 bellard
                    increment = ( roundingMode == float_round_up ) && zSig2;
1066 158142c2 bellard
                }
1067 158142c2 bellard
            }
1068 158142c2 bellard
        }
1069 158142c2 bellard
    }
1070 158142c2 bellard
    if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1071 158142c2 bellard
    if ( increment ) {
1072 158142c2 bellard
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1073 158142c2 bellard
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1074 158142c2 bellard
    }
1075 158142c2 bellard
    else {
1076 158142c2 bellard
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1077 158142c2 bellard
    }
1078 158142c2 bellard
    return packFloat128( zSign, zExp, zSig0, zSig1 );
1079 158142c2 bellard
1080 158142c2 bellard
}
1081 158142c2 bellard
1082 158142c2 bellard
/*----------------------------------------------------------------------------
1083 158142c2 bellard
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1084 158142c2 bellard
| and significand formed by the concatenation of `zSig0' and `zSig1', and
1085 158142c2 bellard
| returns the proper quadruple-precision floating-point value corresponding
1086 158142c2 bellard
| to the abstract input.  This routine is just like `roundAndPackFloat128'
1087 158142c2 bellard
| except that the input significand has fewer bits and does not have to be
1088 158142c2 bellard
| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1089 158142c2 bellard
| point exponent.
1090 158142c2 bellard
*----------------------------------------------------------------------------*/
1091 158142c2 bellard
1092 158142c2 bellard
static float128
1093 158142c2 bellard
 normalizeRoundAndPackFloat128(
1094 bb98fe42 Andreas Färber
     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
1095 158142c2 bellard
{
1096 158142c2 bellard
    int8 shiftCount;
1097 bb98fe42 Andreas Färber
    uint64_t zSig2;
1098 158142c2 bellard
1099 158142c2 bellard
    if ( zSig0 == 0 ) {
1100 158142c2 bellard
        zSig0 = zSig1;
1101 158142c2 bellard
        zSig1 = 0;
1102 158142c2 bellard
        zExp -= 64;
1103 158142c2 bellard
    }
1104 158142c2 bellard
    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1105 158142c2 bellard
    if ( 0 <= shiftCount ) {
1106 158142c2 bellard
        zSig2 = 0;
1107 158142c2 bellard
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1108 158142c2 bellard
    }
1109 158142c2 bellard
    else {
1110 158142c2 bellard
        shift128ExtraRightJamming(
1111 158142c2 bellard
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1112 158142c2 bellard
    }
1113 158142c2 bellard
    zExp -= shiftCount;
1114 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1115 158142c2 bellard
1116 158142c2 bellard
}
1117 158142c2 bellard
1118 158142c2 bellard
/*----------------------------------------------------------------------------
1119 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1120 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1121 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1122 158142c2 bellard
*----------------------------------------------------------------------------*/
1123 158142c2 bellard
1124 158142c2 bellard
float32 int32_to_float32( int32 a STATUS_PARAM )
{
    flag negative;

    if ( a == 0 ) {
        return float32_zero;
    }
    /* The most negative value cannot be negated, so it is packed directly. */
    if ( a == (int32_t) 0x80000000 ) {
        return packFloat32( 1, 0x9E, 0 );
    }
    negative = ( a < 0 );
    if ( negative ) {
        a = - a;
    }
    /* Hand the magnitude to the common normalize/round/pack routine. */
    return normalizeRoundAndPackFloat32( negative, 0x9C, a STATUS_VAR );

}
1134 158142c2 bellard
1135 158142c2 bellard
/*----------------------------------------------------------------------------
1136 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1137 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1138 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1139 158142c2 bellard
*----------------------------------------------------------------------------*/
1140 158142c2 bellard
1141 158142c2 bellard
float64 int32_to_float64( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return float64_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* With a 32-bit count, this shift puts the leading 1 of absA at bit 52
     * of the 64-bit word.  No rounding routine is called on this path. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );

}
1156 158142c2 bellard
1157 158142c2 bellard
/*----------------------------------------------------------------------------
1158 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a'
1159 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1160 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1161 158142c2 bellard
| Arithmetic.
1162 158142c2 bellard
*----------------------------------------------------------------------------*/
1163 158142c2 bellard
1164 158142c2 bellard
floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* With a 32-bit count, this shift puts the leading 1 of absA at bit 63. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}
1179 158142c2 bellard
1180 158142c2 bellard
/*----------------------------------------------------------------------------
1181 158142c2 bellard
| Returns the result of converting the 32-bit two's complement integer `a' to
1182 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1183 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1184 158142c2 bellard
*----------------------------------------------------------------------------*/
1185 158142c2 bellard
1186 158142c2 bellard
float128 int32_to_float128( int32 a STATUS_PARAM )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    uint64_t zSig0;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* With a 32-bit count, this shift puts the leading 1 of absA at bit 48
     * of the high significand word; the low word is zero. */
    shiftCount = countLeadingZeros32( absA ) + 17;
    zSig0 = absA;
    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );

}
1201 158142c2 bellard
1202 158142c2 bellard
/*----------------------------------------------------------------------------
1203 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1204 158142c2 bellard
| to the single-precision floating-point format.  The conversion is performed
1205 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1206 158142c2 bellard
*----------------------------------------------------------------------------*/
1207 158142c2 bellard
1208 158142c2 bellard
float32 int64_to_float32( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    shiftCount = countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) {
        /* At least 40 leading zeros: the magnitude fits in 24 bits, so it is
         * shifted into place and packed without calling a rounding routine. */
        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
    }
    else {
        /* Otherwise align the leading 1 at bit 30 for roundAndPackFloat32;
         * a right shift jams the lost bits into the low bit. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( absA, - shiftCount, &absA );
        }
        else {
            absA <<= shiftCount;
        }
        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
    }

}
1233 158142c2 bellard
1234 3430b0be j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| single-precision floating-point format.  The result is never negative.
| Values that do not fit in 24 significant bits are rounded by
| `roundAndPackFloat32'.
*----------------------------------------------------------------------------*/

float32 uint64_to_float32( uint64 a STATUS_PARAM )
{
    int8 shiftCount;

    if ( a == 0 ) return float32_zero;
    shiftCount = countLeadingZeros64( a ) - 40;
    if ( 0 <= shiftCount ) {
        /* Fits in 24 bits: shift into place and pack.  The sign argument is
         * 0: an unsigned input must never yield a negative float (it was
         * previously `1 > 0', i.e. 1, which set the sign bit). */
        return packFloat32( 0, 0x95 - shiftCount, a<<shiftCount );
    }
    else {
        /* Align the leading 1 at bit 30 for roundAndPackFloat32; a right
         * shift jams the lost bits into the low bit. */
        shiftCount += 7;
        if ( shiftCount < 0 ) {
            shift64RightJamming( a, - shiftCount, &a );
        }
        else {
            a <<= shiftCount;
        }
        return roundAndPackFloat32( 0, 0x9C - shiftCount, a STATUS_VAR );
    }
}
1254 75d62a58 j_mayer
1255 158142c2 bellard
/*----------------------------------------------------------------------------
1256 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1257 158142c2 bellard
| to the double-precision floating-point format.  The conversion is performed
1258 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259 158142c2 bellard
*----------------------------------------------------------------------------*/
1260 158142c2 bellard
1261 158142c2 bellard
float64 int64_to_float64( int64 a STATUS_PARAM )
{
    flag negative;

    if ( a == 0 ) {
        return float64_zero;
    }
    /* The most negative value cannot be negated, so it is packed directly. */
    if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
        return packFloat64( 1, 0x43E, 0 );
    }
    negative = ( a < 0 );
    if ( negative ) {
        a = - a;
    }
    /* Hand the magnitude to the common normalize/round/pack routine. */
    return normalizeRoundAndPackFloat64( negative, 0x43C, a STATUS_VAR );

}
1273 158142c2 bellard
1274 75d62a58 j_mayer
/*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit unsigned integer `a' to the
| double-precision floating-point format.  The result is never negative.
*----------------------------------------------------------------------------*/

float64 uint64_to_float64( uint64 a STATUS_PARAM )
{
    int shiftCount;

    if ( a == 0 ) return float64_zero;
    /* Align the leading 1 at bit 62 for roundAndPackFloat64.  When bit 63 of
     * `a' is set the count is -1, so the value is shifted right (jamming the
     * lost bit) instead of left.
     * NOTE(review): this replaces a call to normalizeRoundAndPackFloat64,
     * which is believed to left-shift by this count unconditionally and so
     * to mishandle inputs >= 2^63 -- confirm against its definition. */
    shiftCount = countLeadingZeros64( a ) - 1;
    if ( shiftCount < 0 ) {
        shift64RightJamming( a, - shiftCount, &a );
    }
    else {
        a <<= shiftCount;
    }
    return roundAndPackFloat64( 0, 0x43C - shiftCount, a STATUS_VAR );

}
1280 75d62a58 j_mayer
1281 158142c2 bellard
/*----------------------------------------------------------------------------
1282 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a'
1283 158142c2 bellard
| to the extended double-precision floating-point format.  The conversion
1284 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1285 158142c2 bellard
| Arithmetic.
1286 158142c2 bellard
*----------------------------------------------------------------------------*/
1287 158142c2 bellard
1288 158142c2 bellard
floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    /* Shift the leading 1 up to bit 63; no rounding routine is called. */
    shiftCount = countLeadingZeros64( absA );
    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );

}
1301 158142c2 bellard
1302 158142c2 bellard
/*----------------------------------------------------------------------------
1303 158142c2 bellard
| Returns the result of converting the 64-bit two's complement integer `a' to
1304 158142c2 bellard
| the quadruple-precision floating-point format.  The conversion is performed
1305 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1306 158142c2 bellard
*----------------------------------------------------------------------------*/
1307 158142c2 bellard
1308 158142c2 bellard
float128 int64_to_float128( int64 a STATUS_PARAM )
{
    flag zSign;
    uint64 absA;
    int8 shiftCount;
    int32 zExp;
    uint64_t zSig0, zSig1;

    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic: `- a' on the signed type
     * overflows (undefined behaviour in C) for the most negative input,
     * whereas the unsigned subtraction wraps to the correct magnitude. */
    absA = zSign ? 0 - (uint64) a : (uint64) a;
    /* Total left shift, treating absA as the low word of a 128-bit value. */
    shiftCount = countLeadingZeros64( absA ) + 49;
    zExp = 0x406E - shiftCount;
    if ( 64 <= shiftCount ) {
        /* A whole-word shift: start in the high word, keep the remainder. */
        zSig1 = 0;
        zSig0 = absA;
        shiftCount -= 64;
    }
    else {
        zSig1 = absA;
        zSig0 = 0;
    }
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
1334 158142c2 bellard
1335 158142c2 bellard
/*----------------------------------------------------------------------------
1336 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1337 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1338 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1339 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1340 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1341 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1342 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1343 158142c2 bellard
*----------------------------------------------------------------------------*/
1344 158142c2 bellard
1345 158142c2 bellard
int32 float32_to_int32( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent, rightShift;
    uint32_t frac;
    uint64_t wide;

    a = float32_squash_input_denormal(a STATUS_VAR);
    frac = extractFloat32Frac( a );
    exponent = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );

    /* A NaN is treated as positive. */
    if ( ( exponent == 0xFF ) && frac ) {
        sign = 0;
    }
    /* A non-zero exponent field gets the implicit leading 1. */
    if ( exponent ) {
        frac |= 0x00800000;
    }
    wide = ( (uint64_t) frac )<<32;
    rightShift = 0xAF - exponent;
    if ( 0 < rightShift ) {
        shift64RightJamming( wide, rightShift, &wide );
    }
    /* Rounding and range handling are left to roundAndPackInt32. */
    return roundAndPackInt32( sign, wide STATUS_VAR );

}
1365 158142c2 bellard
1366 158142c2 bellard
/*----------------------------------------------------------------------------
1367 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1368 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
1369 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1370 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
1371 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1372 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
1373 158142c2 bellard
| returned.
1374 158142c2 bellard
*----------------------------------------------------------------------------*/
1375 158142c2 bellard
1376 158142c2 bellard
int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent, shift;
    uint32_t frac;
    int32 result;
    a = float32_squash_input_denormal(a STATUS_VAR);

    frac = extractFloat32Frac( a );
    exponent = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    shift = exponent - 0x9E;

    if ( 0 <= shift ) {
        /* Magnitude of at least 2^31 (or Inf/NaN): only the bit pattern
         * 0xCF000000 is accepted without raising invalid. */
        if ( float32_val(a) == 0xCF000000 ) {
            return (int32_t) 0x80000000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! sign || ( ( exponent == 0xFF ) && frac ) ) {
            return 0x7FFFFFFF;
        }
        return (int32_t) 0x80000000;
    }
    if ( exponent <= 0x7E ) {
        /* Magnitude below 1 truncates to 0; inexact unless the input is zero. */
        if ( exponent | frac ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    frac = ( frac | 0x00800000 )<<8;
    result = frac>>( - shift );
    /* Any bits shifted out make the conversion inexact. */
    if ( (uint32_t) ( frac<<( shift & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1408 158142c2 bellard
1409 158142c2 bellard
/*----------------------------------------------------------------------------
1410 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1411 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
1412 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1413 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
1414 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1415 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
1416 cbcef455 Peter Maydell
| returned.
1417 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
1418 cbcef455 Peter Maydell
1419 cbcef455 Peter Maydell
int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent, shift;
    uint32_t frac;
    int32 result;

    /* NOTE(review): unlike the other float32-to-integer conversions nearby,
     * this one does not call float32_squash_input_denormal() first --
     * confirm whether that is intentional. */
    frac = extractFloat32Frac( a );
    exponent = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    shift = exponent - 0x8E;

    if ( 0 <= shift ) {
        /* Magnitude of at least 2^15 (or Inf/NaN): only the bit pattern
         * 0xC7000000 is accepted without raising invalid. */
        if ( float32_val(a) == 0xC7000000 ) {
            return (int32_t) 0xffff8000;
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! sign || ( ( exponent == 0xFF ) && frac ) ) {
            return 0x7FFF;
        }
        return (int32_t) 0xffff8000;
    }
    if ( exponent <= 0x7E ) {
        /* Magnitude below 1 truncates to 0; inexact unless the input is zero. */
        if ( exponent | frac ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* Rebase the shift so the rest matches the 32-bit conversion. */
    shift -= 0x10;
    frac = ( frac | 0x00800000 )<<8;
    result = frac>>( - shift );
    /* Any bits shifted out make the conversion inexact. */
    if ( (uint32_t) ( frac<<( shift & 31 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1457 cbcef455 Peter Maydell
1458 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
1459 cbcef455 Peter Maydell
| Returns the result of converting the single-precision floating-point value
1460 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1461 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1462 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
1463 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
1464 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
1465 158142c2 bellard
| largest integer with the same sign as `a' is returned.
1466 158142c2 bellard
*----------------------------------------------------------------------------*/
1467 158142c2 bellard
1468 158142c2 bellard
int64 float32_to_int64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent, rightShift;
    uint32_t frac;
    uint64_t wide, extra;
    a = float32_squash_input_denormal(a STATUS_VAR);

    frac = extractFloat32Frac( a );
    exponent = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    rightShift = 0xBE - exponent;

    if ( rightShift < 0 ) {
        /* Exponent above 0xBE (this includes Inf and NaN): invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        if ( sign && ! ( ( exponent == 0xFF ) && frac ) ) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    /* A non-zero exponent field gets the implicit leading 1. */
    if ( exponent ) {
        frac |= 0x00800000;
    }
    wide = ( (uint64_t) frac )<<40;
    shift64ExtraRightJamming( wide, 0, rightShift, &wide, &extra );
    /* Rounding and range handling are left to roundAndPackInt64. */
    return roundAndPackInt64( sign, wide, extra STATUS_VAR );

}
1494 158142c2 bellard
1495 158142c2 bellard
/*----------------------------------------------------------------------------
1496 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1497 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
1498 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1499 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
1500 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1501 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
1502 158142c2 bellard
| returned.
1503 158142c2 bellard
*----------------------------------------------------------------------------*/
1504 158142c2 bellard
1505 158142c2 bellard
int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent, shift;
    uint32_t frac;
    uint64_t wide;
    int64 result;
    a = float32_squash_input_denormal(a STATUS_VAR);

    frac = extractFloat32Frac( a );
    exponent = extractFloat32Exp( a );
    sign = extractFloat32Sign( a );
    shift = exponent - 0xBE;

    if ( 0 <= shift ) {
        /* Magnitude of at least 2^63 (or Inf/NaN): only the bit pattern
         * 0xDF000000 is accepted without raising invalid. */
        if ( float32_val(a) == 0xDF000000 ) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        if ( ! sign || ( ( exponent == 0xFF ) && frac ) ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }
    if ( exponent <= 0x7E ) {
        /* Magnitude below 1 truncates to 0; inexact unless the input is zero. */
        if ( exponent | frac ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    wide = frac | 0x00800000;
    wide <<= 40;
    result = wide>>( - shift );
    /* Any bits shifted out make the conversion inexact. */
    if ( (uint64_t) ( wide<<( shift & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return sign ? - result : result;

}
1541 158142c2 bellard
1542 158142c2 bellard
/*----------------------------------------------------------------------------
1543 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1544 158142c2 bellard
| `a' to the double-precision floating-point format.  The conversion is
1545 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1546 158142c2 bellard
| Arithmetic.
1547 158142c2 bellard
*----------------------------------------------------------------------------*/
1548 158142c2 bellard
1549 158142c2 bellard
float64 float32_to_float64( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat32Sign(a);
    exponent = extractFloat32Exp(a);
    frac = extractFloat32Frac(a);

    if (exponent == 0xFF) {
        /* All-ones exponent: NaN when the fraction is non-zero, else infinity. */
        if (frac != 0) {
            return commonNaNToFloat64(float32ToCommonNaN(a STATUS_VAR)
                                      STATUS_VAR);
        }
        return packFloat64(sign, 0x7FF, 0);
    }

    if (exponent == 0) {
        if (frac == 0) {
            /* Signed zero converts to signed zero. */
            return packFloat64(sign, 0, 0);
        }
        /* Subnormal input: renormalize before re-biasing the exponent. */
        normalizeFloat32Subnormal(frac, &exponent, &frac);
        --exponent;
    }

    /* Re-bias the exponent (+0x380) and widen the fraction by 29 bits. */
    return packFloat64(sign, exponent + 0x380, ((uint64_t) frac) << 29);

}
1571 158142c2 bellard
1572 158142c2 bellard
/*----------------------------------------------------------------------------
1573 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1574 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
1575 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1576 158142c2 bellard
| Arithmetic.
1577 158142c2 bellard
*----------------------------------------------------------------------------*/
1578 158142c2 bellard
1579 158142c2 bellard
floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat32Sign(a);
    exponent = extractFloat32Exp(a);
    frac = extractFloat32Frac(a);

    if (exponent == 0xFF) {
        /* All-ones exponent: NaN when the fraction is non-zero, else infinity. */
        if (frac != 0) {
            return commonNaNToFloatx80(float32ToCommonNaN(a STATUS_VAR)
                                       STATUS_VAR);
        }
        return packFloatx80(sign, 0x7FFF, LIT64(0x8000000000000000));
    }

    if (exponent == 0) {
        if (frac == 0) {
            /* Signed zero converts to signed zero. */
            return packFloatx80(sign, 0, 0);
        }
        /* Subnormal input: renormalize before re-biasing the exponent. */
        normalizeFloat32Subnormal(frac, &exponent, &frac);
    }

    /* The significand passed to packFloatx80 carries an explicit leading bit. */
    frac |= 0x00800000;
    return packFloatx80(sign, exponent + 0x3F80, ((uint64_t) frac) << 40);

}
1601 158142c2 bellard
1602 158142c2 bellard
/*----------------------------------------------------------------------------
1603 158142c2 bellard
| Returns the result of converting the single-precision floating-point value
1604 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
1605 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
1606 158142c2 bellard
| Arithmetic.
1607 158142c2 bellard
*----------------------------------------------------------------------------*/
1608 158142c2 bellard
1609 158142c2 bellard
float128 float32_to_float128( float32 a STATUS_PARAM )
{
    flag sign;
    int16 exponent;
    uint32_t frac;

    a = float32_squash_input_denormal(a STATUS_VAR);
    sign = extractFloat32Sign(a);
    exponent = extractFloat32Exp(a);
    frac = extractFloat32Frac(a);

    if (exponent == 0xFF) {
        /* All-ones exponent: NaN when the fraction is non-zero, else infinity. */
        if (frac != 0) {
            return commonNaNToFloat128(float32ToCommonNaN(a STATUS_VAR)
                                       STATUS_VAR);
        }
        return packFloat128(sign, 0x7FFF, 0, 0);
    }

    if (exponent == 0) {
        if (frac == 0) {
            /* Signed zero converts to signed zero. */
            return packFloat128(sign, 0, 0, 0);
        }
        /* Subnormal input: renormalize before re-biasing the exponent. */
        normalizeFloat32Subnormal(frac, &exponent, &frac);
        --exponent;
    }

    /* Re-bias the exponent (+0x3F80); the fraction goes in the high word. */
    return packFloat128(sign, exponent + 0x3F80, ((uint64_t) frac) << 25, 0);

}
1631 158142c2 bellard
1632 158142c2 bellard
/*----------------------------------------------------------------------------
1633 158142c2 bellard
| Rounds the single-precision floating-point value `a' to an integer, and
1634 158142c2 bellard
| returns the result as a single-precision floating-point value.  The
1635 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
1636 158142c2 bellard
| Floating-Point Arithmetic.
1637 158142c2 bellard
*----------------------------------------------------------------------------*/
1638 158142c2 bellard
1639 158142c2 bellard
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag sign;
    int16 exponent;
    uint32_t unitBit, fracBitsMask;
    int8 mode;
    uint32_t bits;

    a = float32_squash_input_denormal(a STATUS_VAR);
    exponent = extractFloat32Exp(a);

    if (exponent >= 0x96) {
        /* Returned unchanged, except that NaNs go through propagation. */
        if ((exponent == 0xFF) && extractFloat32Frac(a)) {
            return propagateFloat32NaN(a, a STATUS_VAR);
        }
        return a;
    }

    if (exponent <= 0x7E) {
        /* Shifting out the sign bit leaves zero only for +0 and -0. */
        if ((uint32_t) (float32_val(a) << 1) == 0) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloat32Sign(a);
        switch (STATUS(float_rounding_mode)) {
        case float_round_nearest_even:
            /* Exponent 0x7E with a non-zero fraction rounds to +/-1. */
            if ((exponent == 0x7E) && extractFloat32Frac(a)) {
                return packFloat32(sign, 0x7F, 0);
            }
            break;
        case float_round_down:
            /* 0xBF800000 is -1.0; positive inputs give +0. */
            return make_float32(sign ? 0xBF800000 : 0);
        case float_round_up:
            /* 0x80000000 is -0; positive inputs give +1.0 (0x3F800000). */
            return make_float32(sign ? 0x80000000 : 0x3F800000);
        default:
            break;
        }
        /* Remaining cases produce a zero carrying the input's sign. */
        return packFloat32(sign, 0, 0);
    }

    /* unitBit marks the lowest bit kept; fracBitsMask covers bits below it. */
    unitBit = (uint32_t) 1 << (0x96 - exponent);
    fracBitsMask = unitBit - 1;
    bits = float32_val(a);
    mode = STATUS(float_rounding_mode);

    if (mode == float_round_nearest_even) {
        bits += unitBit >> 1;
        /* Exact tie: clear the unit bit so the result is even. */
        if ((bits & fracBitsMask) == 0) {
            bits &= ~unitBit;
        }
    } else if (mode != float_round_to_zero) {
        /* Bump the magnitude when the sign and the direction call for it. */
        if (extractFloat32Sign(make_float32(bits)) ^ (mode == float_round_up)) {
            bits += fracBitsMask;
        }
    }
    bits &= ~fracBitsMask;

    if (bits != float32_val(a)) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return make_float32(bits);

}
1691 158142c2 bellard
1692 158142c2 bellard
/*----------------------------------------------------------------------------
1693 158142c2 bellard
| Returns the result of adding the absolute values of the single-precision
1694 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1695 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
1696 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
1697 158142c2 bellard
| Floating-Point Arithmetic.
1698 158142c2 bellard
*----------------------------------------------------------------------------*/
1699 158142c2 bellard
1700 158142c2 bellard
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 expA, expB, expZ;
    uint32_t sigA, sigB, sigZ;
    int16 delta;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    delta = expA - expB;
    /* Both fractions are held shifted left by 6 from here on. */
    sigA <<= 6;
    sigB <<= 6;

    if (delta == 0) {
        /* Equal exponents. */
        if (expA == 0xFF) {
            if (sigA | sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expA == 0) {
            /* Both operands have a zero exponent field. */
            if (STATUS(flush_to_zero)) {
                if (sigA | sigB) {
                    float_raise(float_flag_output_denormal STATUS_VAR);
                }
                return packFloat32(zSign, 0, 0);
            }
            return packFloat32(zSign, 0, (sigA + sigB) >> 6);
        }
        sigZ = 0x40000000 + sigA + sigB;
        return roundAndPackFloat32(zSign, expA, sigZ STATUS_VAR);
    }

    if (delta > 0) {
        /* a has the larger exponent: align b to it. */
        if (expA == 0xFF) {
            if (sigA) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            --delta;
        } else {
            sigB |= 0x20000000;
        }
        shift32RightJamming(sigB, delta, &sigB);
        expZ = expA;
    } else {
        /* b has the larger exponent: align a to it. */
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign, 0xFF, 0);
        }
        if (expA == 0) {
            ++delta;
        } else {
            sigA |= 0x20000000;
        }
        shift32RightJamming(sigA, -delta, &sigA);
        expZ = expB;
    }

    sigA |= 0x20000000;
    sigZ = (sigA + sigB) << 1;
    --expZ;
    /* If the doubled sum reached bit 31, use the undoubled sum instead. */
    if ((int32_t) sigZ < 0) {
        sigZ = sigA + sigB;
        ++expZ;
    }
    return roundAndPackFloat32(zSign, expZ, sigZ STATUS_VAR);

}
1770 158142c2 bellard
1771 158142c2 bellard
/*----------------------------------------------------------------------------
1772 158142c2 bellard
| Returns the result of subtracting the absolute values of the single-
1773 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
1774 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
1775 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
1776 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
1777 158142c2 bellard
*----------------------------------------------------------------------------*/
1778 158142c2 bellard
1779 158142c2 bellard
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 expA, expB, expZ;
    uint32_t sigA, sigB, sigZ;
    int16 delta;
    flag aIsLarger;

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    delta = expA - expB;
    /* Both fractions are held shifted left by 7 from here on. */
    sigA <<= 7;
    sigB <<= 7;

    if (delta > 0) {
        /* a has the larger exponent: align b to it. */
        if (expA == 0xFF) {
            if (sigA) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return a;
        }
        if (expB == 0) {
            --delta;
        } else {
            sigB |= 0x40000000;
        }
        shift32RightJamming(sigB, delta, &sigB);
        sigA |= 0x40000000;
        aIsLarger = 1;
    } else if (delta < 0) {
        /* b has the larger exponent: align a to it. */
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            return packFloat32(zSign ^ 1, 0xFF, 0);
        }
        if (expA == 0) {
            ++delta;
        } else {
            sigA |= 0x40000000;
        }
        shift32RightJamming(sigA, -delta, &sigA);
        sigB |= 0x40000000;
        aIsLarger = 0;
    } else {
        /* Equal exponents. */
        if (expA == 0xFF) {
            if (sigA | sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Neither is a NaN, so this is infinity minus infinity. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        if (expA == 0) {
            expA = 1;
            expB = 1;
        }
        if (sigB < sigA) {
            aIsLarger = 1;
        } else if (sigA < sigB) {
            aIsLarger = 0;
        } else {
            /* Exact cancellation: zero, negative only when rounding down. */
            return packFloat32(STATUS(float_rounding_mode) == float_round_down,
                               0, 0);
        }
    }

    if (aIsLarger) {
        sigZ = sigA - sigB;
        expZ = expA;
    } else {
        /* b dominates, so the result sign is flipped. */
        sigZ = sigB - sigA;
        expZ = expB;
        zSign ^= 1;
    }
    --expZ;
    return normalizeRoundAndPackFloat32(zSign, expZ, sigZ STATUS_VAR);

}
1845 158142c2 bellard
1846 158142c2 bellard
/*----------------------------------------------------------------------------
1847 158142c2 bellard
| Returns the result of adding the single-precision floating-point values `a'
1848 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
1849 158142c2 bellard
| Binary Floating-Point Arithmetic.
1850 158142c2 bellard
*----------------------------------------------------------------------------*/
1851 158142c2 bellard
1852 158142c2 bellard
float32 float32_add( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);

    /* Opposite signs: adding is a subtraction of magnitudes. */
    if (signA != signB) {
        return subFloat32Sigs(a, b, signA STATUS_VAR);
    }
    return addFloat32Sigs(a, b, signA STATUS_VAR);

}
1868 158142c2 bellard
1869 158142c2 bellard
/*----------------------------------------------------------------------------
1870 158142c2 bellard
| Returns the result of subtracting the single-precision floating-point values
1871 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1872 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1873 158142c2 bellard
*----------------------------------------------------------------------------*/
1874 158142c2 bellard
1875 158142c2 bellard
float32 float32_sub( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);
    signA = extractFloat32Sign(a);
    signB = extractFloat32Sign(b);

    /* Opposite signs: subtracting is an addition of magnitudes. */
    if (signA != signB) {
        return addFloat32Sigs(a, b, signA STATUS_VAR);
    }
    return subFloat32Sigs(a, b, signA STATUS_VAR);

}
1891 158142c2 bellard
1892 158142c2 bellard
/*----------------------------------------------------------------------------
1893 158142c2 bellard
| Returns the result of multiplying the single-precision floating-point values
1894 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1895 158142c2 bellard
| for Binary Floating-Point Arithmetic.
1896 158142c2 bellard
*----------------------------------------------------------------------------*/
1897 158142c2 bellard
1898 158142c2 bellard
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint32_t sigA, sigB;
    uint64_t product64;
    uint32_t sigZ;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    if (expA == 0xFF) {
        /* a is a NaN or an infinity. */
        if (sigA || ((expB == 0xFF) && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Infinity times zero is invalid. */
        if ((expB | sigB) == 0) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }
    if (expB == 0xFF) {
        /* b is a NaN or an infinity; a is finite. */
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Zero times infinity is invalid. */
        if ((expA | sigA) == 0) {
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }

    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }
    if (expB == 0) {
        if (sigB == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }

    expZ = expA + expB - 0x7F;
    /* Set the implicit bits and pre-shift both significands. */
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    /* Take the high 32 bits of the 64-bit product via the jamming shift. */
    shift64RightJamming(((uint64_t) sigA) * sigB, 32, &product64);
    sigZ = product64;
    /* Shift left once more when bit 30 is still clear. */
    if ((int32_t) (sigZ << 1) >= 0) {
        sigZ <<= 1;
        --expZ;
    }
    return roundAndPackFloat32(signZ, expZ, sigZ STATUS_VAR);

}
1954 158142c2 bellard
1955 158142c2 bellard
/*----------------------------------------------------------------------------
1956 158142c2 bellard
| Returns the result of dividing the single-precision floating-point value `a'
1957 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to the
1958 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1959 158142c2 bellard
*----------------------------------------------------------------------------*/
1960 158142c2 bellard
1961 158142c2 bellard
float32 float32_div( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signB, signZ;
    int16 expA, expB, expZ;
    uint32_t sigA, sigB, sigZ;
    uint64_t dividend;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);
    signB = extractFloat32Sign(b);
    signZ = signA ^ signB;

    if (expA == 0xFF) {
        /* a is a NaN or an infinity. */
        if (sigA) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        if (expB == 0xFF) {
            if (sigB) {
                return propagateFloat32NaN(a, b STATUS_VAR);
            }
            /* Infinity divided by infinity is invalid. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32(signZ, 0xFF, 0);
    }
    if (expB == 0xFF) {
        /* b is a NaN or an infinity; a is finite. */
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        return packFloat32(signZ, 0, 0);
    }

    if (expB == 0) {
        if (sigB == 0) {
            /* Division by zero: 0/0 is invalid, otherwise divbyzero. */
            if ((expA | sigA) == 0) {
                float_raise(float_flag_invalid STATUS_VAR);
                return float32_default_nan;
            }
            float_raise(float_flag_divbyzero STATUS_VAR);
            return packFloat32(signZ, 0xFF, 0);
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }
    if (expA == 0) {
        if (sigA == 0) {
            return packFloat32(signZ, 0, 0);
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    expZ = expA - expB + 0x7D;
    /* Set the implicit bits and pre-shift both significands. */
    sigA = (sigA | 0x00800000) << 7;
    sigB = (sigB | 0x00800000) << 8;
    /* Halve the dividend when needed so the quotient fits in 32 bits. */
    if (sigB <= (sigA + sigA)) {
        sigA >>= 1;
        ++expZ;
    }
    dividend = ((uint64_t) sigA) << 32;
    sigZ = dividend / sigB;
    /* Low six bits all zero: set the lowest bit if a remainder exists. */
    if ((sigZ & 0x3F) == 0) {
        sigZ |= ((uint64_t) sigB * sigZ != dividend);
    }
    return roundAndPackFloat32(signZ, expZ, sigZ STATUS_VAR);

}
2018 158142c2 bellard
2019 158142c2 bellard
/*----------------------------------------------------------------------------
2020 158142c2 bellard
| Returns the remainder of the single-precision floating-point value `a'
2021 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
2022 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2023 158142c2 bellard
*----------------------------------------------------------------------------*/
2024 158142c2 bellard
2025 158142c2 bellard
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
{
    flag signA, signZ;
    int16 expA, expB, delta;
    uint32_t sigA, sigB;
    uint32_t quot;
    uint64_t sigA64, sigB64, quot64;
    uint32_t prevSigA;
    int32_t mean;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    sigA = extractFloat32Frac(a);
    expA = extractFloat32Exp(a);
    signA = extractFloat32Sign(a);
    sigB = extractFloat32Frac(b);
    expB = extractFloat32Exp(b);

    if (expA == 0xFF) {
        /* a is a NaN or an infinity. */
        if (sigA || ((expB == 0xFF) && sigB)) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Remainder of an infinity is invalid. */
        float_raise(float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if (expB == 0xFF) {
        if (sigB) {
            return propagateFloat32NaN(a, b STATUS_VAR);
        }
        /* Finite a with infinite b: a is returned unchanged. */
        return a;
    }
    if (expB == 0) {
        if (sigB == 0) {
            /* Remainder with respect to zero is invalid. */
            float_raise(float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        normalizeFloat32Subnormal(sigB, &expB, &sigB);
    }
    if (expA == 0) {
        if (sigA == 0) {
            return a;
        }
        normalizeFloat32Subnormal(sigA, &expA, &sigA);
    }

    delta = expA - expB;
    sigA |= 0x00800000;
    sigB |= 0x00800000;

    if (delta < 32) {
        /* Small exponent gap: 32-bit arithmetic with one 64-bit divide. */
        sigA <<= 8;
        sigB <<= 8;
        if (delta < 0) {
            /* Exponent gap below -1: a is returned unchanged. */
            if (delta < -1) {
                return a;
            }
            sigA >>= 1;
        }
        quot = (sigB <= sigA);
        if (quot) {
            sigA -= sigB;
        }
        if (delta > 0) {
            quot = (((uint64_t) sigA) << 32) / sigB;
            quot >>= 32 - delta;
            sigB >>= 2;
            sigA = ((sigA >> 1) << (delta - 1)) - sigB * quot;
        } else {
            sigA >>= 2;
            sigB >>= 2;
        }
    } else {
        /* Large exponent gap: reduce in steps of 62 bits using 64-bit
         * quotient estimates. */
        if (sigB <= sigA) {
            sigA -= sigB;
        }
        sigA64 = ((uint64_t) sigA) << 40;
        sigB64 = ((uint64_t) sigB) << 40;
        delta -= 64;
        while (delta > 0) {
            quot64 = estimateDiv128To64(sigA64, 0, sigB64);
            /* Lower the estimate by 2, clamping at zero. */
            quot64 = (quot64 > 2) ? quot64 - 2 : 0;
            sigA64 = -((sigB * quot64) << 38);
            delta -= 62;
        }
        delta += 64;
        quot64 = estimateDiv128To64(sigA64, 0, sigB64);
        quot64 = (quot64 > 2) ? quot64 - 2 : 0;
        quot = quot64 >> (64 - delta);
        sigB <<= 6;
        sigA = ((sigA64 >> 33) << (delta - 1)) - sigB * quot;
    }

    /* Keep subtracting b until the remainder goes negative, remembering
     * the last non-negative value. */
    do {
        prevSigA = sigA;
        ++quot;
        sigA -= sigB;
    } while ((int32_t) sigA >= 0);

    /* Pick whichever candidate is nearer zero; on a tie the parity of the
     * quotient decides. */
    mean = sigA + prevSigA;
    if ((mean < 0) || ((mean == 0) && (quot & 1))) {
        sigA = prevSigA;
    }

    signZ = ((int32_t) sigA < 0);
    if (signZ) {
        sigA = -sigA;
    }
    return normalizeRoundAndPackFloat32(signA ^ signZ, expB, sigA STATUS_VAR);

}
2119 158142c2 bellard
2120 158142c2 bellard
/*----------------------------------------------------------------------------
2121 369be8f6 Peter Maydell
| Returns the result of multiplying the single-precision floating-point values
2122 369be8f6 Peter Maydell
| `a' and `b' then adding 'c', with no intermediate rounding step after the
2123 369be8f6 Peter Maydell
| multiplication.  The operation is performed according to the IEC/IEEE
2124 369be8f6 Peter Maydell
| Standard for Binary Floating-Point Arithmetic 754-2008.
2125 369be8f6 Peter Maydell
| The flags argument allows the caller to select negation of the
2126 369be8f6 Peter Maydell
| addend, the intermediate product, or the final result. (The difference
2127 369be8f6 Peter Maydell
| between this and having the caller do a separate negation is that negating
2128 369be8f6 Peter Maydell
| externally will flip the sign bit on NaNs.)
2129 369be8f6 Peter Maydell
*----------------------------------------------------------------------------*/
2130 369be8f6 Peter Maydell
2131 369be8f6 Peter Maydell
float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
2132 369be8f6 Peter Maydell
{
2133 369be8f6 Peter Maydell
    flag aSign, bSign, cSign, zSign;
2134 369be8f6 Peter Maydell
    int aExp, bExp, cExp, pExp, zExp, expDiff;
2135 369be8f6 Peter Maydell
    uint32_t aSig, bSig, cSig;
2136 369be8f6 Peter Maydell
    flag pInf, pZero, pSign;
2137 369be8f6 Peter Maydell
    uint64_t pSig64, cSig64, zSig64;
2138 369be8f6 Peter Maydell
    uint32_t pSig;
2139 369be8f6 Peter Maydell
    int shiftcount;
2140 369be8f6 Peter Maydell
    flag signflip, infzero;
2141 369be8f6 Peter Maydell
2142 369be8f6 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2143 369be8f6 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2144 369be8f6 Peter Maydell
    c = float32_squash_input_denormal(c STATUS_VAR);
2145 369be8f6 Peter Maydell
    aSig = extractFloat32Frac(a);
2146 369be8f6 Peter Maydell
    aExp = extractFloat32Exp(a);
2147 369be8f6 Peter Maydell
    aSign = extractFloat32Sign(a);
2148 369be8f6 Peter Maydell
    bSig = extractFloat32Frac(b);
2149 369be8f6 Peter Maydell
    bExp = extractFloat32Exp(b);
2150 369be8f6 Peter Maydell
    bSign = extractFloat32Sign(b);
2151 369be8f6 Peter Maydell
    cSig = extractFloat32Frac(c);
2152 369be8f6 Peter Maydell
    cExp = extractFloat32Exp(c);
2153 369be8f6 Peter Maydell
    cSign = extractFloat32Sign(c);
2154 369be8f6 Peter Maydell
2155 369be8f6 Peter Maydell
    infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2156 369be8f6 Peter Maydell
               (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2157 369be8f6 Peter Maydell
2158 369be8f6 Peter Maydell
    /* It is implementation-defined whether the cases of (0,inf,qnan)
2159 369be8f6 Peter Maydell
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
2160 369be8f6 Peter Maydell
     * they return if they do), so we have to hand this information
2161 369be8f6 Peter Maydell
     * off to the target-specific pick-a-NaN routine.
2162 369be8f6 Peter Maydell
     */
2163 369be8f6 Peter Maydell
    if (((aExp == 0xff) && aSig) ||
2164 369be8f6 Peter Maydell
        ((bExp == 0xff) && bSig) ||
2165 369be8f6 Peter Maydell
        ((cExp == 0xff) && cSig)) {
2166 369be8f6 Peter Maydell
        return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
2167 369be8f6 Peter Maydell
    }
2168 369be8f6 Peter Maydell
2169 369be8f6 Peter Maydell
    if (infzero) {
2170 369be8f6 Peter Maydell
        float_raise(float_flag_invalid STATUS_VAR);
2171 369be8f6 Peter Maydell
        return float32_default_nan;
2172 369be8f6 Peter Maydell
    }
2173 369be8f6 Peter Maydell
2174 369be8f6 Peter Maydell
    if (flags & float_muladd_negate_c) {
2175 369be8f6 Peter Maydell
        cSign ^= 1;
2176 369be8f6 Peter Maydell
    }
2177 369be8f6 Peter Maydell
2178 369be8f6 Peter Maydell
    signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2179 369be8f6 Peter Maydell
2180 369be8f6 Peter Maydell
    /* Work out the sign and type of the product */
2181 369be8f6 Peter Maydell
    pSign = aSign ^ bSign;
2182 369be8f6 Peter Maydell
    if (flags & float_muladd_negate_product) {
2183 369be8f6 Peter Maydell
        pSign ^= 1;
2184 369be8f6 Peter Maydell
    }
2185 369be8f6 Peter Maydell
    pInf = (aExp == 0xff) || (bExp == 0xff);
2186 369be8f6 Peter Maydell
    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2187 369be8f6 Peter Maydell
2188 369be8f6 Peter Maydell
    if (cExp == 0xff) {
2189 369be8f6 Peter Maydell
        if (pInf && (pSign ^ cSign)) {
2190 369be8f6 Peter Maydell
            /* addition of opposite-signed infinities => InvalidOperation */
2191 369be8f6 Peter Maydell
            float_raise(float_flag_invalid STATUS_VAR);
2192 369be8f6 Peter Maydell
            return float32_default_nan;
2193 369be8f6 Peter Maydell
        }
2194 369be8f6 Peter Maydell
        /* Otherwise generate an infinity of the same sign */
2195 369be8f6 Peter Maydell
        return packFloat32(cSign ^ signflip, 0xff, 0);
2196 369be8f6 Peter Maydell
    }
2197 369be8f6 Peter Maydell
2198 369be8f6 Peter Maydell
    if (pInf) {
2199 369be8f6 Peter Maydell
        return packFloat32(pSign ^ signflip, 0xff, 0);
2200 369be8f6 Peter Maydell
    }
2201 369be8f6 Peter Maydell
2202 369be8f6 Peter Maydell
    if (pZero) {
2203 369be8f6 Peter Maydell
        if (cExp == 0) {
2204 369be8f6 Peter Maydell
            if (cSig == 0) {
2205 369be8f6 Peter Maydell
                /* Adding two exact zeroes */
2206 369be8f6 Peter Maydell
                if (pSign == cSign) {
2207 369be8f6 Peter Maydell
                    zSign = pSign;
2208 369be8f6 Peter Maydell
                } else if (STATUS(float_rounding_mode) == float_round_down) {
2209 369be8f6 Peter Maydell
                    zSign = 1;
2210 369be8f6 Peter Maydell
                } else {
2211 369be8f6 Peter Maydell
                    zSign = 0;
2212 369be8f6 Peter Maydell
                }
2213 369be8f6 Peter Maydell
                return packFloat32(zSign ^ signflip, 0, 0);
2214 369be8f6 Peter Maydell
            }
2215 369be8f6 Peter Maydell
            /* Exact zero plus a denorm */
2216 369be8f6 Peter Maydell
            if (STATUS(flush_to_zero)) {
2217 369be8f6 Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
2218 369be8f6 Peter Maydell
                return packFloat32(cSign ^ signflip, 0, 0);
2219 369be8f6 Peter Maydell
            }
2220 369be8f6 Peter Maydell
        }
2221 369be8f6 Peter Maydell
        /* Zero plus something non-zero : just return the something */
2222 369be8f6 Peter Maydell
        return c ^ (signflip << 31);
2223 369be8f6 Peter Maydell
    }
2224 369be8f6 Peter Maydell
2225 369be8f6 Peter Maydell
    if (aExp == 0) {
2226 369be8f6 Peter Maydell
        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2227 369be8f6 Peter Maydell
    }
2228 369be8f6 Peter Maydell
    if (bExp == 0) {
2229 369be8f6 Peter Maydell
        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2230 369be8f6 Peter Maydell
    }
2231 369be8f6 Peter Maydell
2232 369be8f6 Peter Maydell
    /* Calculate the actual result a * b + c */
2233 369be8f6 Peter Maydell
2234 369be8f6 Peter Maydell
    /* Multiply first; this is easy. */
2235 369be8f6 Peter Maydell
    /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
2236 369be8f6 Peter Maydell
     * because we want the true exponent, not the "one-less-than"
2237 369be8f6 Peter Maydell
     * flavour that roundAndPackFloat32() takes.
2238 369be8f6 Peter Maydell
     */
2239 369be8f6 Peter Maydell
    pExp = aExp + bExp - 0x7e;
2240 369be8f6 Peter Maydell
    aSig = (aSig | 0x00800000) << 7;
2241 369be8f6 Peter Maydell
    bSig = (bSig | 0x00800000) << 8;
2242 369be8f6 Peter Maydell
    pSig64 = (uint64_t)aSig * bSig;
2243 369be8f6 Peter Maydell
    if ((int64_t)(pSig64 << 1) >= 0) {
2244 369be8f6 Peter Maydell
        pSig64 <<= 1;
2245 369be8f6 Peter Maydell
        pExp--;
2246 369be8f6 Peter Maydell
    }
2247 369be8f6 Peter Maydell
2248 369be8f6 Peter Maydell
    zSign = pSign ^ signflip;
2249 369be8f6 Peter Maydell
2250 369be8f6 Peter Maydell
    /* Now pSig64 is the significand of the multiply, with the explicit bit in
2251 369be8f6 Peter Maydell
     * position 62.
2252 369be8f6 Peter Maydell
     */
2253 369be8f6 Peter Maydell
    if (cExp == 0) {
2254 369be8f6 Peter Maydell
        if (!cSig) {
2255 369be8f6 Peter Maydell
            /* Throw out the special case of c being an exact zero now */
2256 369be8f6 Peter Maydell
            shift64RightJamming(pSig64, 32, &pSig64);
2257 369be8f6 Peter Maydell
            pSig = pSig64;
2258 369be8f6 Peter Maydell
            return roundAndPackFloat32(zSign, pExp - 1,
2259 369be8f6 Peter Maydell
                                       pSig STATUS_VAR);
2260 369be8f6 Peter Maydell
        }
2261 369be8f6 Peter Maydell
        normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2262 369be8f6 Peter Maydell
    }
2263 369be8f6 Peter Maydell
2264 369be8f6 Peter Maydell
    cSig64 = (uint64_t)cSig << (62 - 23);
2265 369be8f6 Peter Maydell
    cSig64 |= LIT64(0x4000000000000000);
2266 369be8f6 Peter Maydell
    expDiff = pExp - cExp;
2267 369be8f6 Peter Maydell
2268 369be8f6 Peter Maydell
    if (pSign == cSign) {
2269 369be8f6 Peter Maydell
        /* Addition */
2270 369be8f6 Peter Maydell
        if (expDiff > 0) {
2271 369be8f6 Peter Maydell
            /* scale c to match p */
2272 369be8f6 Peter Maydell
            shift64RightJamming(cSig64, expDiff, &cSig64);
2273 369be8f6 Peter Maydell
            zExp = pExp;
2274 369be8f6 Peter Maydell
        } else if (expDiff < 0) {
2275 369be8f6 Peter Maydell
            /* scale p to match c */
2276 369be8f6 Peter Maydell
            shift64RightJamming(pSig64, -expDiff, &pSig64);
2277 369be8f6 Peter Maydell
            zExp = cExp;
2278 369be8f6 Peter Maydell
        } else {
2279 369be8f6 Peter Maydell
            /* no scaling needed */
2280 369be8f6 Peter Maydell
            zExp = cExp;
2281 369be8f6 Peter Maydell
        }
2282 369be8f6 Peter Maydell
        /* Add significands and make sure explicit bit ends up in posn 62 */
2283 369be8f6 Peter Maydell
        zSig64 = pSig64 + cSig64;
2284 369be8f6 Peter Maydell
        if ((int64_t)zSig64 < 0) {
2285 369be8f6 Peter Maydell
            shift64RightJamming(zSig64, 1, &zSig64);
2286 369be8f6 Peter Maydell
        } else {
2287 369be8f6 Peter Maydell
            zExp--;
2288 369be8f6 Peter Maydell
        }
2289 369be8f6 Peter Maydell
    } else {
2290 369be8f6 Peter Maydell
        /* Subtraction */
2291 369be8f6 Peter Maydell
        if (expDiff > 0) {
2292 369be8f6 Peter Maydell
            shift64RightJamming(cSig64, expDiff, &cSig64);
2293 369be8f6 Peter Maydell
            zSig64 = pSig64 - cSig64;
2294 369be8f6 Peter Maydell
            zExp = pExp;
2295 369be8f6 Peter Maydell
        } else if (expDiff < 0) {
2296 369be8f6 Peter Maydell
            shift64RightJamming(pSig64, -expDiff, &pSig64);
2297 369be8f6 Peter Maydell
            zSig64 = cSig64 - pSig64;
2298 369be8f6 Peter Maydell
            zExp = cExp;
2299 369be8f6 Peter Maydell
            zSign ^= 1;
2300 369be8f6 Peter Maydell
        } else {
2301 369be8f6 Peter Maydell
            zExp = pExp;
2302 369be8f6 Peter Maydell
            if (cSig64 < pSig64) {
2303 369be8f6 Peter Maydell
                zSig64 = pSig64 - cSig64;
2304 369be8f6 Peter Maydell
            } else if (pSig64 < cSig64) {
2305 369be8f6 Peter Maydell
                zSig64 = cSig64 - pSig64;
2306 369be8f6 Peter Maydell
                zSign ^= 1;
2307 369be8f6 Peter Maydell
            } else {
2308 369be8f6 Peter Maydell
                /* Exact zero */
2309 369be8f6 Peter Maydell
                zSign = signflip;
2310 369be8f6 Peter Maydell
                if (STATUS(float_rounding_mode) == float_round_down) {
2311 369be8f6 Peter Maydell
                    zSign ^= 1;
2312 369be8f6 Peter Maydell
                }
2313 369be8f6 Peter Maydell
                return packFloat32(zSign, 0, 0);
2314 369be8f6 Peter Maydell
            }
2315 369be8f6 Peter Maydell
        }
2316 369be8f6 Peter Maydell
        --zExp;
2317 369be8f6 Peter Maydell
        /* Normalize to put the explicit bit back into bit 62. */
2318 369be8f6 Peter Maydell
        shiftcount = countLeadingZeros64(zSig64) - 1;
2319 369be8f6 Peter Maydell
        zSig64 <<= shiftcount;
2320 369be8f6 Peter Maydell
        zExp -= shiftcount;
2321 369be8f6 Peter Maydell
    }
2322 369be8f6 Peter Maydell
    shift64RightJamming(zSig64, 32, &zSig64);
2323 369be8f6 Peter Maydell
    return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
2324 369be8f6 Peter Maydell
}
2325 369be8f6 Peter Maydell
2326 369be8f6 Peter Maydell
2327 369be8f6 Peter Maydell
/*----------------------------------------------------------------------------
2328 158142c2 bellard
| Returns the square root of the single-precision floating-point value `a'.
2329 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
2330 158142c2 bellard
| Floating-Point Arithmetic.
2331 158142c2 bellard
*----------------------------------------------------------------------------*/
2332 158142c2 bellard
2333 158142c2 bellard
float32 float32_sqrt( float32 a STATUS_PARAM )
2334 158142c2 bellard
{
2335 158142c2 bellard
    flag aSign;
2336 158142c2 bellard
    int16 aExp, zExp;
2337 bb98fe42 Andreas Färber
    uint32_t aSig, zSig;
2338 bb98fe42 Andreas Färber
    uint64_t rem, term;
2339 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2340 158142c2 bellard
2341 158142c2 bellard
    aSig = extractFloat32Frac( a );
2342 158142c2 bellard
    aExp = extractFloat32Exp( a );
2343 158142c2 bellard
    aSign = extractFloat32Sign( a );
2344 158142c2 bellard
    if ( aExp == 0xFF ) {
2345 f090c9d4 pbrook
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2346 158142c2 bellard
        if ( ! aSign ) return a;
2347 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2348 158142c2 bellard
        return float32_default_nan;
2349 158142c2 bellard
    }
2350 158142c2 bellard
    if ( aSign ) {
2351 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
2352 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2353 158142c2 bellard
        return float32_default_nan;
2354 158142c2 bellard
    }
2355 158142c2 bellard
    if ( aExp == 0 ) {
2356 f090c9d4 pbrook
        if ( aSig == 0 ) return float32_zero;
2357 158142c2 bellard
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2358 158142c2 bellard
    }
2359 158142c2 bellard
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2360 158142c2 bellard
    aSig = ( aSig | 0x00800000 )<<8;
2361 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig ) + 2;
2362 158142c2 bellard
    if ( ( zSig & 0x7F ) <= 5 ) {
2363 158142c2 bellard
        if ( zSig < 2 ) {
2364 158142c2 bellard
            zSig = 0x7FFFFFFF;
2365 158142c2 bellard
            goto roundAndPack;
2366 158142c2 bellard
        }
2367 158142c2 bellard
        aSig >>= aExp & 1;
2368 bb98fe42 Andreas Färber
        term = ( (uint64_t) zSig ) * zSig;
2369 bb98fe42 Andreas Färber
        rem = ( ( (uint64_t) aSig )<<32 ) - term;
2370 bb98fe42 Andreas Färber
        while ( (int64_t) rem < 0 ) {
2371 158142c2 bellard
            --zSig;
2372 bb98fe42 Andreas Färber
            rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2373 158142c2 bellard
        }
2374 158142c2 bellard
        zSig |= ( rem != 0 );
2375 158142c2 bellard
    }
2376 158142c2 bellard
    shift32RightJamming( zSig, 1, &zSig );
2377 158142c2 bellard
 roundAndPack:
2378 158142c2 bellard
    return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2379 158142c2 bellard
2380 158142c2 bellard
}
2381 158142c2 bellard
2382 158142c2 bellard
/*----------------------------------------------------------------------------
2383 8229c991 Aurelien Jarno
| Returns the binary exponential of the single-precision floating-point value
2384 8229c991 Aurelien Jarno
| `a'. The operation is performed according to the IEC/IEEE Standard for
2385 8229c991 Aurelien Jarno
| Binary Floating-Point Arithmetic.
2386 8229c991 Aurelien Jarno
|
2387 8229c991 Aurelien Jarno
| Uses the following identities:
2388 8229c991 Aurelien Jarno
|
2389 8229c991 Aurelien Jarno
| 1. -------------------------------------------------------------------------
2390 8229c991 Aurelien Jarno
|      x    x*ln(2)
2391 8229c991 Aurelien Jarno
|     2  = e
2392 8229c991 Aurelien Jarno
|
2393 8229c991 Aurelien Jarno
| 2. -------------------------------------------------------------------------
2394 8229c991 Aurelien Jarno
|                      2     3     4     5           n
2395 8229c991 Aurelien Jarno
|      x        x     x     x     x     x           x
2396 8229c991 Aurelien Jarno
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2397 8229c991 Aurelien Jarno
|               1!    2!    3!    4!    5!          n!
2398 8229c991 Aurelien Jarno
*----------------------------------------------------------------------------*/
2399 8229c991 Aurelien Jarno
2400 8229c991 Aurelien Jarno
static const float64 float32_exp2_coefficients[15] =
2401 8229c991 Aurelien Jarno
{
2402 d5138cf4 Peter Maydell
    const_float64( 0x3ff0000000000000ll ), /*  1 */
2403 d5138cf4 Peter Maydell
    const_float64( 0x3fe0000000000000ll ), /*  2 */
2404 d5138cf4 Peter Maydell
    const_float64( 0x3fc5555555555555ll ), /*  3 */
2405 d5138cf4 Peter Maydell
    const_float64( 0x3fa5555555555555ll ), /*  4 */
2406 d5138cf4 Peter Maydell
    const_float64( 0x3f81111111111111ll ), /*  5 */
2407 d5138cf4 Peter Maydell
    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
2408 d5138cf4 Peter Maydell
    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
2409 d5138cf4 Peter Maydell
    const_float64( 0x3efa01a01a01a01all ), /*  8 */
2410 d5138cf4 Peter Maydell
    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
2411 d5138cf4 Peter Maydell
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2412 d5138cf4 Peter Maydell
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2413 d5138cf4 Peter Maydell
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2414 d5138cf4 Peter Maydell
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
2415 d5138cf4 Peter Maydell
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2416 d5138cf4 Peter Maydell
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
2417 8229c991 Aurelien Jarno
};
2418 8229c991 Aurelien Jarno
2419 8229c991 Aurelien Jarno
float32 float32_exp2( float32 a STATUS_PARAM )
2420 8229c991 Aurelien Jarno
{
2421 8229c991 Aurelien Jarno
    flag aSign;
2422 8229c991 Aurelien Jarno
    int16 aExp;
2423 bb98fe42 Andreas Färber
    uint32_t aSig;
2424 8229c991 Aurelien Jarno
    float64 r, x, xn;
2425 8229c991 Aurelien Jarno
    int i;
2426 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2427 8229c991 Aurelien Jarno
2428 8229c991 Aurelien Jarno
    aSig = extractFloat32Frac( a );
2429 8229c991 Aurelien Jarno
    aExp = extractFloat32Exp( a );
2430 8229c991 Aurelien Jarno
    aSign = extractFloat32Sign( a );
2431 8229c991 Aurelien Jarno
2432 8229c991 Aurelien Jarno
    if ( aExp == 0xFF) {
2433 8229c991 Aurelien Jarno
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2434 8229c991 Aurelien Jarno
        return (aSign) ? float32_zero : a;
2435 8229c991 Aurelien Jarno
    }
2436 8229c991 Aurelien Jarno
    if (aExp == 0) {
2437 8229c991 Aurelien Jarno
        if (aSig == 0) return float32_one;
2438 8229c991 Aurelien Jarno
    }
2439 8229c991 Aurelien Jarno
2440 8229c991 Aurelien Jarno
    float_raise( float_flag_inexact STATUS_VAR);
2441 8229c991 Aurelien Jarno
2442 8229c991 Aurelien Jarno
    /* ******************************* */
2443 8229c991 Aurelien Jarno
    /* using float64 for approximation */
2444 8229c991 Aurelien Jarno
    /* ******************************* */
2445 8229c991 Aurelien Jarno
    x = float32_to_float64(a STATUS_VAR);
2446 8229c991 Aurelien Jarno
    x = float64_mul(x, float64_ln2 STATUS_VAR);
2447 8229c991 Aurelien Jarno
2448 8229c991 Aurelien Jarno
    xn = x;
2449 8229c991 Aurelien Jarno
    r = float64_one;
2450 8229c991 Aurelien Jarno
    for (i = 0 ; i < 15 ; i++) {
2451 8229c991 Aurelien Jarno
        float64 f;
2452 8229c991 Aurelien Jarno
2453 8229c991 Aurelien Jarno
        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2454 8229c991 Aurelien Jarno
        r = float64_add(r, f STATUS_VAR);
2455 8229c991 Aurelien Jarno
2456 8229c991 Aurelien Jarno
        xn = float64_mul(xn, x STATUS_VAR);
2457 8229c991 Aurelien Jarno
    }
2458 8229c991 Aurelien Jarno
2459 8229c991 Aurelien Jarno
    return float64_to_float32(r, status);
2460 8229c991 Aurelien Jarno
}
2461 8229c991 Aurelien Jarno
2462 8229c991 Aurelien Jarno
/*----------------------------------------------------------------------------
2463 374dfc33 aurel32
| Returns the binary log of the single-precision floating-point value `a'.
2464 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
2465 374dfc33 aurel32
| Floating-Point Arithmetic.
2466 374dfc33 aurel32
*----------------------------------------------------------------------------*/
2467 374dfc33 aurel32
float32 float32_log2( float32 a STATUS_PARAM )
2468 374dfc33 aurel32
{
2469 374dfc33 aurel32
    flag aSign, zSign;
2470 374dfc33 aurel32
    int16 aExp;
2471 bb98fe42 Andreas Färber
    uint32_t aSig, zSig, i;
2472 374dfc33 aurel32
2473 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2474 374dfc33 aurel32
    aSig = extractFloat32Frac( a );
2475 374dfc33 aurel32
    aExp = extractFloat32Exp( a );
2476 374dfc33 aurel32
    aSign = extractFloat32Sign( a );
2477 374dfc33 aurel32
2478 374dfc33 aurel32
    if ( aExp == 0 ) {
2479 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2480 374dfc33 aurel32
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2481 374dfc33 aurel32
    }
2482 374dfc33 aurel32
    if ( aSign ) {
2483 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
2484 374dfc33 aurel32
        return float32_default_nan;
2485 374dfc33 aurel32
    }
2486 374dfc33 aurel32
    if ( aExp == 0xFF ) {
2487 374dfc33 aurel32
        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2488 374dfc33 aurel32
        return a;
2489 374dfc33 aurel32
    }
2490 374dfc33 aurel32
2491 374dfc33 aurel32
    aExp -= 0x7F;
2492 374dfc33 aurel32
    aSig |= 0x00800000;
2493 374dfc33 aurel32
    zSign = aExp < 0;
2494 374dfc33 aurel32
    zSig = aExp << 23;
2495 374dfc33 aurel32
2496 374dfc33 aurel32
    for (i = 1 << 22; i > 0; i >>= 1) {
2497 bb98fe42 Andreas Färber
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
2498 374dfc33 aurel32
        if ( aSig & 0x01000000 ) {
2499 374dfc33 aurel32
            aSig >>= 1;
2500 374dfc33 aurel32
            zSig |= i;
2501 374dfc33 aurel32
        }
2502 374dfc33 aurel32
    }
2503 374dfc33 aurel32
2504 374dfc33 aurel32
    if ( zSign )
2505 374dfc33 aurel32
        zSig = -zSig;
2506 374dfc33 aurel32
2507 374dfc33 aurel32
    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2508 374dfc33 aurel32
}
2509 374dfc33 aurel32
2510 374dfc33 aurel32
/*----------------------------------------------------------------------------
2511 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2512 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2513 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
2514 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2515 158142c2 bellard
*----------------------------------------------------------------------------*/
2516 158142c2 bellard
2517 b689362d Aurelien Jarno
int float32_eq( float32 a, float32 b STATUS_PARAM )
2518 158142c2 bellard
{
2519 b689362d Aurelien Jarno
    uint32_t av, bv;
2520 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2521 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2522 158142c2 bellard
2523 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2524 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2525 158142c2 bellard
       ) {
2526 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2527 158142c2 bellard
        return 0;
2528 158142c2 bellard
    }
2529 b689362d Aurelien Jarno
    av = float32_val(a);
2530 b689362d Aurelien Jarno
    bv = float32_val(b);
2531 b689362d Aurelien Jarno
    return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2532 158142c2 bellard
}
2533 158142c2 bellard
2534 158142c2 bellard
/*----------------------------------------------------------------------------
2535 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2536 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
2537 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
2538 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2539 158142c2 bellard
*----------------------------------------------------------------------------*/
2540 158142c2 bellard
2541 750afe93 bellard
int float32_le( float32 a, float32 b STATUS_PARAM )
2542 158142c2 bellard
{
2543 158142c2 bellard
    flag aSign, bSign;
2544 bb98fe42 Andreas Färber
    uint32_t av, bv;
2545 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2546 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2547 158142c2 bellard
2548 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2549 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2550 158142c2 bellard
       ) {
2551 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2552 158142c2 bellard
        return 0;
2553 158142c2 bellard
    }
2554 158142c2 bellard
    aSign = extractFloat32Sign( a );
2555 158142c2 bellard
    bSign = extractFloat32Sign( b );
2556 f090c9d4 pbrook
    av = float32_val(a);
2557 f090c9d4 pbrook
    bv = float32_val(b);
2558 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2559 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
2560 158142c2 bellard
2561 158142c2 bellard
}
2562 158142c2 bellard
2563 158142c2 bellard
/*----------------------------------------------------------------------------
2564 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2565 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
2566 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
2567 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2568 158142c2 bellard
*----------------------------------------------------------------------------*/
2569 158142c2 bellard
2570 750afe93 bellard
int float32_lt( float32 a, float32 b STATUS_PARAM )
2571 158142c2 bellard
{
2572 158142c2 bellard
    flag aSign, bSign;
2573 bb98fe42 Andreas Färber
    uint32_t av, bv;
2574 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2575 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2576 158142c2 bellard
2577 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2578 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2579 158142c2 bellard
       ) {
2580 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
2581 158142c2 bellard
        return 0;
2582 158142c2 bellard
    }
2583 158142c2 bellard
    aSign = extractFloat32Sign( a );
2584 158142c2 bellard
    bSign = extractFloat32Sign( b );
2585 f090c9d4 pbrook
    av = float32_val(a);
2586 f090c9d4 pbrook
    bv = float32_val(b);
2587 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2588 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
2589 158142c2 bellard
2590 158142c2 bellard
}
2591 158142c2 bellard
2592 158142c2 bellard
/*----------------------------------------------------------------------------
2593 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2594 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
2595 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
2596 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
2597 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2598 67b7861d Aurelien Jarno
2599 67b7861d Aurelien Jarno
int float32_unordered( float32 a, float32 b STATUS_PARAM )
2600 67b7861d Aurelien Jarno
{
2601 67b7861d Aurelien Jarno
    a = float32_squash_input_denormal(a STATUS_VAR);
2602 67b7861d Aurelien Jarno
    b = float32_squash_input_denormal(b STATUS_VAR);
2603 67b7861d Aurelien Jarno
2604 67b7861d Aurelien Jarno
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2605 67b7861d Aurelien Jarno
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2606 67b7861d Aurelien Jarno
       ) {
2607 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
2608 67b7861d Aurelien Jarno
        return 1;
2609 67b7861d Aurelien Jarno
    }
2610 67b7861d Aurelien Jarno
    return 0;
2611 67b7861d Aurelien Jarno
}
2612 b689362d Aurelien Jarno
2613 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2614 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is equal to
2615 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2616 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
2617 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
2618 158142c2 bellard
*----------------------------------------------------------------------------*/
2619 158142c2 bellard
2620 b689362d Aurelien Jarno
int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
2621 158142c2 bellard
{
2622 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
2623 37d18660 Peter Maydell
    b = float32_squash_input_denormal(b STATUS_VAR);
2624 158142c2 bellard
2625 158142c2 bellard
    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2626 158142c2 bellard
         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2627 158142c2 bellard
       ) {
2628 b689362d Aurelien Jarno
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2629 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
2630 b689362d Aurelien Jarno
        }
2631 158142c2 bellard
        return 0;
2632 158142c2 bellard
    }
2633 b689362d Aurelien Jarno
    return ( float32_val(a) == float32_val(b) ) ||
2634 b689362d Aurelien Jarno
            ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2635 158142c2 bellard
}
2636 158142c2 bellard
2637 158142c2 bellard
/*----------------------------------------------------------------------------
2638 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than or
2639 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2640 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
2641 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2642 158142c2 bellard
*----------------------------------------------------------------------------*/
2643 158142c2 bellard
2644 750afe93 bellard
int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t av, bv;
    int rawLess;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Any NaN operand makes the comparison false; invalid is raised only
     * for signaling NaNs. */
    if (((extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a))
        || ((extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b))) {
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    av = float32_val(a);
    bv = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true when a carries the sign bit, or when both
         * operands are zeros of either sign. */
        if (aSign) {
            return 1;
        }
        return (uint32_t)((av | bv) << 1) == 0;
    }

    if (av == bv) {
        return 1;
    }
    /* Same sign: compare the raw encodings, inverting the outcome when
     * the sign bit is set. */
    rawLess = (av < bv);
    return (aSign ^ rawLess) != 0;
}
2667 158142c2 bellard
2668 158142c2 bellard
/*----------------------------------------------------------------------------
2669 158142c2 bellard
| Returns 1 if the single-precision floating-point value `a' is less than
2670 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2671 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2672 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
2673 158142c2 bellard
*----------------------------------------------------------------------------*/
2674 158142c2 bellard
2675 750afe93 bellard
int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, bSign;
    uint32_t av, bv;
    int rawLess;

    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Any NaN operand makes the comparison false; invalid is raised only
     * for signaling NaNs. */
    if (((extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a))
        || ((extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b))) {
        if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
            float_raise(float_flag_invalid STATUS_VAR);
        }
        return 0;
    }

    aSign = extractFloat32Sign(a);
    bSign = extractFloat32Sign(b);
    av = float32_val(a);
    bv = float32_val(b);

    if (aSign != bSign) {
        /* Opposite signs: true only when a carries the sign bit and the
         * operands are not both zero. */
        if (!aSign) {
            return 0;
        }
        return (uint32_t)((av | bv) << 1) != 0;
    }

    if (av == bv) {
        return 0;
    }
    /* Same sign: compare the raw encodings, inverting the outcome when
     * the sign bit is set. */
    rawLess = (av < bv);
    return (aSign ^ rawLess) != 0;
}
2698 158142c2 bellard
2699 158142c2 bellard
/*----------------------------------------------------------------------------
2700 67b7861d Aurelien Jarno
| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2701 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
2702 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
2703 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
2704 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
2705 67b7861d Aurelien Jarno
2706 67b7861d Aurelien Jarno
int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
{
    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Ordered case first: neither operand has the NaN pattern (maximum
     * exponent together with a non-zero fraction). */
    if (!((extractFloat32Exp(a) == 0xFF) && extractFloat32Frac(a))
        && !((extractFloat32Exp(b) == 0xFF) && extractFloat32Frac(b))) {
        return 0;
    }

    /* Unordered; only a signaling NaN raises the invalid flag. */
    if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b)) {
        float_raise(float_flag_invalid STATUS_VAR);
    }
    return 1;
}
2721 67b7861d Aurelien Jarno
2722 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
2723 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2724 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2725 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2726 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2727 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2728 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2729 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2730 158142c2 bellard
*----------------------------------------------------------------------------*/
2731 158142c2 bellard
2732 158142c2 bellard
int32 float64_to_int32( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp, rshift;
    uint64_t sig;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    sig = extractFloat64Frac(a);

    /* A NaN is handed on with a cleared sign. */
    if ((exp == 0x7FF) && sig) {
        sign = 0;
    }
    /* A non-zero exponent field means the implicit leading 1 is present. */
    if (exp) {
        sig |= LIT64( 0x0010000000000000 );
    }
    /* Align the significand for roundAndPackInt32; bits shifted out are
     * jammed into the lowest bit by shift64RightJamming. */
    rshift = 0x42C - exp;
    if (rshift > 0) {
        shift64RightJamming(sig, rshift, &sig);
    }
    return roundAndPackInt32(sign, sig STATUS_VAR);
}
2749 158142c2 bellard
2750 158142c2 bellard
/*----------------------------------------------------------------------------
2751 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2752 158142c2 bellard
| `a' to the 32-bit two's complement integer format.  The conversion is
2753 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2754 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2755 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2756 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2757 158142c2 bellard
| returned.
2758 158142c2 bellard
*----------------------------------------------------------------------------*/
2759 158142c2 bellard
2760 158142c2 bellard
int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, savedASig, limit;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( 0x41E < aExp ) {
        /* Magnitude of at least 2^32, infinity or NaN: never representable.
         * A NaN saturates to the positive limit. */
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
        goto invalid;
    }
    else if ( aExp < 0x3FF ) {
        /* Magnitude below 1 truncates to 0; inexact unless `a' is zero. */
        if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );
    shiftCount = 0x433 - aExp;
    savedASig = aSig;
    /* Drop the fraction bits; aSig now holds the integer magnitude, which
     * is below 2^32 because aExp <= 0x41E. */
    aSig >>= shiftCount;
    /* Range-check the unsigned magnitude directly instead of narrowing it
     * to a signed type and negating, which would overflow (undefined
     * behaviour) for a magnitude of exactly 2^31. */
    limit = aSign ? (uint64_t) LIT64( 0x80000000 )
                  : (uint64_t) LIT64( 0x7FFFFFFF );
    if ( limit < aSig ) {
 invalid:
        float_raise( float_flag_invalid STATUS_VAR);
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
    }
    /* Any bits lost by the right shift make the result inexact. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    /* The magnitude is in range, so negating in 64 bits cannot overflow
     * and the value fits the 32-bit result. */
    return aSign ? (int32) ( - (int64_t) aSig ) : (int32) aSig;

}
2796 158142c2 bellard
2797 158142c2 bellard
/*----------------------------------------------------------------------------
2798 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2799 cbcef455 Peter Maydell
| `a' to the 16-bit two's complement integer format.  The conversion is
2800 cbcef455 Peter Maydell
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2801 cbcef455 Peter Maydell
| Arithmetic, except that the conversion is always rounded toward zero.
2802 cbcef455 Peter Maydell
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2803 cbcef455 Peter Maydell
| the conversion overflows, the largest integer with the same sign as `a' is
2804 cbcef455 Peter Maydell
| returned.
2805 cbcef455 Peter Maydell
*----------------------------------------------------------------------------*/
2806 cbcef455 Peter Maydell
2807 cbcef455 Peter Maydell
int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
{
    flag aSign;
    int16 aExp, shiftCount;
    uint64_t aSig, savedASig;
    int32 z;

    /* Squash denormal inputs first, as the other float64 conversion
     * routines in this file do. */
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( 0x40E < aExp ) {
        /* Magnitude of at least 2^16, infinity or NaN: never representable.
         * A NaN saturates to the positive limit. */
        if ( ( aExp == 0x7FF ) && aSig ) {
            aSign = 0;
        }
        goto invalid;
    }
    else if ( aExp < 0x3FF ) {
        /* Magnitude below 1 truncates to 0; inexact unless `a' is zero. */
        if ( aExp || aSig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );
    shiftCount = 0x433 - aExp;
    savedASig = aSig;
    /* Drop the fraction bits; the remaining magnitude is below 2^16. */
    aSig >>= shiftCount;
    z = aSig;
    if ( aSign ) {
        z = - z;
    }
    /* If the value narrowed to 16 bits has a sign different from the
     * input's, the result does not fit. */
    if ( ( (int16_t)z < 0 ) ^ aSign ) {
 invalid:
        float_raise( float_flag_invalid STATUS_VAR);
        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
    }
    /* Any bits lost by the right shift make the result inexact. */
    if ( ( aSig<<shiftCount ) != savedASig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;
}
2847 cbcef455 Peter Maydell
2848 cbcef455 Peter Maydell
/*----------------------------------------------------------------------------
2849 cbcef455 Peter Maydell
| Returns the result of converting the double-precision floating-point value
2850 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2851 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2852 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
2853 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
2854 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
2855 158142c2 bellard
| largest integer with the same sign as `a' is returned.
2856 158142c2 bellard
*----------------------------------------------------------------------------*/
2857 158142c2 bellard
2858 158142c2 bellard
int64 float64_to_int64( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp, rshift;
    uint64_t sig, sigExtra;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    sig = extractFloat64Frac(a);
    /* A non-zero exponent field means the implicit leading 1 is present. */
    if (exp) {
        sig |= LIT64( 0x0010000000000000 );
    }

    rshift = 0x433 - exp;
    if (rshift > 0) {
        /* Fraction bits present: shift them out into sigExtra so that
         * roundAndPackInt64 can round on them. */
        shift64ExtraRightJamming(sig, 0, rshift, &sig, &sigExtra);
        return roundAndPackInt64(sign, sig, sigExtra STATUS_VAR);
    }

    if (exp > 0x43E) {
        /* Too large for 64 bits (this also covers infinity and NaN). */
        float_raise(float_flag_invalid STATUS_VAR);
        /* Only a negative non-NaN saturates to the most negative value;
         * with the implicit bit OR-ed in, a fraction of zero reads back
         * as exactly 0x0010000000000000. */
        if (sign
            && ((exp != 0x7FF) || (sig == LIT64( 0x0010000000000000 )))) {
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        return LIT64( 0x7FFFFFFFFFFFFFFF );
    }

    /* Pure integer: scale the significand up, nothing left to round on. */
    sigExtra = 0;
    sig <<= -rshift;
    return roundAndPackInt64(sign, sig, sigExtra STATUS_VAR);
}
2890 158142c2 bellard
2891 158142c2 bellard
/*----------------------------------------------------------------------------
2892 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2893 158142c2 bellard
| `a' to the 64-bit two's complement integer format.  The conversion is
2894 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2895 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
2896 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2897 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
2898 158142c2 bellard
| returned.
2899 158142c2 bellard
*----------------------------------------------------------------------------*/
2900 158142c2 bellard
2901 158142c2 bellard
int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp, lshift;
    uint64_t sig;
    int64 result;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    sig = extractFloat64Frac(a);
    /* A non-zero exponent field means the implicit leading 1 is present. */
    if (exp) {
        sig |= LIT64( 0x0010000000000000 );
    }

    lshift = exp - 0x433;
    if (lshift < 0) {
        /* The value has fraction bits that must be discarded. */
        if (exp < 0x3FE) {
            /* Result is 0; inexact unless the input is exactly zero. */
            if (exp | sig) {
                STATUS(float_exception_flags) |= float_flag_inexact;
            }
            return 0;
        }
        result = sig >> (-lshift);
        /* Shifting left by the complementary count exposes exactly the
         * bits that the right shift discarded. */
        if ((uint64_t)(sig << (lshift & 63))) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
    } else {
        if (exp >= 0x43E) {
            /* The encoding 0xC3E0000000000000 is returned as the most
             * negative integer without raising any flag. */
            if (float64_val(a) == LIT64( 0xC3E0000000000000 )) {
                return (int64_t) LIT64( 0x8000000000000000 );
            }
            float_raise(float_flag_invalid STATUS_VAR);
            /* Positive values and NaNs saturate to the largest positive
             * integer, everything else to the most negative one. */
            if (!sign
                || ((exp == 0x7FF)
                    && (sig != LIT64( 0x0010000000000000 )))) {
                return LIT64( 0x7FFFFFFFFFFFFFFF );
            }
            return (int64_t) LIT64( 0x8000000000000000 );
        }
        result = sig << lshift;
    }
    return sign ? -result : result;
}
2943 158142c2 bellard
2944 158142c2 bellard
/*----------------------------------------------------------------------------
2945 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
2946 158142c2 bellard
| `a' to the single-precision floating-point format.  The conversion is
2947 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
2948 158142c2 bellard
| Arithmetic.
2949 158142c2 bellard
*----------------------------------------------------------------------------*/
2950 158142c2 bellard
2951 158142c2 bellard
float32 float64_to_float32( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t wideSig;
    uint32_t narrowSig;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    wideSig = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (wideSig == 0) {
            /* Infinity keeps its sign. */
            return packFloat32(sign, 0xFF, 0);
        }
        /* NaN: convert via the common NaN representation. */
        return commonNaNToFloat32(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    /* Narrow the fraction to 32 bits; the bits shifted out are jammed
     * into the lowest bit by shift64RightJamming. */
    shift64RightJamming(wideSig, 22, &wideSig);
    narrowSig = wideSig;
    if (exp || narrowSig) {
        /* Non-zero value: add the leading bit and adjust the exponent
         * before handing over to roundAndPackFloat32. */
        narrowSig |= 0x40000000;
        exp -= 0x381;
    }
    return roundAndPackFloat32(sign, exp, narrowSig STATUS_VAR);
}
2975 158142c2 bellard
2976 60011498 Paul Brook
2977 60011498 Paul Brook
/*----------------------------------------------------------------------------
2978 60011498 Paul Brook
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2979 60011498 Paul Brook
| half-precision floating-point value, returning the result.  After being
2980 60011498 Paul Brook
| shifted into the proper positions, the three fields are simply added
2981 60011498 Paul Brook
| together to form the result.  This means that any integer portion of `zSig'
2982 60011498 Paul Brook
| will be added into the exponent.  Since a properly normalized significand
2983 60011498 Paul Brook
| will have an integer portion equal to 1, the `zExp' input should be 1 less
2984 60011498 Paul Brook
| than the desired result exponent whenever `zSig' is a complete, normalized
2985 60011498 Paul Brook
| significand.
2986 60011498 Paul Brook
*----------------------------------------------------------------------------*/
2987 bb98fe42 Andreas Färber
static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
{
    /* Sign goes to bit 15, the exponent field starts at bit 10; the three
     * parts are added (not OR-ed), so an integer bit in zSig carries into
     * the exponent. */
    uint32_t signField = ((uint32_t)zSign) << 15;
    uint32_t expField = ((uint32_t)zExp) << 10;

    return make_float16(signField + expField + zSig);
}
2992 60011498 Paul Brook
2993 60011498 Paul Brook
/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2994 60011498 Paul Brook
   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
2995 bb4d4bb3 Peter Maydell
2996 bb4d4bb3 Peter Maydell
float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
{
    flag sign = extractFloat16Sign(a);
    int16 exp = extractFloat16Exp(a);
    uint32_t frac = extractFloat16Frac(a);

    if (exp == 0) {
        int8 lshift;

        if (frac == 0) {
            /* Signed zero. */
            return packFloat32(sign, 0, 0);
        }
        /* Subnormal input: shift the leading 1 of the fraction up to
         * bit 10 and set the exponent to the negated shift count. */
        lshift = countLeadingZeros32(frac) - 21;
        frac = frac << lshift;
        exp = -lshift;
    } else if (ieee && exp == 0x1f) {
        /* The all-ones exponent encodes NaN/Inf only in IEEE mode. */
        if (frac == 0) {
            return packFloat32(sign, 0xff, 0);
        }
        return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    /* Add 0x70 to the exponent and move the 10 fraction bits to the top
     * of the 23-bit single-precision fraction field. */
    return packFloat32(sign, exp + 0x70, frac << 13);
}
3025 60011498 Paul Brook
3026 bb4d4bb3 Peter Maydell
float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int16 aExp;
    uint32_t aSig;
    uint32_t mask;
    uint32_t increment;
    int8 roundingMode;

    a = float32_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* Input is a NaN.  The non-IEEE format has no NaN encoding:
             * return a zero and raise invalid for every NaN, matching the
             * infinity and overflow handling for that format below. */
            if (!ieee) {
                float_raise(float_flag_invalid STATUS_VAR);
                return packFloat16(aSign, 0, 0);
            }
            return commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Infinity */
        if (!ieee) {
            /* No infinity encoding either: saturate and raise invalid. */
            float_raise(float_flag_invalid STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
        return packFloat16(aSign, 0x1f, 0);
    }
    if (aExp == 0 && aSig == 0) {
        return packFloat16(aSign, 0, 0);
    }
    /* Decimal point between bits 22 and 23.  */
    aSig |= 0x00800000;
    aExp -= 0x7f;
    /* `mask' selects the significand bits that do not fit in the result:
     * the low 13 bits for a normal result, more for a subnormal one, and
     * everything when the unbiased exponent is below -24. */
    if (aExp < -14) {
        mask = 0x00ffffff;
        if (aExp >= -24) {
            mask >>= 25 + aExp;
        }
    } else {
        mask = 0x00001fff;
    }
    if (aSig & mask) {
        /* NOTE(review): underflow is raised here whenever bits are lost,
         * even for results in the normal range -- confirm whether inexact
         * was intended. */
        float_raise( float_flag_underflow STATUS_VAR );
        roundingMode = STATUS(float_rounding_mode);
        switch (roundingMode) {
        case float_round_nearest_even:
            increment = (mask + 1) >> 1;
            if ((aSig & mask) == increment) {
                /* Exact tie: round up only if the kept LSB is set. */
                increment = aSig & (increment << 1);
            }
            break;
        case float_round_up:
            increment = aSign ? 0 : mask;
            break;
        case float_round_down:
            increment = aSign ? mask : 0;
            break;
        default: /* round_to_zero */
            increment = 0;
            break;
        }
        aSig += increment;
        /* Rounding carried out of the significand: renormalize. */
        if (aSig >= 0x01000000) {
            aSig >>= 1;
            aExp++;
        }
    } else if (aExp < -14
          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
        float_raise( float_flag_underflow STATUS_VAR);
    }

    if (ieee) {
        if (aExp > 15) {
            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0);
        }
    } else {
        /* The non-IEEE format accepts one more exponent value. */
        if (aExp > 16) {
            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
            return packFloat16(aSign, 0x1f, 0x3ff);
        }
    }
    if (aExp < -24) {
        return packFloat16(aSign, 0, 0);
    }
    if (aExp < -14) {
        /* Subnormal result: shift the significand right and pin the
         * exponent at -14. */
        aSig >>= -14 - aExp;
        aExp = -14;
    }
    /* The significand's integer bit adds 1 to the exponent field inside
     * packFloat16, which is why the offset here is 14. */
    return packFloat16(aSign, aExp + 14, aSig >> 13);
}
3119 60011498 Paul Brook
3120 158142c2 bellard
/*----------------------------------------------------------------------------
3121 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3122 158142c2 bellard
| `a' to the extended double-precision floating-point format.  The conversion
3123 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3124 158142c2 bellard
| Arithmetic.
3125 158142c2 bellard
*----------------------------------------------------------------------------*/
3126 158142c2 bellard
3127 158142c2 bellard
floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    frac = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (frac == 0) {
            /* Infinity, with the explicit integer bit set. */
            return packFloatx80(sign, 0x7FFF, LIT64( 0x8000000000000000 ));
        }
        /* NaN: convert via the common NaN representation. */
        return commonNaNToFloatx80(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    if (exp == 0) {
        if (frac == 0) {
            return packFloatx80(sign, 0, 0);
        }
        /* Subnormal input: normalize it first. */
        normalizeFloat64Subnormal(frac, &exp, &frac);
    }
    /* Add 0x3C00 to the exponent, make the leading 1 explicit and move
     * the significand to the top of the 64-bit field. */
    frac |= LIT64( 0x0010000000000000 );
    return packFloatx80(sign, exp + 0x3C00, frac << 11);
}
3150 158142c2 bellard
3151 158142c2 bellard
/*----------------------------------------------------------------------------
3152 158142c2 bellard
| Returns the result of converting the double-precision floating-point value
3153 158142c2 bellard
| `a' to the quadruple-precision floating-point format.  The conversion is
3154 158142c2 bellard
| performed according to the IEC/IEEE Standard for Binary Floating-Point
3155 158142c2 bellard
| Arithmetic.
3156 158142c2 bellard
*----------------------------------------------------------------------------*/
3157 158142c2 bellard
3158 158142c2 bellard
float128 float64_to_float128( float64 a STATUS_PARAM )
{
    flag sign;
    int16 exp;
    uint64_t frac, hi, lo;

    a = float64_squash_input_denormal(a STATUS_VAR);

    sign = extractFloat64Sign(a);
    exp = extractFloat64Exp(a);
    frac = extractFloat64Frac(a);

    if (exp == 0x7FF) {
        if (frac == 0) {
            /* Infinity. */
            return packFloat128(sign, 0x7FFF, 0, 0);
        }
        /* NaN: convert via the common NaN representation. */
        return commonNaNToFloat128(float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }
    if (exp == 0) {
        if (frac == 0) {
            return packFloat128(sign, 0, 0, 0);
        }
        /* Subnormal input: normalize, then step the exponent down by one
         * before packing. */
        normalizeFloat64Subnormal(frac, &exp, &frac);
        --exp;
    }
    /* Spread the fraction over the two 64-bit halves of the result. */
    shift128Right(frac, 0, 4, &hi, &lo);
    return packFloat128(sign, exp + 0x3C00, hi, lo);
}
3181 158142c2 bellard
3182 158142c2 bellard
/*----------------------------------------------------------------------------
3183 158142c2 bellard
| Rounds the double-precision floating-point value `a' to an integer, and
3184 158142c2 bellard
| returns the result as a double-precision floating-point value.  The
3185 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
3186 158142c2 bellard
| Floating-Point Arithmetic.
3187 158142c2 bellard
*----------------------------------------------------------------------------*/
3188 158142c2 bellard
3189 158142c2 bellard
float64 float64_round_to_int( float64 a STATUS_PARAM )
3190 158142c2 bellard
{
3191 158142c2 bellard
    flag aSign;
3192 158142c2 bellard
    int16 aExp;
3193 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
3194 158142c2 bellard
    int8 roundingMode;
3195 bb98fe42 Andreas Färber
    uint64_t z;
3196 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3197 158142c2 bellard
3198 158142c2 bellard
    aExp = extractFloat64Exp( a );
3199 158142c2 bellard
    if ( 0x433 <= aExp ) {
3200 158142c2 bellard
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3201 158142c2 bellard
            return propagateFloat64NaN( a, a STATUS_VAR );
3202 158142c2 bellard
        }
3203 158142c2 bellard
        return a;
3204 158142c2 bellard
    }
3205 158142c2 bellard
    if ( aExp < 0x3FF ) {
3206 bb98fe42 Andreas Färber
        if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3207 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
3208 158142c2 bellard
        aSign = extractFloat64Sign( a );
3209 158142c2 bellard
        switch ( STATUS(float_rounding_mode) ) {
3210 158142c2 bellard
         case float_round_nearest_even:
3211 158142c2 bellard
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3212 158142c2 bellard
                return packFloat64( aSign, 0x3FF, 0 );
3213 158142c2 bellard
            }
3214 158142c2 bellard
            break;
3215 158142c2 bellard
         case float_round_down:
3216 f090c9d4 pbrook
            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3217 158142c2 bellard
         case float_round_up:
3218 f090c9d4 pbrook
            return make_float64(
3219 f090c9d4 pbrook
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3220 158142c2 bellard
        }
3221 158142c2 bellard
        return packFloat64( aSign, 0, 0 );
3222 158142c2 bellard
    }
3223 158142c2 bellard
    lastBitMask = 1;
3224 158142c2 bellard
    lastBitMask <<= 0x433 - aExp;
3225 158142c2 bellard
    roundBitsMask = lastBitMask - 1;
3226 f090c9d4 pbrook
    z = float64_val(a);
3227 158142c2 bellard
    roundingMode = STATUS(float_rounding_mode);
3228 158142c2 bellard
    if ( roundingMode == float_round_nearest_even ) {
3229 158142c2 bellard
        z += lastBitMask>>1;
3230 158142c2 bellard
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3231 158142c2 bellard
    }
3232 158142c2 bellard
    else if ( roundingMode != float_round_to_zero ) {
3233 f090c9d4 pbrook
        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
3234 158142c2 bellard
            z += roundBitsMask;
3235 158142c2 bellard
        }
3236 158142c2 bellard
    }
3237 158142c2 bellard
    z &= ~ roundBitsMask;
3238 f090c9d4 pbrook
    if ( z != float64_val(a) )
3239 f090c9d4 pbrook
        STATUS(float_exception_flags) |= float_flag_inexact;
3240 f090c9d4 pbrook
    return make_float64(z);
3241 158142c2 bellard
3242 158142c2 bellard
}
3243 158142c2 bellard
3244 e6e5906b pbrook
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integer with the
| rounding mode forced to round-to-zero, and returns the result as a double-
| precision floating-point value.  The rounding mode stored in the status is
| saved before the call to float64_round_to_int() and restored afterwards.
*----------------------------------------------------------------------------*/

float64 float64_trunc_to_int( float64 a STATUS_PARAM)
{
    int savedMode;
    float64 truncated;

    savedMode = STATUS(float_rounding_mode);
    STATUS(float_rounding_mode) = float_round_to_zero;
    truncated = float64_round_to_int(a STATUS_VAR);
    STATUS(float_rounding_mode) = savedMode;
    return truncated;
}
3254 e6e5906b pbrook
3255 158142c2 bellard
/*----------------------------------------------------------------------------
3256 158142c2 bellard
| Returns the result of adding the absolute values of the double-precision
3257 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
3258 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
3259 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
3260 158142c2 bellard
| Floating-Point Arithmetic.
3261 158142c2 bellard
*----------------------------------------------------------------------------*/
3262 158142c2 bellard
3263 158142c2 bellard
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3264 158142c2 bellard
{
3265 158142c2 bellard
    int16 aExp, bExp, zExp;
3266 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3267 158142c2 bellard
    int16 expDiff;
3268 158142c2 bellard
3269 158142c2 bellard
    aSig = extractFloat64Frac( a );
3270 158142c2 bellard
    aExp = extractFloat64Exp( a );
3271 158142c2 bellard
    bSig = extractFloat64Frac( b );
3272 158142c2 bellard
    bExp = extractFloat64Exp( b );
3273 158142c2 bellard
    expDiff = aExp - bExp;
3274 158142c2 bellard
    aSig <<= 9;
3275 158142c2 bellard
    bSig <<= 9;
3276 158142c2 bellard
    if ( 0 < expDiff ) {
3277 158142c2 bellard
        if ( aExp == 0x7FF ) {
3278 158142c2 bellard
            if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3279 158142c2 bellard
            return a;
3280 158142c2 bellard
        }
3281 158142c2 bellard
        if ( bExp == 0 ) {
3282 158142c2 bellard
            --expDiff;
3283 158142c2 bellard
        }
3284 158142c2 bellard
        else {
3285 158142c2 bellard
            bSig |= LIT64( 0x2000000000000000 );
3286 158142c2 bellard
        }
3287 158142c2 bellard
        shift64RightJamming( bSig, expDiff, &bSig );
3288 158142c2 bellard
        zExp = aExp;
3289 158142c2 bellard
    }
3290 158142c2 bellard
    else if ( expDiff < 0 ) {
3291 158142c2 bellard
        if ( bExp == 0x7FF ) {
3292 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3293 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3294 158142c2 bellard
        }
3295 158142c2 bellard
        if ( aExp == 0 ) {
3296 158142c2 bellard
            ++expDiff;
3297 158142c2 bellard
        }
3298 158142c2 bellard
        else {
3299 158142c2 bellard
            aSig |= LIT64( 0x2000000000000000 );
3300 158142c2 bellard
        }
3301 158142c2 bellard
        shift64RightJamming( aSig, - expDiff, &aSig );
3302 158142c2 bellard
        zExp = bExp;
3303 158142c2 bellard
    }
3304 158142c2 bellard
    else {
3305 158142c2 bellard
        if ( aExp == 0x7FF ) {
3306 158142c2 bellard
            if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3307 158142c2 bellard
            return a;
3308 158142c2 bellard
        }
3309 fe76d976 pbrook
        if ( aExp == 0 ) {
3310 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
3311 e6afc87f Peter Maydell
                if (aSig | bSig) {
3312 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
3313 e6afc87f Peter Maydell
                }
3314 e6afc87f Peter Maydell
                return packFloat64(zSign, 0, 0);
3315 e6afc87f Peter Maydell
            }
3316 fe76d976 pbrook
            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3317 fe76d976 pbrook
        }
3318 158142c2 bellard
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3319 158142c2 bellard
        zExp = aExp;
3320 158142c2 bellard
        goto roundAndPack;
3321 158142c2 bellard
    }
3322 158142c2 bellard
    aSig |= LIT64( 0x2000000000000000 );
3323 158142c2 bellard
    zSig = ( aSig + bSig )<<1;
3324 158142c2 bellard
    --zExp;
3325 bb98fe42 Andreas Färber
    if ( (int64_t) zSig < 0 ) {
3326 158142c2 bellard
        zSig = aSig + bSig;
3327 158142c2 bellard
        ++zExp;
3328 158142c2 bellard
    }
3329 158142c2 bellard
 roundAndPack:
3330 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3331 158142c2 bellard
3332 158142c2 bellard
}
3333 158142c2 bellard
3334 158142c2 bellard
/*----------------------------------------------------------------------------
3335 158142c2 bellard
| Returns the result of subtracting the absolute values of the double-
3336 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
3337 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
3338 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
3339 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
3340 158142c2 bellard
*----------------------------------------------------------------------------*/
3341 158142c2 bellard
3342 158142c2 bellard
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3343 158142c2 bellard
{
3344 158142c2 bellard
    int16 aExp, bExp, zExp;
3345 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3346 158142c2 bellard
    int16 expDiff;
3347 158142c2 bellard
3348 158142c2 bellard
    aSig = extractFloat64Frac( a );
3349 158142c2 bellard
    aExp = extractFloat64Exp( a );
3350 158142c2 bellard
    bSig = extractFloat64Frac( b );
3351 158142c2 bellard
    bExp = extractFloat64Exp( b );
3352 158142c2 bellard
    expDiff = aExp - bExp;
3353 158142c2 bellard
    aSig <<= 10;
3354 158142c2 bellard
    bSig <<= 10;
3355 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
3356 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
3357 158142c2 bellard
    if ( aExp == 0x7FF ) {
3358 158142c2 bellard
        if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3359 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3360 158142c2 bellard
        return float64_default_nan;
3361 158142c2 bellard
    }
3362 158142c2 bellard
    if ( aExp == 0 ) {
3363 158142c2 bellard
        aExp = 1;
3364 158142c2 bellard
        bExp = 1;
3365 158142c2 bellard
    }
3366 158142c2 bellard
    if ( bSig < aSig ) goto aBigger;
3367 158142c2 bellard
    if ( aSig < bSig ) goto bBigger;
3368 158142c2 bellard
    return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3369 158142c2 bellard
 bExpBigger:
3370 158142c2 bellard
    if ( bExp == 0x7FF ) {
3371 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3372 158142c2 bellard
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
3373 158142c2 bellard
    }
3374 158142c2 bellard
    if ( aExp == 0 ) {
3375 158142c2 bellard
        ++expDiff;
3376 158142c2 bellard
    }
3377 158142c2 bellard
    else {
3378 158142c2 bellard
        aSig |= LIT64( 0x4000000000000000 );
3379 158142c2 bellard
    }
3380 158142c2 bellard
    shift64RightJamming( aSig, - expDiff, &aSig );
3381 158142c2 bellard
    bSig |= LIT64( 0x4000000000000000 );
3382 158142c2 bellard
 bBigger:
3383 158142c2 bellard
    zSig = bSig - aSig;
3384 158142c2 bellard
    zExp = bExp;
3385 158142c2 bellard
    zSign ^= 1;
3386 158142c2 bellard
    goto normalizeRoundAndPack;
3387 158142c2 bellard
 aExpBigger:
3388 158142c2 bellard
    if ( aExp == 0x7FF ) {
3389 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3390 158142c2 bellard
        return a;
3391 158142c2 bellard
    }
3392 158142c2 bellard
    if ( bExp == 0 ) {
3393 158142c2 bellard
        --expDiff;
3394 158142c2 bellard
    }
3395 158142c2 bellard
    else {
3396 158142c2 bellard
        bSig |= LIT64( 0x4000000000000000 );
3397 158142c2 bellard
    }
3398 158142c2 bellard
    shift64RightJamming( bSig, expDiff, &bSig );
3399 158142c2 bellard
    aSig |= LIT64( 0x4000000000000000 );
3400 158142c2 bellard
 aBigger:
3401 158142c2 bellard
    zSig = aSig - bSig;
3402 158142c2 bellard
    zExp = aExp;
3403 158142c2 bellard
 normalizeRoundAndPack:
3404 158142c2 bellard
    --zExp;
3405 158142c2 bellard
    return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3406 158142c2 bellard
3407 158142c2 bellard
}
3408 158142c2 bellard
3409 158142c2 bellard
/*----------------------------------------------------------------------------
3410 158142c2 bellard
| Returns the result of adding the double-precision floating-point values `a'
3411 158142c2 bellard
| and `b'.  The operation is performed according to the IEC/IEEE Standard for
3412 158142c2 bellard
| Binary Floating-Point Arithmetic.
3413 158142c2 bellard
*----------------------------------------------------------------------------*/
3414 158142c2 bellard
3415 158142c2 bellard
float64 float64_add( float64 a, float64 b STATUS_PARAM )
3416 158142c2 bellard
{
3417 158142c2 bellard
    flag aSign, bSign;
3418 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3419 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3420 158142c2 bellard
3421 158142c2 bellard
    aSign = extractFloat64Sign( a );
3422 158142c2 bellard
    bSign = extractFloat64Sign( b );
3423 158142c2 bellard
    if ( aSign == bSign ) {
3424 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3425 158142c2 bellard
    }
3426 158142c2 bellard
    else {
3427 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3428 158142c2 bellard
    }
3429 158142c2 bellard
3430 158142c2 bellard
}
3431 158142c2 bellard
3432 158142c2 bellard
/*----------------------------------------------------------------------------
3433 158142c2 bellard
| Returns the result of subtracting the double-precision floating-point values
3434 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3435 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3436 158142c2 bellard
*----------------------------------------------------------------------------*/
3437 158142c2 bellard
3438 158142c2 bellard
float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3439 158142c2 bellard
{
3440 158142c2 bellard
    flag aSign, bSign;
3441 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3442 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3443 158142c2 bellard
3444 158142c2 bellard
    aSign = extractFloat64Sign( a );
3445 158142c2 bellard
    bSign = extractFloat64Sign( b );
3446 158142c2 bellard
    if ( aSign == bSign ) {
3447 158142c2 bellard
        return subFloat64Sigs( a, b, aSign STATUS_VAR );
3448 158142c2 bellard
    }
3449 158142c2 bellard
    else {
3450 158142c2 bellard
        return addFloat64Sigs( a, b, aSign STATUS_VAR );
3451 158142c2 bellard
    }
3452 158142c2 bellard
3453 158142c2 bellard
}
3454 158142c2 bellard
3455 158142c2 bellard
/*----------------------------------------------------------------------------
3456 158142c2 bellard
| Returns the result of multiplying the double-precision floating-point values
3457 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
3458 158142c2 bellard
| for Binary Floating-Point Arithmetic.
3459 158142c2 bellard
*----------------------------------------------------------------------------*/
3460 158142c2 bellard
3461 158142c2 bellard
float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3462 158142c2 bellard
{
3463 158142c2 bellard
    flag aSign, bSign, zSign;
3464 158142c2 bellard
    int16 aExp, bExp, zExp;
3465 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
3466 158142c2 bellard
3467 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3468 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3469 37d18660 Peter Maydell
3470 158142c2 bellard
    aSig = extractFloat64Frac( a );
3471 158142c2 bellard
    aExp = extractFloat64Exp( a );
3472 158142c2 bellard
    aSign = extractFloat64Sign( a );
3473 158142c2 bellard
    bSig = extractFloat64Frac( b );
3474 158142c2 bellard
    bExp = extractFloat64Exp( b );
3475 158142c2 bellard
    bSign = extractFloat64Sign( b );
3476 158142c2 bellard
    zSign = aSign ^ bSign;
3477 158142c2 bellard
    if ( aExp == 0x7FF ) {
3478 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3479 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3480 158142c2 bellard
        }
3481 158142c2 bellard
        if ( ( bExp | bSig ) == 0 ) {
3482 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3483 158142c2 bellard
            return float64_default_nan;
3484 158142c2 bellard
        }
3485 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3486 158142c2 bellard
    }
3487 158142c2 bellard
    if ( bExp == 0x7FF ) {
3488 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3489 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) {
3490 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3491 158142c2 bellard
            return float64_default_nan;
3492 158142c2 bellard
        }
3493 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3494 158142c2 bellard
    }
3495 158142c2 bellard
    if ( aExp == 0 ) {
3496 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3497 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3498 158142c2 bellard
    }
3499 158142c2 bellard
    if ( bExp == 0 ) {
3500 158142c2 bellard
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3501 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3502 158142c2 bellard
    }
3503 158142c2 bellard
    zExp = aExp + bExp - 0x3FF;
3504 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3505 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3506 158142c2 bellard
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3507 158142c2 bellard
    zSig0 |= ( zSig1 != 0 );
3508 bb98fe42 Andreas Färber
    if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
3509 158142c2 bellard
        zSig0 <<= 1;
3510 158142c2 bellard
        --zExp;
3511 158142c2 bellard
    }
3512 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3513 158142c2 bellard
3514 158142c2 bellard
}
3515 158142c2 bellard
3516 158142c2 bellard
/*----------------------------------------------------------------------------
3517 158142c2 bellard
| Returns the result of dividing the double-precision floating-point value `a'
3518 158142c2 bellard
| by the corresponding value `b'.  The operation is performed according to
3519 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3520 158142c2 bellard
*----------------------------------------------------------------------------*/
3521 158142c2 bellard
3522 158142c2 bellard
float64 float64_div( float64 a, float64 b STATUS_PARAM )
3523 158142c2 bellard
{
3524 158142c2 bellard
    flag aSign, bSign, zSign;
3525 158142c2 bellard
    int16 aExp, bExp, zExp;
3526 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig;
3527 bb98fe42 Andreas Färber
    uint64_t rem0, rem1;
3528 bb98fe42 Andreas Färber
    uint64_t term0, term1;
3529 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3530 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3531 158142c2 bellard
3532 158142c2 bellard
    aSig = extractFloat64Frac( a );
3533 158142c2 bellard
    aExp = extractFloat64Exp( a );
3534 158142c2 bellard
    aSign = extractFloat64Sign( a );
3535 158142c2 bellard
    bSig = extractFloat64Frac( b );
3536 158142c2 bellard
    bExp = extractFloat64Exp( b );
3537 158142c2 bellard
    bSign = extractFloat64Sign( b );
3538 158142c2 bellard
    zSign = aSign ^ bSign;
3539 158142c2 bellard
    if ( aExp == 0x7FF ) {
3540 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3541 158142c2 bellard
        if ( bExp == 0x7FF ) {
3542 158142c2 bellard
            if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3543 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3544 158142c2 bellard
            return float64_default_nan;
3545 158142c2 bellard
        }
3546 158142c2 bellard
        return packFloat64( zSign, 0x7FF, 0 );
3547 158142c2 bellard
    }
3548 158142c2 bellard
    if ( bExp == 0x7FF ) {
3549 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3550 158142c2 bellard
        return packFloat64( zSign, 0, 0 );
3551 158142c2 bellard
    }
3552 158142c2 bellard
    if ( bExp == 0 ) {
3553 158142c2 bellard
        if ( bSig == 0 ) {
3554 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
3555 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
3556 158142c2 bellard
                return float64_default_nan;
3557 158142c2 bellard
            }
3558 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
3559 158142c2 bellard
            return packFloat64( zSign, 0x7FF, 0 );
3560 158142c2 bellard
        }
3561 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3562 158142c2 bellard
    }
3563 158142c2 bellard
    if ( aExp == 0 ) {
3564 158142c2 bellard
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3565 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3566 158142c2 bellard
    }
3567 158142c2 bellard
    zExp = aExp - bExp + 0x3FD;
3568 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3569 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3570 158142c2 bellard
    if ( bSig <= ( aSig + aSig ) ) {
3571 158142c2 bellard
        aSig >>= 1;
3572 158142c2 bellard
        ++zExp;
3573 158142c2 bellard
    }
3574 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, bSig );
3575 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 2 ) {
3576 158142c2 bellard
        mul64To128( bSig, zSig, &term0, &term1 );
3577 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3578 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3579 158142c2 bellard
            --zSig;
3580 158142c2 bellard
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3581 158142c2 bellard
        }
3582 158142c2 bellard
        zSig |= ( rem1 != 0 );
3583 158142c2 bellard
    }
3584 158142c2 bellard
    return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3585 158142c2 bellard
3586 158142c2 bellard
}
3587 158142c2 bellard
3588 158142c2 bellard
/*----------------------------------------------------------------------------
3589 158142c2 bellard
| Returns the remainder of the double-precision floating-point value `a'
3590 158142c2 bellard
| with respect to the corresponding value `b'.  The operation is performed
3591 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3592 158142c2 bellard
*----------------------------------------------------------------------------*/
3593 158142c2 bellard
3594 158142c2 bellard
float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3595 158142c2 bellard
{
3596 ed086f3d Blue Swirl
    flag aSign, zSign;
3597 158142c2 bellard
    int16 aExp, bExp, expDiff;
3598 bb98fe42 Andreas Färber
    uint64_t aSig, bSig;
3599 bb98fe42 Andreas Färber
    uint64_t q, alternateASig;
3600 bb98fe42 Andreas Färber
    int64_t sigMean;
3601 158142c2 bellard
3602 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3603 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3604 158142c2 bellard
    aSig = extractFloat64Frac( a );
3605 158142c2 bellard
    aExp = extractFloat64Exp( a );
3606 158142c2 bellard
    aSign = extractFloat64Sign( a );
3607 158142c2 bellard
    bSig = extractFloat64Frac( b );
3608 158142c2 bellard
    bExp = extractFloat64Exp( b );
3609 158142c2 bellard
    if ( aExp == 0x7FF ) {
3610 158142c2 bellard
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3611 158142c2 bellard
            return propagateFloat64NaN( a, b STATUS_VAR );
3612 158142c2 bellard
        }
3613 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3614 158142c2 bellard
        return float64_default_nan;
3615 158142c2 bellard
    }
3616 158142c2 bellard
    if ( bExp == 0x7FF ) {
3617 158142c2 bellard
        if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3618 158142c2 bellard
        return a;
3619 158142c2 bellard
    }
3620 158142c2 bellard
    if ( bExp == 0 ) {
3621 158142c2 bellard
        if ( bSig == 0 ) {
3622 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
3623 158142c2 bellard
            return float64_default_nan;
3624 158142c2 bellard
        }
3625 158142c2 bellard
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3626 158142c2 bellard
    }
3627 158142c2 bellard
    if ( aExp == 0 ) {
3628 158142c2 bellard
        if ( aSig == 0 ) return a;
3629 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3630 158142c2 bellard
    }
3631 158142c2 bellard
    expDiff = aExp - bExp;
3632 158142c2 bellard
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3633 158142c2 bellard
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3634 158142c2 bellard
    if ( expDiff < 0 ) {
3635 158142c2 bellard
        if ( expDiff < -1 ) return a;
3636 158142c2 bellard
        aSig >>= 1;
3637 158142c2 bellard
    }
3638 158142c2 bellard
    q = ( bSig <= aSig );
3639 158142c2 bellard
    if ( q ) aSig -= bSig;
3640 158142c2 bellard
    expDiff -= 64;
3641 158142c2 bellard
    while ( 0 < expDiff ) {
3642 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3643 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3644 158142c2 bellard
        aSig = - ( ( bSig>>2 ) * q );
3645 158142c2 bellard
        expDiff -= 62;
3646 158142c2 bellard
    }
3647 158142c2 bellard
    expDiff += 64;
3648 158142c2 bellard
    if ( 0 < expDiff ) {
3649 158142c2 bellard
        q = estimateDiv128To64( aSig, 0, bSig );
3650 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
3651 158142c2 bellard
        q >>= 64 - expDiff;
3652 158142c2 bellard
        bSig >>= 2;
3653 158142c2 bellard
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3654 158142c2 bellard
    }
3655 158142c2 bellard
    else {
3656 158142c2 bellard
        aSig >>= 2;
3657 158142c2 bellard
        bSig >>= 2;
3658 158142c2 bellard
    }
3659 158142c2 bellard
    do {
3660 158142c2 bellard
        alternateASig = aSig;
3661 158142c2 bellard
        ++q;
3662 158142c2 bellard
        aSig -= bSig;
3663 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig );
3664 158142c2 bellard
    sigMean = aSig + alternateASig;
3665 158142c2 bellard
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3666 158142c2 bellard
        aSig = alternateASig;
3667 158142c2 bellard
    }
3668 bb98fe42 Andreas Färber
    zSign = ( (int64_t) aSig < 0 );
3669 158142c2 bellard
    if ( zSign ) aSig = - aSig;
3670 158142c2 bellard
    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3671 158142c2 bellard
3672 158142c2 bellard
}
3673 158142c2 bellard
3674 158142c2 bellard
/*----------------------------------------------------------------------------
3675 369be8f6 Peter Maydell
| Returns the result of multiplying the double-precision floating-point values
3676 369be8f6 Peter Maydell
| `a' and `b' then adding 'c', with no intermediate rounding step after the
3677 369be8f6 Peter Maydell
| multiplication.  The operation is performed according to the IEC/IEEE
3678 369be8f6 Peter Maydell
| Standard for Binary Floating-Point Arithmetic 754-2008.
3679 369be8f6 Peter Maydell
| The flags argument allows the caller to select negation of the
3680 369be8f6 Peter Maydell
| addend, the intermediate product, or the final result. (The difference
3681 369be8f6 Peter Maydell
| between this and having the caller do a separate negation is that negating
3682 369be8f6 Peter Maydell
| externally will flip the sign bit on NaNs.)
3683 369be8f6 Peter Maydell
*----------------------------------------------------------------------------*/
3684 369be8f6 Peter Maydell
3685 369be8f6 Peter Maydell
float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
3686 369be8f6 Peter Maydell
{
3687 369be8f6 Peter Maydell
    flag aSign, bSign, cSign, zSign;
3688 369be8f6 Peter Maydell
    int aExp, bExp, cExp, pExp, zExp, expDiff;
3689 369be8f6 Peter Maydell
    uint64_t aSig, bSig, cSig;
3690 369be8f6 Peter Maydell
    flag pInf, pZero, pSign;
3691 369be8f6 Peter Maydell
    uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
3692 369be8f6 Peter Maydell
    int shiftcount;
3693 369be8f6 Peter Maydell
    flag signflip, infzero;
3694 369be8f6 Peter Maydell
3695 369be8f6 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3696 369be8f6 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
3697 369be8f6 Peter Maydell
    c = float64_squash_input_denormal(c STATUS_VAR);
3698 369be8f6 Peter Maydell
    aSig = extractFloat64Frac(a);
3699 369be8f6 Peter Maydell
    aExp = extractFloat64Exp(a);
3700 369be8f6 Peter Maydell
    aSign = extractFloat64Sign(a);
3701 369be8f6 Peter Maydell
    bSig = extractFloat64Frac(b);
3702 369be8f6 Peter Maydell
    bExp = extractFloat64Exp(b);
3703 369be8f6 Peter Maydell
    bSign = extractFloat64Sign(b);
3704 369be8f6 Peter Maydell
    cSig = extractFloat64Frac(c);
3705 369be8f6 Peter Maydell
    cExp = extractFloat64Exp(c);
3706 369be8f6 Peter Maydell
    cSign = extractFloat64Sign(c);
3707 369be8f6 Peter Maydell
3708 369be8f6 Peter Maydell
    infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
3709 369be8f6 Peter Maydell
               (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
3710 369be8f6 Peter Maydell
3711 369be8f6 Peter Maydell
    /* It is implementation-defined whether the cases of (0,inf,qnan)
3712 369be8f6 Peter Maydell
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
3713 369be8f6 Peter Maydell
     * they return if they do), so we have to hand this information
3714 369be8f6 Peter Maydell
     * off to the target-specific pick-a-NaN routine.
3715 369be8f6 Peter Maydell
     */
3716 369be8f6 Peter Maydell
    if (((aExp == 0x7ff) && aSig) ||
3717 369be8f6 Peter Maydell
        ((bExp == 0x7ff) && bSig) ||
3718 369be8f6 Peter Maydell
        ((cExp == 0x7ff) && cSig)) {
3719 369be8f6 Peter Maydell
        return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
3720 369be8f6 Peter Maydell
    }
3721 369be8f6 Peter Maydell
3722 369be8f6 Peter Maydell
    if (infzero) {
3723 369be8f6 Peter Maydell
        float_raise(float_flag_invalid STATUS_VAR);
3724 369be8f6 Peter Maydell
        return float64_default_nan;
3725 369be8f6 Peter Maydell
    }
3726 369be8f6 Peter Maydell
3727 369be8f6 Peter Maydell
    if (flags & float_muladd_negate_c) {
3728 369be8f6 Peter Maydell
        cSign ^= 1;
3729 369be8f6 Peter Maydell
    }
3730 369be8f6 Peter Maydell
3731 369be8f6 Peter Maydell
    signflip = (flags & float_muladd_negate_result) ? 1 : 0;
3732 369be8f6 Peter Maydell
3733 369be8f6 Peter Maydell
    /* Work out the sign and type of the product */
3734 369be8f6 Peter Maydell
    pSign = aSign ^ bSign;
3735 369be8f6 Peter Maydell
    if (flags & float_muladd_negate_product) {
3736 369be8f6 Peter Maydell
        pSign ^= 1;
3737 369be8f6 Peter Maydell
    }
3738 369be8f6 Peter Maydell
    pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
3739 369be8f6 Peter Maydell
    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
3740 369be8f6 Peter Maydell
3741 369be8f6 Peter Maydell
    if (cExp == 0x7ff) {
3742 369be8f6 Peter Maydell
        if (pInf && (pSign ^ cSign)) {
3743 369be8f6 Peter Maydell
            /* addition of opposite-signed infinities => InvalidOperation */
3744 369be8f6 Peter Maydell
            float_raise(float_flag_invalid STATUS_VAR);
3745 369be8f6 Peter Maydell
            return float64_default_nan;
3746 369be8f6 Peter Maydell
        }
3747 369be8f6 Peter Maydell
        /* Otherwise generate an infinity of the same sign */
3748 369be8f6 Peter Maydell
        return packFloat64(cSign ^ signflip, 0x7ff, 0);
3749 369be8f6 Peter Maydell
    }
3750 369be8f6 Peter Maydell
3751 369be8f6 Peter Maydell
    if (pInf) {
3752 369be8f6 Peter Maydell
        return packFloat64(pSign ^ signflip, 0x7ff, 0);
3753 369be8f6 Peter Maydell
    }
3754 369be8f6 Peter Maydell
3755 369be8f6 Peter Maydell
    if (pZero) {
3756 369be8f6 Peter Maydell
        if (cExp == 0) {
3757 369be8f6 Peter Maydell
            if (cSig == 0) {
3758 369be8f6 Peter Maydell
                /* Adding two exact zeroes */
3759 369be8f6 Peter Maydell
                if (pSign == cSign) {
3760 369be8f6 Peter Maydell
                    zSign = pSign;
3761 369be8f6 Peter Maydell
                } else if (STATUS(float_rounding_mode) == float_round_down) {
3762 369be8f6 Peter Maydell
                    zSign = 1;
3763 369be8f6 Peter Maydell
                } else {
3764 369be8f6 Peter Maydell
                    zSign = 0;
3765 369be8f6 Peter Maydell
                }
3766 369be8f6 Peter Maydell
                return packFloat64(zSign ^ signflip, 0, 0);
3767 369be8f6 Peter Maydell
            }
3768 369be8f6 Peter Maydell
            /* Exact zero plus a denorm */
3769 369be8f6 Peter Maydell
            if (STATUS(flush_to_zero)) {
3770 369be8f6 Peter Maydell
                float_raise(float_flag_output_denormal STATUS_VAR);
3771 369be8f6 Peter Maydell
                return packFloat64(cSign ^ signflip, 0, 0);
3772 369be8f6 Peter Maydell
            }
3773 369be8f6 Peter Maydell
        }
3774 369be8f6 Peter Maydell
        /* Zero plus something non-zero : just return the something */
3775 369be8f6 Peter Maydell
        return c ^ ((uint64_t)signflip << 63);
3776 369be8f6 Peter Maydell
    }
3777 369be8f6 Peter Maydell
3778 369be8f6 Peter Maydell
    if (aExp == 0) {
3779 369be8f6 Peter Maydell
        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
3780 369be8f6 Peter Maydell
    }
3781 369be8f6 Peter Maydell
    if (bExp == 0) {
3782 369be8f6 Peter Maydell
        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
3783 369be8f6 Peter Maydell
    }
3784 369be8f6 Peter Maydell
3785 369be8f6 Peter Maydell
    /* Calculate the actual result a * b + c */
3786 369be8f6 Peter Maydell
3787 369be8f6 Peter Maydell
    /* Multiply first; this is easy. */
3788 369be8f6 Peter Maydell
    /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
3789 369be8f6 Peter Maydell
     * because we want the true exponent, not the "one-less-than"
3790 369be8f6 Peter Maydell
     * flavour that roundAndPackFloat64() takes.
3791 369be8f6 Peter Maydell
     */
3792 369be8f6 Peter Maydell
    pExp = aExp + bExp - 0x3fe;
3793 369be8f6 Peter Maydell
    aSig = (aSig | LIT64(0x0010000000000000))<<10;
3794 369be8f6 Peter Maydell
    bSig = (bSig | LIT64(0x0010000000000000))<<11;
3795 369be8f6 Peter Maydell
    mul64To128(aSig, bSig, &pSig0, &pSig1);
3796 369be8f6 Peter Maydell
    if ((int64_t)(pSig0 << 1) >= 0) {
3797 369be8f6 Peter Maydell
        shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
3798 369be8f6 Peter Maydell
        pExp--;
3799 369be8f6 Peter Maydell
    }
3800 369be8f6 Peter Maydell
3801 369be8f6 Peter Maydell
    zSign = pSign ^ signflip;
3802 369be8f6 Peter Maydell
3803 369be8f6 Peter Maydell
    /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
3804 369be8f6 Peter Maydell
     * bit in position 126.
3805 369be8f6 Peter Maydell
     */
3806 369be8f6 Peter Maydell
    if (cExp == 0) {
3807 369be8f6 Peter Maydell
        if (!cSig) {
3808 369be8f6 Peter Maydell
            /* Throw out the special case of c being an exact zero now */
3809 369be8f6 Peter Maydell
            shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
3810 369be8f6 Peter Maydell
            return roundAndPackFloat64(zSign, pExp - 1,
3811 369be8f6 Peter Maydell
                                       pSig1 STATUS_VAR);
3812 369be8f6 Peter Maydell
        }
3813 369be8f6 Peter Maydell
        normalizeFloat64Subnormal(cSig, &cExp, &cSig);
3814 369be8f6 Peter Maydell
    }
3815 369be8f6 Peter Maydell
3816 369be8f6 Peter Maydell
    /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
3817 369be8f6 Peter Maydell
     * significand of the addend, with the explicit bit in position 126.
3818 369be8f6 Peter Maydell
     */
3819 369be8f6 Peter Maydell
    cSig0 = cSig << (126 - 64 - 52);
3820 369be8f6 Peter Maydell
    cSig1 = 0;
3821 369be8f6 Peter Maydell
    cSig0 |= LIT64(0x4000000000000000);
3822 369be8f6 Peter Maydell
    expDiff = pExp - cExp;
3823 369be8f6 Peter Maydell
3824 369be8f6 Peter Maydell
    if (pSign == cSign) {
3825 369be8f6 Peter Maydell
        /* Addition */
3826 369be8f6 Peter Maydell
        if (expDiff > 0) {
3827 369be8f6 Peter Maydell
            /* scale c to match p */
3828 369be8f6 Peter Maydell
            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3829 369be8f6 Peter Maydell
            zExp = pExp;
3830 369be8f6 Peter Maydell
        } else if (expDiff < 0) {
3831 369be8f6 Peter Maydell
            /* scale p to match c */
3832 369be8f6 Peter Maydell
            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3833 369be8f6 Peter Maydell
            zExp = cExp;
3834 369be8f6 Peter Maydell
        } else {
3835 369be8f6 Peter Maydell
            /* no scaling needed */
3836 369be8f6 Peter Maydell
            zExp = cExp;
3837 369be8f6 Peter Maydell
        }
3838 369be8f6 Peter Maydell
        /* Add significands and make sure explicit bit ends up in posn 126 */
3839 369be8f6 Peter Maydell
        add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3840 369be8f6 Peter Maydell
        if ((int64_t)zSig0 < 0) {
3841 369be8f6 Peter Maydell
            shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
3842 369be8f6 Peter Maydell
        } else {
3843 369be8f6 Peter Maydell
            zExp--;
3844 369be8f6 Peter Maydell
        }
3845 369be8f6 Peter Maydell
        shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
3846 369be8f6 Peter Maydell
        return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
3847 369be8f6 Peter Maydell
    } else {
3848 369be8f6 Peter Maydell
        /* Subtraction */
3849 369be8f6 Peter Maydell
        if (expDiff > 0) {
3850 369be8f6 Peter Maydell
            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3851 369be8f6 Peter Maydell
            sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3852 369be8f6 Peter Maydell
            zExp = pExp;
3853 369be8f6 Peter Maydell
        } else if (expDiff < 0) {
3854 369be8f6 Peter Maydell
            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3855 369be8f6 Peter Maydell
            sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3856 369be8f6 Peter Maydell
            zExp = cExp;
3857 369be8f6 Peter Maydell
            zSign ^= 1;
3858 369be8f6 Peter Maydell
        } else {
3859 369be8f6 Peter Maydell
            zExp = pExp;
3860 369be8f6 Peter Maydell
            if (lt128(cSig0, cSig1, pSig0, pSig1)) {
3861 369be8f6 Peter Maydell
                sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3862 369be8f6 Peter Maydell
            } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
3863 369be8f6 Peter Maydell
                sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3864 369be8f6 Peter Maydell
                zSign ^= 1;
3865 369be8f6 Peter Maydell
            } else {
3866 369be8f6 Peter Maydell
                /* Exact zero */
3867 369be8f6 Peter Maydell
                zSign = signflip;
3868 369be8f6 Peter Maydell
                if (STATUS(float_rounding_mode) == float_round_down) {
3869 369be8f6 Peter Maydell
                    zSign ^= 1;
3870 369be8f6 Peter Maydell
                }
3871 369be8f6 Peter Maydell
                return packFloat64(zSign, 0, 0);
3872 369be8f6 Peter Maydell
            }
3873 369be8f6 Peter Maydell
        }
3874 369be8f6 Peter Maydell
        --zExp;
3875 369be8f6 Peter Maydell
        /* Do the equivalent of normalizeRoundAndPackFloat64() but
3876 369be8f6 Peter Maydell
         * starting with the significand in a pair of uint64_t.
3877 369be8f6 Peter Maydell
         */
3878 369be8f6 Peter Maydell
        if (zSig0) {
3879 369be8f6 Peter Maydell
            shiftcount = countLeadingZeros64(zSig0) - 1;
3880 369be8f6 Peter Maydell
            shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
3881 369be8f6 Peter Maydell
            if (zSig1) {
3882 369be8f6 Peter Maydell
                zSig0 |= 1;
3883 369be8f6 Peter Maydell
            }
3884 369be8f6 Peter Maydell
            zExp -= shiftcount;
3885 369be8f6 Peter Maydell
        } else {
3886 369be8f6 Peter Maydell
            shiftcount = countLeadingZeros64(zSig1) - 1;
3887 369be8f6 Peter Maydell
            zSig0 = zSig1 << shiftcount;
3888 369be8f6 Peter Maydell
            zExp -= (shiftcount + 64);
3889 369be8f6 Peter Maydell
        }
3890 369be8f6 Peter Maydell
        return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
3891 369be8f6 Peter Maydell
    }
3892 369be8f6 Peter Maydell
}
3893 369be8f6 Peter Maydell
3894 369be8f6 Peter Maydell
/*----------------------------------------------------------------------------
3895 158142c2 bellard
| Returns the square root of the double-precision floating-point value `a'.
3896 158142c2 bellard
| The operation is performed according to the IEC/IEEE Standard for Binary
3897 158142c2 bellard
| Floating-Point Arithmetic.
3898 158142c2 bellard
*----------------------------------------------------------------------------*/
3899 158142c2 bellard
3900 158142c2 bellard
float64 float64_sqrt( float64 a STATUS_PARAM )
3901 158142c2 bellard
{
3902 158142c2 bellard
    flag aSign;
3903 158142c2 bellard
    int16 aExp, zExp;
3904 bb98fe42 Andreas Färber
    uint64_t aSig, zSig, doubleZSig;
3905 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, term0, term1;
3906 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3907 158142c2 bellard
3908 158142c2 bellard
    aSig = extractFloat64Frac( a );
3909 158142c2 bellard
    aExp = extractFloat64Exp( a );
3910 158142c2 bellard
    aSign = extractFloat64Sign( a );
3911 158142c2 bellard
    if ( aExp == 0x7FF ) {
3912 158142c2 bellard
        if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3913 158142c2 bellard
        if ( ! aSign ) return a;
3914 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3915 158142c2 bellard
        return float64_default_nan;
3916 158142c2 bellard
    }
3917 158142c2 bellard
    if ( aSign ) {
3918 158142c2 bellard
        if ( ( aExp | aSig ) == 0 ) return a;
3919 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
3920 158142c2 bellard
        return float64_default_nan;
3921 158142c2 bellard
    }
3922 158142c2 bellard
    if ( aExp == 0 ) {
3923 f090c9d4 pbrook
        if ( aSig == 0 ) return float64_zero;
3924 158142c2 bellard
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3925 158142c2 bellard
    }
3926 158142c2 bellard
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3927 158142c2 bellard
    aSig |= LIT64( 0x0010000000000000 );
3928 158142c2 bellard
    zSig = estimateSqrt32( aExp, aSig>>21 );
3929 158142c2 bellard
    aSig <<= 9 - ( aExp & 1 );
3930 158142c2 bellard
    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3931 158142c2 bellard
    if ( ( zSig & 0x1FF ) <= 5 ) {
3932 158142c2 bellard
        doubleZSig = zSig<<1;
3933 158142c2 bellard
        mul64To128( zSig, zSig, &term0, &term1 );
3934 158142c2 bellard
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3935 bb98fe42 Andreas Färber
        while ( (int64_t) rem0 < 0 ) {
3936 158142c2 bellard
            --zSig;
3937 158142c2 bellard
            doubleZSig -= 2;
3938 158142c2 bellard
            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3939 158142c2 bellard
        }
3940 158142c2 bellard
        zSig |= ( ( rem0 | rem1 ) != 0 );
3941 158142c2 bellard
    }
3942 158142c2 bellard
    return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3943 158142c2 bellard
3944 158142c2 bellard
}
3945 158142c2 bellard
3946 158142c2 bellard
/*----------------------------------------------------------------------------
3947 374dfc33 aurel32
| Returns the binary log of the double-precision floating-point value `a'.
3948 374dfc33 aurel32
| The operation is performed according to the IEC/IEEE Standard for Binary
3949 374dfc33 aurel32
| Floating-Point Arithmetic.
3950 374dfc33 aurel32
*----------------------------------------------------------------------------*/
3951 374dfc33 aurel32
float64 float64_log2( float64 a STATUS_PARAM )
3952 374dfc33 aurel32
{
3953 374dfc33 aurel32
    flag aSign, zSign;
3954 374dfc33 aurel32
    int16 aExp;
3955 bb98fe42 Andreas Färber
    uint64_t aSig, aSig0, aSig1, zSig, i;
3956 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
3957 374dfc33 aurel32
3958 374dfc33 aurel32
    aSig = extractFloat64Frac( a );
3959 374dfc33 aurel32
    aExp = extractFloat64Exp( a );
3960 374dfc33 aurel32
    aSign = extractFloat64Sign( a );
3961 374dfc33 aurel32
3962 374dfc33 aurel32
    if ( aExp == 0 ) {
3963 374dfc33 aurel32
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3964 374dfc33 aurel32
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3965 374dfc33 aurel32
    }
3966 374dfc33 aurel32
    if ( aSign ) {
3967 374dfc33 aurel32
        float_raise( float_flag_invalid STATUS_VAR);
3968 374dfc33 aurel32
        return float64_default_nan;
3969 374dfc33 aurel32
    }
3970 374dfc33 aurel32
    if ( aExp == 0x7FF ) {
3971 374dfc33 aurel32
        if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3972 374dfc33 aurel32
        return a;
3973 374dfc33 aurel32
    }
3974 374dfc33 aurel32
3975 374dfc33 aurel32
    aExp -= 0x3FF;
3976 374dfc33 aurel32
    aSig |= LIT64( 0x0010000000000000 );
3977 374dfc33 aurel32
    zSign = aExp < 0;
3978 bb98fe42 Andreas Färber
    zSig = (uint64_t)aExp << 52;
3979 374dfc33 aurel32
    for (i = 1LL << 51; i > 0; i >>= 1) {
3980 374dfc33 aurel32
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
3981 374dfc33 aurel32
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3982 374dfc33 aurel32
        if ( aSig & LIT64( 0x0020000000000000 ) ) {
3983 374dfc33 aurel32
            aSig >>= 1;
3984 374dfc33 aurel32
            zSig |= i;
3985 374dfc33 aurel32
        }
3986 374dfc33 aurel32
    }
3987 374dfc33 aurel32
3988 374dfc33 aurel32
    if ( zSign )
3989 374dfc33 aurel32
        zSig = -zSig;
3990 374dfc33 aurel32
    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3991 374dfc33 aurel32
}
3992 374dfc33 aurel32
3993 374dfc33 aurel32
/*----------------------------------------------------------------------------
3994 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
3995 b689362d Aurelien Jarno
| corresponding value `b', and 0 otherwise.  The invalid exception is raised
3996 b689362d Aurelien Jarno
| if either operand is a NaN.  Otherwise, the comparison is performed
3997 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3998 158142c2 bellard
*----------------------------------------------------------------------------*/
3999 158142c2 bellard
4000 b689362d Aurelien Jarno
int float64_eq( float64 a, float64 b STATUS_PARAM )
4001 158142c2 bellard
{
4002 bb98fe42 Andreas Färber
    uint64_t av, bv;
4003 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4004 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4005 158142c2 bellard
4006 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4007 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4008 158142c2 bellard
       ) {
4009 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4010 158142c2 bellard
        return 0;
4011 158142c2 bellard
    }
4012 f090c9d4 pbrook
    av = float64_val(a);
4013 a1b91bb4 pbrook
    bv = float64_val(b);
4014 bb98fe42 Andreas Färber
    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4015 158142c2 bellard
4016 158142c2 bellard
}
4017 158142c2 bellard
4018 158142c2 bellard
/*----------------------------------------------------------------------------
4019 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
4020 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  The invalid
4021 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
4022 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4023 158142c2 bellard
*----------------------------------------------------------------------------*/
4024 158142c2 bellard
4025 750afe93 bellard
int float64_le( float64 a, float64 b STATUS_PARAM )
4026 158142c2 bellard
{
4027 158142c2 bellard
    flag aSign, bSign;
4028 bb98fe42 Andreas Färber
    uint64_t av, bv;
4029 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4030 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4031 158142c2 bellard
4032 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4033 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4034 158142c2 bellard
       ) {
4035 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4036 158142c2 bellard
        return 0;
4037 158142c2 bellard
    }
4038 158142c2 bellard
    aSign = extractFloat64Sign( a );
4039 158142c2 bellard
    bSign = extractFloat64Sign( b );
4040 f090c9d4 pbrook
    av = float64_val(a);
4041 a1b91bb4 pbrook
    bv = float64_val(b);
4042 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4043 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
4044 158142c2 bellard
4045 158142c2 bellard
}
4046 158142c2 bellard
4047 158142c2 bellard
/*----------------------------------------------------------------------------
4048 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
4049 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
4050 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
4051 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4052 158142c2 bellard
*----------------------------------------------------------------------------*/
4053 158142c2 bellard
4054 750afe93 bellard
int float64_lt( float64 a, float64 b STATUS_PARAM )
4055 158142c2 bellard
{
4056 158142c2 bellard
    flag aSign, bSign;
4057 bb98fe42 Andreas Färber
    uint64_t av, bv;
4058 158142c2 bellard
4059 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4060 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4061 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4062 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4063 158142c2 bellard
       ) {
4064 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4065 158142c2 bellard
        return 0;
4066 158142c2 bellard
    }
4067 158142c2 bellard
    aSign = extractFloat64Sign( a );
4068 158142c2 bellard
    bSign = extractFloat64Sign( b );
4069 f090c9d4 pbrook
    av = float64_val(a);
4070 a1b91bb4 pbrook
    bv = float64_val(b);
4071 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4072 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
4073 158142c2 bellard
4074 158142c2 bellard
}
4075 158142c2 bellard
4076 158142c2 bellard
/*----------------------------------------------------------------------------
4077 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4078 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
4079 f5a64251 Aurelien Jarno
| operand is a NaN.  The comparison is performed according to the IEC/IEEE
4080 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
4081 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4082 67b7861d Aurelien Jarno
4083 67b7861d Aurelien Jarno
int float64_unordered( float64 a, float64 b STATUS_PARAM )
4084 67b7861d Aurelien Jarno
{
4085 67b7861d Aurelien Jarno
    a = float64_squash_input_denormal(a STATUS_VAR);
4086 67b7861d Aurelien Jarno
    b = float64_squash_input_denormal(b STATUS_VAR);
4087 67b7861d Aurelien Jarno
4088 67b7861d Aurelien Jarno
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4089 67b7861d Aurelien Jarno
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4090 67b7861d Aurelien Jarno
       ) {
4091 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
4092 67b7861d Aurelien Jarno
        return 1;
4093 67b7861d Aurelien Jarno
    }
4094 67b7861d Aurelien Jarno
    return 0;
4095 67b7861d Aurelien Jarno
}
4096 67b7861d Aurelien Jarno
4097 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4098 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is equal to the
4099 f5a64251 Aurelien Jarno
| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
4100 f5a64251 Aurelien Jarno
| exception.The comparison is performed according to the IEC/IEEE Standard
4101 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
4102 158142c2 bellard
*----------------------------------------------------------------------------*/
4103 158142c2 bellard
4104 b689362d Aurelien Jarno
int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
4105 158142c2 bellard
{
4106 bb98fe42 Andreas Färber
    uint64_t av, bv;
4107 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4108 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4109 158142c2 bellard
4110 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4111 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4112 158142c2 bellard
       ) {
4113 b689362d Aurelien Jarno
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4114 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
4115 b689362d Aurelien Jarno
        }
4116 158142c2 bellard
        return 0;
4117 158142c2 bellard
    }
4118 f090c9d4 pbrook
    av = float64_val(a);
4119 a1b91bb4 pbrook
    bv = float64_val(b);
4120 bb98fe42 Andreas Färber
    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4121 158142c2 bellard
4122 158142c2 bellard
}
4123 158142c2 bellard
4124 158142c2 bellard
/*----------------------------------------------------------------------------
4125 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than or
4126 158142c2 bellard
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
4127 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
4128 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4129 158142c2 bellard
*----------------------------------------------------------------------------*/
4130 158142c2 bellard
4131 750afe93 bellard
int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
4132 158142c2 bellard
{
4133 158142c2 bellard
    flag aSign, bSign;
4134 bb98fe42 Andreas Färber
    uint64_t av, bv;
4135 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4136 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4137 158142c2 bellard
4138 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4139 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4140 158142c2 bellard
       ) {
4141 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4142 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4143 158142c2 bellard
        }
4144 158142c2 bellard
        return 0;
4145 158142c2 bellard
    }
4146 158142c2 bellard
    aSign = extractFloat64Sign( a );
4147 158142c2 bellard
    bSign = extractFloat64Sign( b );
4148 f090c9d4 pbrook
    av = float64_val(a);
4149 a1b91bb4 pbrook
    bv = float64_val(b);
4150 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4151 f090c9d4 pbrook
    return ( av == bv ) || ( aSign ^ ( av < bv ) );
4152 158142c2 bellard
4153 158142c2 bellard
}
4154 158142c2 bellard
4155 158142c2 bellard
/*----------------------------------------------------------------------------
4156 158142c2 bellard
| Returns 1 if the double-precision floating-point value `a' is less than
4157 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
4158 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
4159 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4160 158142c2 bellard
*----------------------------------------------------------------------------*/
4161 158142c2 bellard
4162 750afe93 bellard
int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
4163 158142c2 bellard
{
4164 158142c2 bellard
    flag aSign, bSign;
4165 bb98fe42 Andreas Färber
    uint64_t av, bv;
4166 37d18660 Peter Maydell
    a = float64_squash_input_denormal(a STATUS_VAR);
4167 37d18660 Peter Maydell
    b = float64_squash_input_denormal(b STATUS_VAR);
4168 158142c2 bellard
4169 158142c2 bellard
    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4170 158142c2 bellard
         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4171 158142c2 bellard
       ) {
4172 158142c2 bellard
        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4173 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4174 158142c2 bellard
        }
4175 158142c2 bellard
        return 0;
4176 158142c2 bellard
    }
4177 158142c2 bellard
    aSign = extractFloat64Sign( a );
4178 158142c2 bellard
    bSign = extractFloat64Sign( b );
4179 f090c9d4 pbrook
    av = float64_val(a);
4180 a1b91bb4 pbrook
    bv = float64_val(b);
4181 bb98fe42 Andreas Färber
    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4182 f090c9d4 pbrook
    return ( av != bv ) && ( aSign ^ ( av < bv ) );
4183 158142c2 bellard
4184 158142c2 bellard
}
4185 158142c2 bellard
4186 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
4187 67b7861d Aurelien Jarno
| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4188 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
4189 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
4190 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
4191 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
4192 67b7861d Aurelien Jarno
4193 67b7861d Aurelien Jarno
int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
{
    int nanA, nanB;

    a = float64_squash_input_denormal(a STATUS_VAR);
    b = float64_squash_input_denormal(b STATUS_VAR);

    /* NaN encoding: exponent field all ones with a non-zero fraction. */
    nanA = ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a );
    nanB = ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b );
    if ( ! nanA && ! nanB ) {
        /* Neither operand is a NaN, so the pair can be ordered. */
        return 0;
    }

    /* Unordered.  Being the quiet variant, only signaling NaNs raise the
     * invalid flag. */
    if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
    }
    return 1;
}
4208 67b7861d Aurelien Jarno
4209 158142c2 bellard
/*----------------------------------------------------------------------------
4210 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4211 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
4212 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4213 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
4214 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN, the
4215 158142c2 bellard
| largest positive integer is returned.  Otherwise, if the conversion
4216 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
4217 158142c2 bellard
*----------------------------------------------------------------------------*/
4218 158142c2 bellard
4219 158142c2 bellard
int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 rshift;

    /* A NaN (maximum exponent, non-zero bits below the integer bit) is
     * handled as a positive value from here on. */
    if ( ( exp == 0x7FFF ) && (uint64_t) ( sig<<1 ) ) {
        sign = 0;
    }

    /* Align the significand for roundAndPackInt32; the shift is never
     * allowed to drop below one bit. */
    rshift = 0x4037 - exp;
    if ( rshift <= 0 ) {
        rshift = 1;
    }
    shift64RightJamming( sig, rshift, &sig );

    return roundAndPackInt32( sign, sig STATUS_VAR );

}
4235 158142c2 bellard
4236 158142c2 bellard
/*----------------------------------------------------------------------------
4237 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4238 158142c2 bellard
| point value `a' to the 32-bit two's complement integer format.  The
4239 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4240 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
4241 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
4242 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
4243 158142c2 bellard
| sign as `a' is returned.
4244 158142c2 bellard
*----------------------------------------------------------------------------*/
4245 158142c2 bellard
4246 158142c2 bellard
int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t truncated;
    int32 rshift;
    int32 z;

    if ( exp > 0x401E ) {
        /* Exponent too large for the 32-bit result.  A NaN is reported as
         * the positive extreme, so its sign is cleared first. */
        if ( ( exp == 0x7FFF ) && (uint64_t) ( sig<<1 ) ) {
            sign = 0;
        }
        goto overflow;
    }
    if ( exp < 0x3FFF ) {
        /* Result truncates to zero; anything other than an exact zero
         * input loses information. */
        if ( exp || sig ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    rshift = 0x403E - exp;
    truncated = sig >> rshift;
    z = truncated;
    if ( sign ) {
        z = - z;
    }
    /* If the sign of the integer disagrees with the sign of the input, the
     * value did not fit. */
    if ( ( z < 0 ) ^ sign ) {
        goto overflow;
    }
    /* Bits discarded by the right shift mean the conversion was inexact. */
    if ( ( truncated<<rshift ) != sig ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

 overflow:
    float_raise( float_flag_invalid STATUS_VAR);
    return sign ? (int32_t) 0x80000000 : 0x7FFFFFFF;

}
4280 158142c2 bellard
4281 158142c2 bellard
/*----------------------------------------------------------------------------
4282 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4283 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
4284 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4285 158142c2 bellard
| Floating-Point Arithmetic---which means in particular that the conversion
4286 158142c2 bellard
| is rounded according to the current rounding mode.  If `a' is a NaN,
4287 158142c2 bellard
| the largest positive integer is returned.  Otherwise, if the conversion
4288 158142c2 bellard
| overflows, the largest integer with the same sign as `a' is returned.
4289 158142c2 bellard
*----------------------------------------------------------------------------*/
4290 158142c2 bellard
4291 158142c2 bellard
int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 rshift = 0x403E - exp;
    uint64_t sigExtra;

    if ( rshift < 0 ) {
        /* Exponent beyond what the significand can hold as a 64-bit
         * integer: always invalid. */
        float_raise( float_flag_invalid STATUS_VAR);
        /* Positive inputs, and any maximum-exponent input whose
         * significand is not exactly the bare integer bit, yield the
         * largest positive integer; everything else the most negative. */
        if (    ! sign
             || (    ( exp == 0x7FFF )
                  && ( sig != LIT64( 0x8000000000000000 ) ) )
           ) {
            return LIT64( 0x7FFFFFFFFFFFFFFF );
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }

    if ( rshift == 0 ) {
        /* No shift needed, so there are no extra bits to round on. */
        sigExtra = 0;
    }
    else {
        shift64ExtraRightJamming( sig, 0, rshift, &sig, &sigExtra );
    }
    return roundAndPackInt64( sign, sig, sigExtra STATUS_VAR );

}
4320 158142c2 bellard
4321 158142c2 bellard
/*----------------------------------------------------------------------------
4322 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4323 158142c2 bellard
| point value `a' to the 64-bit two's complement integer format.  The
4324 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4325 158142c2 bellard
| Floating-Point Arithmetic, except that the conversion is always rounded
4326 158142c2 bellard
| toward zero.  If `a' is a NaN, the largest positive integer is returned.
4327 158142c2 bellard
| Otherwise, if the conversion overflows, the largest integer with the same
4328 158142c2 bellard
| sign as `a' is returned.
4329 158142c2 bellard
*----------------------------------------------------------------------------*/
4330 158142c2 bellard
4331 158142c2 bellard
int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    int32 lshift = exp - 0x403E;
    int64 z;

    if ( lshift >= 0 ) {
        /* Drop the explicit integer bit; what is left is the fraction. */
        sig &= LIT64( 0x7FFFFFFFFFFFFFFF );
        /* The only input in this range that is not flagged invalid has
         * sign/exponent word 0xC03E and a zero fraction; it falls through
         * to return the most negative integer without raising anything. */
        if ( ( a.high != 0xC03E ) || sig ) {
            float_raise( float_flag_invalid STATUS_VAR);
            if ( ! sign || ( ( exp == 0x7FFF ) && sig ) ) {
                return LIT64( 0x7FFFFFFFFFFFFFFF );
            }
        }
        return (int64_t) LIT64( 0x8000000000000000 );
    }
    if ( exp < 0x3FFF ) {
        /* Truncates to zero; inexact unless the input was exactly zero. */
        if ( ( exp != 0 ) || ( sig != 0 ) ) {
            STATUS(float_exception_flags) |= float_flag_inexact;
        }
        return 0;
    }

    /* lshift is negative here, so shift right by its magnitude. */
    z = sig>>( - lshift );
    /* The bits shifted out above are exactly those that survive this left
     * shift; any of them set means the result is inexact. */
    if ( (uint64_t) ( sig<<( lshift & 63 ) ) ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    if ( sign ) {
        z = - z;
    }
    return z;

}
4364 158142c2 bellard
4365 158142c2 bellard
/*----------------------------------------------------------------------------
4366 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4367 158142c2 bellard
| point value `a' to the single-precision floating-point format.  The
4368 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4369 158142c2 bellard
| Floating-Point Arithmetic.
4370 158142c2 bellard
*----------------------------------------------------------------------------*/
4371 158142c2 bellard
4372 158142c2 bellard
float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );

    if ( exp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) ) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Otherwise an infinity of the same sign. */
        return packFloat32( sign, 0xFF, 0 );
    }

    /* Narrow the 64-bit significand, jamming lost bits into the lowest
     * bit so that rounding still sees them. */
    shift64RightJamming( sig, 33, &sig );
    /* Re-bias the exponent unless both exponent and the narrowed
     * significand are zero. */
    if ( exp || sig ) {
        exp -= 0x3F81;
    }
    return roundAndPackFloat32( sign, exp, sig STATUS_VAR );

}
4392 158142c2 bellard
4393 158142c2 bellard
/*----------------------------------------------------------------------------
4394 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4395 158142c2 bellard
| point value `a' to the double-precision floating-point format.  The
4396 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4397 158142c2 bellard
| Floating-Point Arithmetic.
4398 158142c2 bellard
*----------------------------------------------------------------------------*/
4399 158142c2 bellard
4400 158142c2 bellard
float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int32 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t narrowed;

    if ( exp == 0x7FFF ) {
        if ( (uint64_t) ( sig<<1 ) ) {
            /* NaN: convert through the common NaN representation. */
            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
        }
        /* Otherwise an infinity of the same sign. */
        return packFloat64( sign, 0x7FF, 0 );
    }

    /* Shift right by one with jamming so the discarded bit still
     * influences rounding. */
    shift64RightJamming( sig, 1, &narrowed );
    /* Re-bias the exponent unless the input was a true zero (the test
     * uses the unshifted significand). */
    if ( exp || sig ) {
        exp -= 0x3C01;
    }
    return roundAndPackFloat64( sign, exp, narrowed STATUS_VAR );

}
4420 158142c2 bellard
4421 158142c2 bellard
/*----------------------------------------------------------------------------
4422 158142c2 bellard
| Returns the result of converting the extended double-precision floating-
4423 158142c2 bellard
| point value `a' to the quadruple-precision floating-point format.  The
4424 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
4425 158142c2 bellard
| Floating-Point Arithmetic.
4426 158142c2 bellard
*----------------------------------------------------------------------------*/
4427 158142c2 bellard
4428 158142c2 bellard
float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
{
    uint64_t sig = extractFloatx80Frac( a );
    int16 exp = extractFloatx80Exp( a );
    flag sign = extractFloatx80Sign( a );
    uint64_t hi, lo;

    /* NaNs go through the common NaN representation. */
    if ( ( exp == 0x7FFF ) && (uint64_t) ( sig<<1 ) ) {
        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
    }

    /* Discard the explicit integer bit (sig<<1), then position the
     * remaining fraction bits within the 128-bit significand pair.  The
     * exponent field is passed through unchanged. */
    shift128Right( sig<<1, 0, 16, &hi, &lo );
    return packFloat128( sign, exp, hi, lo );

}
4444 158142c2 bellard
4445 158142c2 bellard
/*----------------------------------------------------------------------------
4446 158142c2 bellard
| Rounds the extended double-precision floating-point value `a' to an integer,
4447 158142c2 bellard
| and returns the result as an extended double-precision floating-point
4448 158142c2 bellard
| value.  The operation is performed according to the IEC/IEEE Standard for
4449 158142c2 bellard
| Binary Floating-Point Arithmetic.
4450 158142c2 bellard
*----------------------------------------------------------------------------*/
4451 158142c2 bellard
4452 158142c2 bellard
floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
{
    flag sign;
    int32 exp;
    uint64_t unitBit, fracMask;
    int8 mode;
    floatx80 z;

    exp = extractFloatx80Exp( a );

    /* Exponent large enough that no bit of the significand is fractional:
     * the value is returned as is, except that NaNs are propagated. */
    if ( 0x403E <= exp ) {
        if ( ( exp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    /* Small-exponent inputs: the result is a zero or +/-1, picked by the
     * rounding mode. */
    if ( exp < 0x3FFF ) {
        if ( ( exp == 0 ) && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        sign = extractFloatx80Sign( a );
        mode = STATUS(float_rounding_mode);
        if ( mode == float_round_nearest_even ) {
            /* Rounds away from zero only when the exponent is 0x3FFE and
             * there are bits set below the integer bit. */
            if ( ( exp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
                return packFloatx80( sign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
        }
        else if ( mode == float_round_down ) {
            if ( sign ) {
                return packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            return packFloatx80( 0, 0, 0 );
        }
        else if ( mode == float_round_up ) {
            if ( sign ) {
                return packFloatx80( 1, 0, 0 );
            }
            return packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        return packFloatx80( sign, 0, 0 );
    }

    /* General case: unitBit marks the lowest bit that is kept, fracMask
     * covers every bit below it. */
    unitBit = 1;
    unitBit <<= 0x403E - exp;
    fracMask = unitBit - 1;
    z = a;
    mode = STATUS(float_rounding_mode);
    if ( mode == float_round_nearest_even ) {
        /* Add half a unit; on an exact tie clear the unit bit so the
         * result is even. */
        z.low += unitBit>>1;
        if ( ( z.low & fracMask ) == 0 ) {
            z.low &= ~ unitBit;
        }
    }
    else if ( mode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude when rounding up a
         * positive value or rounding down a negative one. */
        if ( extractFloatx80Sign( z ) ^ ( mode == float_round_up ) ) {
            z.low += fracMask;
        }
    }
    z.low &= ~ fracMask;
    if ( z.low == 0 ) {
        /* The significand wrapped to zero: carry into the exponent and
         * restore the integer bit. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return z;

}
4517 158142c2 bellard
4518 158142c2 bellard
/*----------------------------------------------------------------------------
4519 158142c2 bellard
| Returns the result of adding the absolute values of the extended double-
4520 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
4521 158142c2 bellard
| negated before being returned.  `zSign' is ignored if the result is a NaN.
4522 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
4523 158142c2 bellard
| Floating-Point Arithmetic.
4524 158142c2 bellard
*----------------------------------------------------------------------------*/
4525 158142c2 bellard
4526 158142c2 bellard
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
{
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent: align `b' to it. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return a;
        }
        /* An exponent field of 0 is treated as an effective exponent of 1. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent: align `a' to it. */
        if ( bExp == 0x7FFF ) {
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ( zSig0 < aSig ) {
                /* The 64-bit sum wrapped around.  This can only happen
                 * when an operand with a zero exponent field has its
                 * integer bit set (a pseudo-denormal).  Recover the lost
                 * carry through the common shift-right path, starting
                 * from the effective exponent of 1. */
                zExp = 1;
                goto shiftRight1;
            }
            if ( zSig0 == 0 ) {
                /* 0 + 0: return a zero carrying the requested sign
                 * rather than asking normalizeFloatx80Subnormal to
                 * normalize a significand with no set bit (presumably a
                 * 64-bit shift there -- confirm against the helper). */
                return packFloatx80( zSign, 0, 0 );
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set: the sum is used as is.  Otherwise fall through to the
     * one-bit right shift, which sets the top bit and bumps the exponent. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
4583 158142c2 bellard
4584 158142c2 bellard
/*----------------------------------------------------------------------------
4585 158142c2 bellard
| Returns the result of subtracting the absolute values of the extended
4586 158142c2 bellard
| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
4587 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
4588 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
4589 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4590 158142c2 bellard
*----------------------------------------------------------------------------*/
4591 158142c2 bellard
4592 158142c2 bellard
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
{
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32 expDiff;
    int aIsLarger;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;

    if ( expDiff == 0 ) {
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            /* Both operands are infinities: invalid, result is the
             * default NaN. */
            float_raise( float_flag_invalid STATUS_VAR);
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        /* A zero exponent field is treated as an effective exponent of 1. */
        if ( aExp == 0 ) {
            aExp = 1;
            bExp = 1;
        }
        zSig1 = 0;
        if ( aSig == bSig ) {
            /* Exact cancellation: the zero is negative only when rounding
             * down. */
            return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
        }
        aIsLarger = ( bSig < aSig );
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent. */
        if ( bExp == 0x7FFF ) {
            if ( (uint64_t) ( bSig<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        aIsLarger = 0;
    }
    else {
        /* `a' has the larger exponent. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( aSig<<1 ) ) {
                return propagateFloatx80NaN( a, b STATUS_VAR );
            }
            return a;
        }
        if ( bExp == 0 ) {
            --expDiff;
        }
        shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        aIsLarger = 1;
    }

    /* Subtract the smaller magnitude from the larger; when `b' is the
     * larger one the sign of the result flips. */
    if ( aIsLarger ) {
        sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
        zExp = aExp;
    }
    else {
        sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
        zExp = bExp;
        zSign ^= 1;
    }
    return
        normalizeRoundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );

}
4651 158142c2 bellard
4652 158142c2 bellard
/*----------------------------------------------------------------------------
4653 158142c2 bellard
| Returns the result of adding the extended double-precision floating-point
4654 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
4655 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
4656 158142c2 bellard
*----------------------------------------------------------------------------*/
4657 158142c2 bellard
4658 158142c2 bellard
floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign( a );
    flag bSign = extractFloatx80Sign( b );

    /* Operands of opposite sign: the sum is a difference of magnitudes. */
    if ( aSign != bSign ) {
        return subFloatx80Sigs( a, b, aSign STATUS_VAR );
    }
    /* Same sign: add the magnitudes and keep that sign. */
    return addFloatx80Sigs( a, b, aSign STATUS_VAR );

}
4672 158142c2 bellard
4673 158142c2 bellard
/*----------------------------------------------------------------------------
4674 158142c2 bellard
| Returns the result of subtracting the extended double-precision floating-
4675 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4676 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4677 158142c2 bellard
*----------------------------------------------------------------------------*/
4678 158142c2 bellard
4679 158142c2 bellard
floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign = extractFloatx80Sign( a );
    flag bSign = extractFloatx80Sign( b );

    /* Operands of opposite sign: the difference is a sum of magnitudes. */
    if ( aSign != bSign ) {
        return addFloatx80Sigs( a, b, aSign STATUS_VAR );
    }
    /* Same sign: subtract the magnitudes. */
    return subFloatx80Sigs( a, b, aSign STATUS_VAR );

}
4693 158142c2 bellard
4694 158142c2 bellard
/*----------------------------------------------------------------------------
4695 158142c2 bellard
| Returns the result of multiplying the extended double-precision floating-
4696 158142c2 bellard
| point values `a' and `b'.  The operation is performed according to the
4697 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4698 158142c2 bellard
*----------------------------------------------------------------------------*/
4699 158142c2 bellard
4700 158142c2 bellard
floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, zExp;
    uint64_t aSig, bSig, prodHi, prodLo;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;

    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN or an infinity. */
        if (    (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        /* infinity * zero is invalid. */
        if ( ( bExp | bSig ) == 0 ) {
            goto invalid;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( bExp == 0x7FFF ) {
        /* `b' is a NaN or an infinity. */
        if ( (uint64_t) ( bSig<<1 ) ) {
            return propagateFloatx80NaN( a, b STATUS_VAR );
        }
        /* zero * infinity is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
            goto invalid;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }

    /* Zero operands give a signed zero; other operands with a zero
     * exponent field are normalized before multiplying. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            return packFloatx80( zSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            return packFloatx80( zSign, 0, 0 );
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }

    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &prodHi, &prodLo );
    /* If the top bit of the product is clear, shift left one place and
     * compensate in the exponent. */
    if ( 0 < (int64_t) prodHi ) {
        shortShift128Left( prodHi, prodLo, 1, &prodHi, &prodLo );
        --zExp;
    }
    return
        roundAndPackFloatx80(
            STATUS(floatx80_rounding_precision), zSign, zExp, prodHi, prodLo STATUS_VAR );

 invalid:
    float_raise( float_flag_invalid STATUS_VAR);
    z.low = floatx80_default_nan_low;
    z.high = floatx80_default_nan_high;
    return z;

}
4752 158142c2 bellard
4753 158142c2 bellard
/*----------------------------------------------------------------------------
4754 158142c2 bellard
| Returns the result of dividing the extended double-precision floating-point
4755 158142c2 bellard
| value `a' by the corresponding value `b'.  The operation is performed
4756 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4757 158142c2 bellard
*----------------------------------------------------------------------------*/
4758 158142c2 bellard
4759 158142c2 bellard
floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4760 158142c2 bellard
{
4761 158142c2 bellard
    flag aSign, bSign, zSign;
4762 158142c2 bellard
    int32 aExp, bExp, zExp;
4763 bb98fe42 Andreas Färber
    uint64_t aSig, bSig, zSig0, zSig1;
4764 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, term0, term1, term2;
4765 158142c2 bellard
    floatx80 z;
4766 158142c2 bellard
4767 158142c2 bellard
    aSig = extractFloatx80Frac( a );
4768 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4769 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4770 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4771 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4772 158142c2 bellard
    bSign = extractFloatx80Sign( b );
4773 158142c2 bellard
    zSign = aSign ^ bSign;
4774 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4775 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4776 158142c2 bellard
        if ( bExp == 0x7FFF ) {
4777 bb98fe42 Andreas Färber
            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4778 158142c2 bellard
            goto invalid;
4779 158142c2 bellard
        }
4780 158142c2 bellard
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4781 158142c2 bellard
    }
4782 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4783 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4784 158142c2 bellard
        return packFloatx80( zSign, 0, 0 );
4785 158142c2 bellard
    }
4786 158142c2 bellard
    if ( bExp == 0 ) {
4787 158142c2 bellard
        if ( bSig == 0 ) {
4788 158142c2 bellard
            if ( ( aExp | aSig ) == 0 ) {
4789 158142c2 bellard
 invalid:
4790 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
4791 158142c2 bellard
                z.low = floatx80_default_nan_low;
4792 158142c2 bellard
                z.high = floatx80_default_nan_high;
4793 158142c2 bellard
                return z;
4794 158142c2 bellard
            }
4795 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
4796 158142c2 bellard
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4797 158142c2 bellard
        }
4798 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4799 158142c2 bellard
    }
4800 158142c2 bellard
    if ( aExp == 0 ) {
4801 158142c2 bellard
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4802 158142c2 bellard
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4803 158142c2 bellard
    }
4804 158142c2 bellard
    zExp = aExp - bExp + 0x3FFE;
4805 158142c2 bellard
    rem1 = 0;
4806 158142c2 bellard
    if ( bSig <= aSig ) {
4807 158142c2 bellard
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
4808 158142c2 bellard
        ++zExp;
4809 158142c2 bellard
    }
4810 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4811 158142c2 bellard
    mul64To128( bSig, zSig0, &term0, &term1 );
4812 158142c2 bellard
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4813 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4814 158142c2 bellard
        --zSig0;
4815 158142c2 bellard
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4816 158142c2 bellard
    }
4817 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
4818 bb98fe42 Andreas Färber
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
4819 158142c2 bellard
        mul64To128( bSig, zSig1, &term1, &term2 );
4820 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4821 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4822 158142c2 bellard
            --zSig1;
4823 158142c2 bellard
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4824 158142c2 bellard
        }
4825 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
4826 158142c2 bellard
    }
4827 158142c2 bellard
    return
4828 158142c2 bellard
        roundAndPackFloatx80(
4829 158142c2 bellard
            STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4830 158142c2 bellard
4831 158142c2 bellard
}
4832 158142c2 bellard
4833 158142c2 bellard
/*----------------------------------------------------------------------------
4834 158142c2 bellard
| Returns the remainder of the extended double-precision floating-point value
4835 158142c2 bellard
| `a' with respect to the corresponding value `b'.  The operation is performed
4836 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4837 158142c2 bellard
*----------------------------------------------------------------------------*/
4838 158142c2 bellard
4839 158142c2 bellard
floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4840 158142c2 bellard
{
4841 ed086f3d Blue Swirl
    flag aSign, zSign;
4842 158142c2 bellard
    int32 aExp, bExp, expDiff;
4843 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig;
4844 bb98fe42 Andreas Färber
    uint64_t q, term0, term1, alternateASig0, alternateASig1;
4845 158142c2 bellard
    floatx80 z;
4846 158142c2 bellard
4847 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4848 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4849 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4850 158142c2 bellard
    bSig = extractFloatx80Frac( b );
4851 158142c2 bellard
    bExp = extractFloatx80Exp( b );
4852 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4853 bb98fe42 Andreas Färber
        if (    (uint64_t) ( aSig0<<1 )
4854 bb98fe42 Andreas Färber
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
4855 158142c2 bellard
            return propagateFloatx80NaN( a, b STATUS_VAR );
4856 158142c2 bellard
        }
4857 158142c2 bellard
        goto invalid;
4858 158142c2 bellard
    }
4859 158142c2 bellard
    if ( bExp == 0x7FFF ) {
4860 bb98fe42 Andreas Färber
        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4861 158142c2 bellard
        return a;
4862 158142c2 bellard
    }
4863 158142c2 bellard
    if ( bExp == 0 ) {
4864 158142c2 bellard
        if ( bSig == 0 ) {
4865 158142c2 bellard
 invalid:
4866 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
4867 158142c2 bellard
            z.low = floatx80_default_nan_low;
4868 158142c2 bellard
            z.high = floatx80_default_nan_high;
4869 158142c2 bellard
            return z;
4870 158142c2 bellard
        }
4871 158142c2 bellard
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4872 158142c2 bellard
    }
4873 158142c2 bellard
    if ( aExp == 0 ) {
4874 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
4875 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4876 158142c2 bellard
    }
4877 158142c2 bellard
    bSig |= LIT64( 0x8000000000000000 );
4878 158142c2 bellard
    zSign = aSign;
4879 158142c2 bellard
    expDiff = aExp - bExp;
4880 158142c2 bellard
    aSig1 = 0;
4881 158142c2 bellard
    if ( expDiff < 0 ) {
4882 158142c2 bellard
        if ( expDiff < -1 ) return a;
4883 158142c2 bellard
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4884 158142c2 bellard
        expDiff = 0;
4885 158142c2 bellard
    }
4886 158142c2 bellard
    q = ( bSig <= aSig0 );
4887 158142c2 bellard
    if ( q ) aSig0 -= bSig;
4888 158142c2 bellard
    expDiff -= 64;
4889 158142c2 bellard
    while ( 0 < expDiff ) {
4890 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4891 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4892 158142c2 bellard
        mul64To128( bSig, q, &term0, &term1 );
4893 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4894 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4895 158142c2 bellard
        expDiff -= 62;
4896 158142c2 bellard
    }
4897 158142c2 bellard
    expDiff += 64;
4898 158142c2 bellard
    if ( 0 < expDiff ) {
4899 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig );
4900 158142c2 bellard
        q = ( 2 < q ) ? q - 2 : 0;
4901 158142c2 bellard
        q >>= 64 - expDiff;
4902 158142c2 bellard
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4903 158142c2 bellard
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4904 158142c2 bellard
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4905 158142c2 bellard
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4906 158142c2 bellard
            ++q;
4907 158142c2 bellard
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4908 158142c2 bellard
        }
4909 158142c2 bellard
    }
4910 158142c2 bellard
    else {
4911 158142c2 bellard
        term1 = 0;
4912 158142c2 bellard
        term0 = bSig;
4913 158142c2 bellard
    }
4914 158142c2 bellard
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4915 158142c2 bellard
    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4916 158142c2 bellard
         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4917 158142c2 bellard
              && ( q & 1 ) )
4918 158142c2 bellard
       ) {
4919 158142c2 bellard
        aSig0 = alternateASig0;
4920 158142c2 bellard
        aSig1 = alternateASig1;
4921 158142c2 bellard
        zSign = ! zSign;
4922 158142c2 bellard
    }
4923 158142c2 bellard
    return
4924 158142c2 bellard
        normalizeRoundAndPackFloatx80(
4925 158142c2 bellard
            80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4926 158142c2 bellard
4927 158142c2 bellard
}
4928 158142c2 bellard
4929 158142c2 bellard
/*----------------------------------------------------------------------------
4930 158142c2 bellard
| Returns the square root of the extended double-precision floating-point
4931 158142c2 bellard
| value `a'.  The operation is performed according to the IEC/IEEE Standard
4932 158142c2 bellard
| for Binary Floating-Point Arithmetic.
4933 158142c2 bellard
*----------------------------------------------------------------------------*/
4934 158142c2 bellard
4935 158142c2 bellard
floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4936 158142c2 bellard
{
4937 158142c2 bellard
    flag aSign;
4938 158142c2 bellard
    int32 aExp, zExp;
4939 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4940 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4941 158142c2 bellard
    floatx80 z;
4942 158142c2 bellard
4943 158142c2 bellard
    aSig0 = extractFloatx80Frac( a );
4944 158142c2 bellard
    aExp = extractFloatx80Exp( a );
4945 158142c2 bellard
    aSign = extractFloatx80Sign( a );
4946 158142c2 bellard
    if ( aExp == 0x7FFF ) {
4947 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4948 158142c2 bellard
        if ( ! aSign ) return a;
4949 158142c2 bellard
        goto invalid;
4950 158142c2 bellard
    }
4951 158142c2 bellard
    if ( aSign ) {
4952 158142c2 bellard
        if ( ( aExp | aSig0 ) == 0 ) return a;
4953 158142c2 bellard
 invalid:
4954 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
4955 158142c2 bellard
        z.low = floatx80_default_nan_low;
4956 158142c2 bellard
        z.high = floatx80_default_nan_high;
4957 158142c2 bellard
        return z;
4958 158142c2 bellard
    }
4959 158142c2 bellard
    if ( aExp == 0 ) {
4960 158142c2 bellard
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4961 158142c2 bellard
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4962 158142c2 bellard
    }
4963 158142c2 bellard
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4964 158142c2 bellard
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4965 158142c2 bellard
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4966 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4967 158142c2 bellard
    doubleZSig0 = zSig0<<1;
4968 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
4969 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4970 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
4971 158142c2 bellard
        --zSig0;
4972 158142c2 bellard
        doubleZSig0 -= 2;
4973 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4974 158142c2 bellard
    }
4975 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4976 158142c2 bellard
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4977 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
4978 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4979 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4980 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
4981 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4982 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
4983 158142c2 bellard
            --zSig1;
4984 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4985 158142c2 bellard
            term3 |= 1;
4986 158142c2 bellard
            term2 |= doubleZSig0;
4987 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4988 158142c2 bellard
        }
4989 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4990 158142c2 bellard
    }
4991 158142c2 bellard
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4992 158142c2 bellard
    zSig0 |= doubleZSig0;
4993 158142c2 bellard
    return
4994 158142c2 bellard
        roundAndPackFloatx80(
4995 158142c2 bellard
            STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4996 158142c2 bellard
4997 158142c2 bellard
}
4998 158142c2 bellard
4999 158142c2 bellard
/*----------------------------------------------------------------------------
5000 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is equal
5001 b689362d Aurelien Jarno
| to the corresponding value `b', and 0 otherwise.  The invalid exception is
5002 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
5003 b689362d Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5004 158142c2 bellard
*----------------------------------------------------------------------------*/
5005 158142c2 bellard
5006 b689362d Aurelien Jarno
int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
5007 158142c2 bellard
{
5008 158142c2 bellard
5009 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5010 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5011 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5012 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5013 158142c2 bellard
       ) {
5014 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5015 158142c2 bellard
        return 0;
5016 158142c2 bellard
    }
5017 158142c2 bellard
    return
5018 158142c2 bellard
           ( a.low == b.low )
5019 158142c2 bellard
        && (    ( a.high == b.high )
5020 158142c2 bellard
             || (    ( a.low == 0 )
5021 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5022 158142c2 bellard
           );
5023 158142c2 bellard
5024 158142c2 bellard
}
5025 158142c2 bellard
5026 158142c2 bellard
/*----------------------------------------------------------------------------
5027 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
5028 158142c2 bellard
| less than or equal to the corresponding value `b', and 0 otherwise.  The
5029 f5a64251 Aurelien Jarno
| invalid exception is raised if either operand is a NaN.  The comparison is
5030 f5a64251 Aurelien Jarno
| performed according to the IEC/IEEE Standard for Binary Floating-Point
5031 f5a64251 Aurelien Jarno
| Arithmetic.
5032 158142c2 bellard
*----------------------------------------------------------------------------*/
5033 158142c2 bellard
5034 750afe93 bellard
int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
5035 158142c2 bellard
{
5036 158142c2 bellard
    flag aSign, bSign;
5037 158142c2 bellard
5038 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5039 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5040 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5041 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5042 158142c2 bellard
       ) {
5043 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5044 158142c2 bellard
        return 0;
5045 158142c2 bellard
    }
5046 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5047 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5048 158142c2 bellard
    if ( aSign != bSign ) {
5049 158142c2 bellard
        return
5050 158142c2 bellard
               aSign
5051 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5052 158142c2 bellard
                 == 0 );
5053 158142c2 bellard
    }
5054 158142c2 bellard
    return
5055 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5056 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5057 158142c2 bellard
5058 158142c2 bellard
}
5059 158142c2 bellard
5060 158142c2 bellard
/*----------------------------------------------------------------------------
5061 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is
5062 f5a64251 Aurelien Jarno
| less than the corresponding value `b', and 0 otherwise.  The invalid
5063 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
5064 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5065 158142c2 bellard
*----------------------------------------------------------------------------*/
5066 158142c2 bellard
5067 750afe93 bellard
int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
5068 158142c2 bellard
{
5069 158142c2 bellard
    flag aSign, bSign;
5070 158142c2 bellard
5071 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5072 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5073 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5074 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5075 158142c2 bellard
       ) {
5076 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5077 158142c2 bellard
        return 0;
5078 158142c2 bellard
    }
5079 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5080 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5081 158142c2 bellard
    if ( aSign != bSign ) {
5082 158142c2 bellard
        return
5083 158142c2 bellard
               aSign
5084 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5085 158142c2 bellard
                 != 0 );
5086 158142c2 bellard
    }
5087 158142c2 bellard
    return
5088 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5089 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5090 158142c2 bellard
5091 158142c2 bellard
}
5092 158142c2 bellard
5093 158142c2 bellard
/*----------------------------------------------------------------------------
5094 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
5095 f5a64251 Aurelien Jarno
| cannot be compared, and 0 otherwise.  The invalid exception is raised if
5096 f5a64251 Aurelien Jarno
| either operand is a NaN.   The comparison is performed according to the
5097 f5a64251 Aurelien Jarno
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5098 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5099 67b7861d Aurelien Jarno
int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
5100 67b7861d Aurelien Jarno
{
5101 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5102 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5103 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5104 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5105 67b7861d Aurelien Jarno
       ) {
5106 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
5107 67b7861d Aurelien Jarno
        return 1;
5108 67b7861d Aurelien Jarno
    }
5109 67b7861d Aurelien Jarno
    return 0;
5110 67b7861d Aurelien Jarno
}
5111 67b7861d Aurelien Jarno
5112 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5113 b689362d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point value `a' is
5114 f5a64251 Aurelien Jarno
| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5115 f5a64251 Aurelien Jarno
| cause an exception.  The comparison is performed according to the IEC/IEEE
5116 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
5117 158142c2 bellard
*----------------------------------------------------------------------------*/
5118 158142c2 bellard
5119 b689362d Aurelien Jarno
int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5120 158142c2 bellard
{
5121 158142c2 bellard
5122 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5123 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5124 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5125 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5126 158142c2 bellard
       ) {
5127 b689362d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
5128 b689362d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
5129 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5130 b689362d Aurelien Jarno
        }
5131 158142c2 bellard
        return 0;
5132 158142c2 bellard
    }
5133 158142c2 bellard
    return
5134 158142c2 bellard
           ( a.low == b.low )
5135 158142c2 bellard
        && (    ( a.high == b.high )
5136 158142c2 bellard
             || (    ( a.low == 0 )
5137 bb98fe42 Andreas Färber
                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5138 158142c2 bellard
           );
5139 158142c2 bellard
5140 158142c2 bellard
}
5141 158142c2 bellard
5142 158142c2 bellard
/*----------------------------------------------------------------------------
5143 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
5144 158142c2 bellard
| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
5145 158142c2 bellard
| do not cause an exception.  Otherwise, the comparison is performed according
5146 158142c2 bellard
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5147 158142c2 bellard
*----------------------------------------------------------------------------*/
5148 158142c2 bellard
5149 750afe93 bellard
int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5150 158142c2 bellard
{
5151 158142c2 bellard
    flag aSign, bSign;
5152 158142c2 bellard
5153 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5154 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5155 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5156 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5157 158142c2 bellard
       ) {
5158 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
5159 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
5160 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5161 158142c2 bellard
        }
5162 158142c2 bellard
        return 0;
5163 158142c2 bellard
    }
5164 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5165 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5166 158142c2 bellard
    if ( aSign != bSign ) {
5167 158142c2 bellard
        return
5168 158142c2 bellard
               aSign
5169 bb98fe42 Andreas Färber
            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5170 158142c2 bellard
                 == 0 );
5171 158142c2 bellard
    }
5172 158142c2 bellard
    return
5173 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
5174 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
5175 158142c2 bellard
5176 158142c2 bellard
}
5177 158142c2 bellard
5178 158142c2 bellard
/*----------------------------------------------------------------------------
5179 158142c2 bellard
| Returns 1 if the extended double-precision floating-point value `a' is less
5180 158142c2 bellard
| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
5181 158142c2 bellard
| an exception.  Otherwise, the comparison is performed according to the
5182 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5183 158142c2 bellard
*----------------------------------------------------------------------------*/
5184 158142c2 bellard
5185 750afe93 bellard
int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5186 158142c2 bellard
{
5187 158142c2 bellard
    flag aSign, bSign;
5188 158142c2 bellard
5189 158142c2 bellard
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5190 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5191 158142c2 bellard
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5192 bb98fe42 Andreas Färber
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5193 158142c2 bellard
       ) {
5194 158142c2 bellard
        if (    floatx80_is_signaling_nan( a )
5195 158142c2 bellard
             || floatx80_is_signaling_nan( b ) ) {
5196 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5197 158142c2 bellard
        }
5198 158142c2 bellard
        return 0;
5199 158142c2 bellard
    }
5200 158142c2 bellard
    aSign = extractFloatx80Sign( a );
5201 158142c2 bellard
    bSign = extractFloatx80Sign( b );
5202 158142c2 bellard
    if ( aSign != bSign ) {
5203 158142c2 bellard
        return
5204 158142c2 bellard
               aSign
5205 bb98fe42 Andreas Färber
            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5206 158142c2 bellard
                 != 0 );
5207 158142c2 bellard
    }
5208 158142c2 bellard
    return
5209 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
5210 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
5211 158142c2 bellard
5212 158142c2 bellard
}
5213 158142c2 bellard
5214 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
5215 67b7861d Aurelien Jarno
| Returns 1 if the extended double-precision floating-point values `a' and `b'
5216 67b7861d Aurelien Jarno
| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
5217 67b7861d Aurelien Jarno
| The comparison is performed according to the IEC/IEEE Standard for Binary
5218 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
5219 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
5220 67b7861d Aurelien Jarno
int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5221 67b7861d Aurelien Jarno
{
5222 67b7861d Aurelien Jarno
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
5223 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5224 67b7861d Aurelien Jarno
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
5225 67b7861d Aurelien Jarno
              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5226 67b7861d Aurelien Jarno
       ) {
5227 67b7861d Aurelien Jarno
        if (    floatx80_is_signaling_nan( a )
5228 67b7861d Aurelien Jarno
             || floatx80_is_signaling_nan( b ) ) {
5229 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
5230 67b7861d Aurelien Jarno
        }
5231 67b7861d Aurelien Jarno
        return 1;
5232 67b7861d Aurelien Jarno
    }
5233 67b7861d Aurelien Jarno
    return 0;
5234 67b7861d Aurelien Jarno
}
5235 67b7861d Aurelien Jarno
5236 158142c2 bellard
/*----------------------------------------------------------------------------
5237 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5238 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
5239 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5240 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
5241 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
5242 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
5243 158142c2 bellard
| largest integer with the same sign as `a' is returned.
5244 158142c2 bellard
*----------------------------------------------------------------------------*/
5245 158142c2 bellard
5246 158142c2 bellard
int32 float128_to_int32( float128 a STATUS_PARAM )
5247 158142c2 bellard
{
5248 158142c2 bellard
    flag aSign;
5249 158142c2 bellard
    int32 aExp, shiftCount;
5250 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5251 158142c2 bellard
5252 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5253 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5254 158142c2 bellard
    aExp = extractFloat128Exp( a );
5255 158142c2 bellard
    aSign = extractFloat128Sign( a );
5256 158142c2 bellard
    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5257 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5258 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5259 158142c2 bellard
    shiftCount = 0x4028 - aExp;
5260 158142c2 bellard
    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5261 158142c2 bellard
    return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
5262 158142c2 bellard
5263 158142c2 bellard
}
5264 158142c2 bellard
5265 158142c2 bellard
/*----------------------------------------------------------------------------
5266 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5267 158142c2 bellard
| value `a' to the 32-bit two's complement integer format.  The conversion
5268 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5269 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.  If
5270 158142c2 bellard
| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
5271 158142c2 bellard
| conversion overflows, the largest integer with the same sign as `a' is
5272 158142c2 bellard
| returned.
5273 158142c2 bellard
*----------------------------------------------------------------------------*/
5274 158142c2 bellard
5275 158142c2 bellard
int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
5276 158142c2 bellard
{
5277 158142c2 bellard
    flag aSign;
5278 158142c2 bellard
    int32 aExp, shiftCount;
5279 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, savedASig;
5280 158142c2 bellard
    int32 z;
5281 158142c2 bellard
5282 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5283 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5284 158142c2 bellard
    aExp = extractFloat128Exp( a );
5285 158142c2 bellard
    aSign = extractFloat128Sign( a );
5286 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5287 158142c2 bellard
    if ( 0x401E < aExp ) {
5288 158142c2 bellard
        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5289 158142c2 bellard
        goto invalid;
5290 158142c2 bellard
    }
5291 158142c2 bellard
    else if ( aExp < 0x3FFF ) {
5292 158142c2 bellard
        if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
5293 158142c2 bellard
        return 0;
5294 158142c2 bellard
    }
5295 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5296 158142c2 bellard
    shiftCount = 0x402F - aExp;
5297 158142c2 bellard
    savedASig = aSig0;
5298 158142c2 bellard
    aSig0 >>= shiftCount;
5299 158142c2 bellard
    z = aSig0;
5300 158142c2 bellard
    if ( aSign ) z = - z;
5301 158142c2 bellard
    if ( ( z < 0 ) ^ aSign ) {
5302 158142c2 bellard
 invalid:
5303 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5304 bb98fe42 Andreas Färber
        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5305 158142c2 bellard
    }
5306 158142c2 bellard
    if ( ( aSig0<<shiftCount ) != savedASig ) {
5307 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5308 158142c2 bellard
    }
5309 158142c2 bellard
    return z;
5310 158142c2 bellard
5311 158142c2 bellard
}
5312 158142c2 bellard
5313 158142c2 bellard
/*----------------------------------------------------------------------------
5314 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5315 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
5316 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5317 158142c2 bellard
| Arithmetic---which means in particular that the conversion is rounded
5318 158142c2 bellard
| according to the current rounding mode.  If `a' is a NaN, the largest
5319 158142c2 bellard
| positive integer is returned.  Otherwise, if the conversion overflows, the
5320 158142c2 bellard
| largest integer with the same sign as `a' is returned.
5321 158142c2 bellard
*----------------------------------------------------------------------------*/
5322 158142c2 bellard
5323 158142c2 bellard
int64 float128_to_int64( float128 a STATUS_PARAM )
5324 158142c2 bellard
{
5325 158142c2 bellard
    flag aSign;
5326 158142c2 bellard
    int32 aExp, shiftCount;
5327 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5328 158142c2 bellard
5329 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5330 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5331 158142c2 bellard
    aExp = extractFloat128Exp( a );
5332 158142c2 bellard
    aSign = extractFloat128Sign( a );
5333 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5334 158142c2 bellard
    shiftCount = 0x402F - aExp;
5335 158142c2 bellard
    if ( shiftCount <= 0 ) {
5336 158142c2 bellard
        if ( 0x403E < aExp ) {
5337 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5338 158142c2 bellard
            if (    ! aSign
5339 158142c2 bellard
                 || (    ( aExp == 0x7FFF )
5340 158142c2 bellard
                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5341 158142c2 bellard
                    )
5342 158142c2 bellard
               ) {
5343 158142c2 bellard
                return LIT64( 0x7FFFFFFFFFFFFFFF );
5344 158142c2 bellard
            }
5345 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
5346 158142c2 bellard
        }
5347 158142c2 bellard
        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5348 158142c2 bellard
    }
5349 158142c2 bellard
    else {
5350 158142c2 bellard
        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5351 158142c2 bellard
    }
5352 158142c2 bellard
    return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
5353 158142c2 bellard
5354 158142c2 bellard
}
5355 158142c2 bellard
5356 158142c2 bellard
/*----------------------------------------------------------------------------
5357 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5358 158142c2 bellard
| value `a' to the 64-bit two's complement integer format.  The conversion
5359 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5360 158142c2 bellard
| Arithmetic, except that the conversion is always rounded toward zero.
5361 158142c2 bellard
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
5362 158142c2 bellard
| the conversion overflows, the largest integer with the same sign as `a' is
5363 158142c2 bellard
| returned.
5364 158142c2 bellard
*----------------------------------------------------------------------------*/
5365 158142c2 bellard
5366 158142c2 bellard
int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
5367 158142c2 bellard
{
5368 158142c2 bellard
    flag aSign;
5369 158142c2 bellard
    int32 aExp, shiftCount;
5370 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5371 158142c2 bellard
    int64 z;
5372 158142c2 bellard
5373 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5374 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5375 158142c2 bellard
    aExp = extractFloat128Exp( a );
5376 158142c2 bellard
    aSign = extractFloat128Sign( a );
5377 158142c2 bellard
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5378 158142c2 bellard
    shiftCount = aExp - 0x402F;
5379 158142c2 bellard
    if ( 0 < shiftCount ) {
5380 158142c2 bellard
        if ( 0x403E <= aExp ) {
5381 158142c2 bellard
            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5382 158142c2 bellard
            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
5383 158142c2 bellard
                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5384 158142c2 bellard
                if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5385 158142c2 bellard
            }
5386 158142c2 bellard
            else {
5387 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5388 158142c2 bellard
                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5389 158142c2 bellard
                    return LIT64( 0x7FFFFFFFFFFFFFFF );
5390 158142c2 bellard
                }
5391 158142c2 bellard
            }
5392 bb98fe42 Andreas Färber
            return (int64_t) LIT64( 0x8000000000000000 );
5393 158142c2 bellard
        }
5394 158142c2 bellard
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
5395 bb98fe42 Andreas Färber
        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
5396 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5397 158142c2 bellard
        }
5398 158142c2 bellard
    }
5399 158142c2 bellard
    else {
5400 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5401 158142c2 bellard
            if ( aExp | aSig0 | aSig1 ) {
5402 158142c2 bellard
                STATUS(float_exception_flags) |= float_flag_inexact;
5403 158142c2 bellard
            }
5404 158142c2 bellard
            return 0;
5405 158142c2 bellard
        }
5406 158142c2 bellard
        z = aSig0>>( - shiftCount );
5407 158142c2 bellard
        if (    aSig1
5408 bb98fe42 Andreas Färber
             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5409 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5410 158142c2 bellard
        }
5411 158142c2 bellard
    }
5412 158142c2 bellard
    if ( aSign ) z = - z;
5413 158142c2 bellard
    return z;
5414 158142c2 bellard
5415 158142c2 bellard
}
5416 158142c2 bellard
5417 158142c2 bellard
/*----------------------------------------------------------------------------
5418 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5419 158142c2 bellard
| value `a' to the single-precision floating-point format.  The conversion
5420 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5421 158142c2 bellard
| Arithmetic.
5422 158142c2 bellard
*----------------------------------------------------------------------------*/
5423 158142c2 bellard
5424 158142c2 bellard
float32 float128_to_float32( float128 a STATUS_PARAM )
5425 158142c2 bellard
{
5426 158142c2 bellard
    flag aSign;
5427 158142c2 bellard
    int32 aExp;
5428 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5429 bb98fe42 Andreas Färber
    uint32_t zSig;
5430 158142c2 bellard
5431 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5432 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5433 158142c2 bellard
    aExp = extractFloat128Exp( a );
5434 158142c2 bellard
    aSign = extractFloat128Sign( a );
5435 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5436 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5437 bcd4d9af Christophe Lyon
            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5438 158142c2 bellard
        }
5439 158142c2 bellard
        return packFloat32( aSign, 0xFF, 0 );
5440 158142c2 bellard
    }
5441 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5442 158142c2 bellard
    shift64RightJamming( aSig0, 18, &aSig0 );
5443 158142c2 bellard
    zSig = aSig0;
5444 158142c2 bellard
    if ( aExp || zSig ) {
5445 158142c2 bellard
        zSig |= 0x40000000;
5446 158142c2 bellard
        aExp -= 0x3F81;
5447 158142c2 bellard
    }
5448 158142c2 bellard
    return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5449 158142c2 bellard
5450 158142c2 bellard
}
5451 158142c2 bellard
5452 158142c2 bellard
/*----------------------------------------------------------------------------
5453 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5454 158142c2 bellard
| value `a' to the double-precision floating-point format.  The conversion
5455 158142c2 bellard
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5456 158142c2 bellard
| Arithmetic.
5457 158142c2 bellard
*----------------------------------------------------------------------------*/
5458 158142c2 bellard
5459 158142c2 bellard
float64 float128_to_float64( float128 a STATUS_PARAM )
5460 158142c2 bellard
{
5461 158142c2 bellard
    flag aSign;
5462 158142c2 bellard
    int32 aExp;
5463 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5464 158142c2 bellard
5465 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5466 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5467 158142c2 bellard
    aExp = extractFloat128Exp( a );
5468 158142c2 bellard
    aSign = extractFloat128Sign( a );
5469 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5470 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5471 bcd4d9af Christophe Lyon
            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5472 158142c2 bellard
        }
5473 158142c2 bellard
        return packFloat64( aSign, 0x7FF, 0 );
5474 158142c2 bellard
    }
5475 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5476 158142c2 bellard
    aSig0 |= ( aSig1 != 0 );
5477 158142c2 bellard
    if ( aExp || aSig0 ) {
5478 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5479 158142c2 bellard
        aExp -= 0x3C01;
5480 158142c2 bellard
    }
5481 158142c2 bellard
    return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5482 158142c2 bellard
5483 158142c2 bellard
}
5484 158142c2 bellard
5485 158142c2 bellard
/*----------------------------------------------------------------------------
5486 158142c2 bellard
| Returns the result of converting the quadruple-precision floating-point
5487 158142c2 bellard
| value `a' to the extended double-precision floating-point format.  The
5488 158142c2 bellard
| conversion is performed according to the IEC/IEEE Standard for Binary
5489 158142c2 bellard
| Floating-Point Arithmetic.
5490 158142c2 bellard
*----------------------------------------------------------------------------*/
5491 158142c2 bellard
5492 158142c2 bellard
floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5493 158142c2 bellard
{
5494 158142c2 bellard
    flag aSign;
5495 158142c2 bellard
    int32 aExp;
5496 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1;
5497 158142c2 bellard
5498 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5499 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5500 158142c2 bellard
    aExp = extractFloat128Exp( a );
5501 158142c2 bellard
    aSign = extractFloat128Sign( a );
5502 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5503 158142c2 bellard
        if ( aSig0 | aSig1 ) {
5504 bcd4d9af Christophe Lyon
            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
5505 158142c2 bellard
        }
5506 158142c2 bellard
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5507 158142c2 bellard
    }
5508 158142c2 bellard
    if ( aExp == 0 ) {
5509 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5510 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5511 158142c2 bellard
    }
5512 158142c2 bellard
    else {
5513 158142c2 bellard
        aSig0 |= LIT64( 0x0001000000000000 );
5514 158142c2 bellard
    }
5515 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5516 158142c2 bellard
    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5517 158142c2 bellard
5518 158142c2 bellard
}
5519 158142c2 bellard
5520 158142c2 bellard
/*----------------------------------------------------------------------------
5521 158142c2 bellard
| Rounds the quadruple-precision floating-point value `a' to an integer, and
5522 158142c2 bellard
| returns the result as a quadruple-precision floating-point value.  The
5523 158142c2 bellard
| operation is performed according to the IEC/IEEE Standard for Binary
5524 158142c2 bellard
| Floating-Point Arithmetic.
5525 158142c2 bellard
*----------------------------------------------------------------------------*/
5526 158142c2 bellard
5527 158142c2 bellard
float128 float128_round_to_int( float128 a STATUS_PARAM )
5528 158142c2 bellard
{
5529 158142c2 bellard
    flag aSign;
5530 158142c2 bellard
    int32 aExp;
5531 bb98fe42 Andreas Färber
    uint64_t lastBitMask, roundBitsMask;
5532 158142c2 bellard
    int8 roundingMode;
5533 158142c2 bellard
    float128 z;
5534 158142c2 bellard
5535 158142c2 bellard
    aExp = extractFloat128Exp( a );
5536 158142c2 bellard
    if ( 0x402F <= aExp ) {
5537 158142c2 bellard
        if ( 0x406F <= aExp ) {
5538 158142c2 bellard
            if (    ( aExp == 0x7FFF )
5539 158142c2 bellard
                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5540 158142c2 bellard
               ) {
5541 158142c2 bellard
                return propagateFloat128NaN( a, a STATUS_VAR );
5542 158142c2 bellard
            }
5543 158142c2 bellard
            return a;
5544 158142c2 bellard
        }
5545 158142c2 bellard
        lastBitMask = 1;
5546 158142c2 bellard
        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5547 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5548 158142c2 bellard
        z = a;
5549 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5550 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5551 158142c2 bellard
            if ( lastBitMask ) {
5552 158142c2 bellard
                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5553 158142c2 bellard
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5554 158142c2 bellard
            }
5555 158142c2 bellard
            else {
5556 bb98fe42 Andreas Färber
                if ( (int64_t) z.low < 0 ) {
5557 158142c2 bellard
                    ++z.high;
5558 bb98fe42 Andreas Färber
                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
5559 158142c2 bellard
                }
5560 158142c2 bellard
            }
5561 158142c2 bellard
        }
5562 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5563 158142c2 bellard
            if (   extractFloat128Sign( z )
5564 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5565 158142c2 bellard
                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5566 158142c2 bellard
            }
5567 158142c2 bellard
        }
5568 158142c2 bellard
        z.low &= ~ roundBitsMask;
5569 158142c2 bellard
    }
5570 158142c2 bellard
    else {
5571 158142c2 bellard
        if ( aExp < 0x3FFF ) {
5572 bb98fe42 Andreas Färber
            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
5573 158142c2 bellard
            STATUS(float_exception_flags) |= float_flag_inexact;
5574 158142c2 bellard
            aSign = extractFloat128Sign( a );
5575 158142c2 bellard
            switch ( STATUS(float_rounding_mode) ) {
5576 158142c2 bellard
             case float_round_nearest_even:
5577 158142c2 bellard
                if (    ( aExp == 0x3FFE )
5578 158142c2 bellard
                     && (   extractFloat128Frac0( a )
5579 158142c2 bellard
                          | extractFloat128Frac1( a ) )
5580 158142c2 bellard
                   ) {
5581 158142c2 bellard
                    return packFloat128( aSign, 0x3FFF, 0, 0 );
5582 158142c2 bellard
                }
5583 158142c2 bellard
                break;
5584 158142c2 bellard
             case float_round_down:
5585 158142c2 bellard
                return
5586 158142c2 bellard
                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5587 158142c2 bellard
                    : packFloat128( 0, 0, 0, 0 );
5588 158142c2 bellard
             case float_round_up:
5589 158142c2 bellard
                return
5590 158142c2 bellard
                      aSign ? packFloat128( 1, 0, 0, 0 )
5591 158142c2 bellard
                    : packFloat128( 0, 0x3FFF, 0, 0 );
5592 158142c2 bellard
            }
5593 158142c2 bellard
            return packFloat128( aSign, 0, 0, 0 );
5594 158142c2 bellard
        }
5595 158142c2 bellard
        lastBitMask = 1;
5596 158142c2 bellard
        lastBitMask <<= 0x402F - aExp;
5597 158142c2 bellard
        roundBitsMask = lastBitMask - 1;
5598 158142c2 bellard
        z.low = 0;
5599 158142c2 bellard
        z.high = a.high;
5600 158142c2 bellard
        roundingMode = STATUS(float_rounding_mode);
5601 158142c2 bellard
        if ( roundingMode == float_round_nearest_even ) {
5602 158142c2 bellard
            z.high += lastBitMask>>1;
5603 158142c2 bellard
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5604 158142c2 bellard
                z.high &= ~ lastBitMask;
5605 158142c2 bellard
            }
5606 158142c2 bellard
        }
5607 158142c2 bellard
        else if ( roundingMode != float_round_to_zero ) {
5608 158142c2 bellard
            if (   extractFloat128Sign( z )
5609 158142c2 bellard
                 ^ ( roundingMode == float_round_up ) ) {
5610 158142c2 bellard
                z.high |= ( a.low != 0 );
5611 158142c2 bellard
                z.high += roundBitsMask;
5612 158142c2 bellard
            }
5613 158142c2 bellard
        }
5614 158142c2 bellard
        z.high &= ~ roundBitsMask;
5615 158142c2 bellard
    }
5616 158142c2 bellard
    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5617 158142c2 bellard
        STATUS(float_exception_flags) |= float_flag_inexact;
5618 158142c2 bellard
    }
5619 158142c2 bellard
    return z;
5620 158142c2 bellard
5621 158142c2 bellard
}
5622 158142c2 bellard
5623 158142c2 bellard
/*----------------------------------------------------------------------------
5624 158142c2 bellard
| Returns the result of adding the absolute values of the quadruple-precision
5625 158142c2 bellard
| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
5626 158142c2 bellard
| before being returned.  `zSign' is ignored if the result is a NaN.
5627 158142c2 bellard
| The addition is performed according to the IEC/IEEE Standard for Binary
5628 158142c2 bellard
| Floating-Point Arithmetic.
5629 158142c2 bellard
*----------------------------------------------------------------------------*/
5630 158142c2 bellard
5631 158142c2 bellard
static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5632 158142c2 bellard
{
5633 158142c2 bellard
    int32 aExp, bExp, zExp;
5634 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5635 158142c2 bellard
    int32 expDiff;
5636 158142c2 bellard
5637 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5638 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5639 158142c2 bellard
    aExp = extractFloat128Exp( a );
5640 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5641 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5642 158142c2 bellard
    bExp = extractFloat128Exp( b );
5643 158142c2 bellard
    expDiff = aExp - bExp;
5644 158142c2 bellard
    if ( 0 < expDiff ) {
5645 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5646 158142c2 bellard
            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5647 158142c2 bellard
            return a;
5648 158142c2 bellard
        }
5649 158142c2 bellard
        if ( bExp == 0 ) {
5650 158142c2 bellard
            --expDiff;
5651 158142c2 bellard
        }
5652 158142c2 bellard
        else {
5653 158142c2 bellard
            bSig0 |= LIT64( 0x0001000000000000 );
5654 158142c2 bellard
        }
5655 158142c2 bellard
        shift128ExtraRightJamming(
5656 158142c2 bellard
            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5657 158142c2 bellard
        zExp = aExp;
5658 158142c2 bellard
    }
5659 158142c2 bellard
    else if ( expDiff < 0 ) {
5660 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5661 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5662 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5663 158142c2 bellard
        }
5664 158142c2 bellard
        if ( aExp == 0 ) {
5665 158142c2 bellard
            ++expDiff;
5666 158142c2 bellard
        }
5667 158142c2 bellard
        else {
5668 158142c2 bellard
            aSig0 |= LIT64( 0x0001000000000000 );
5669 158142c2 bellard
        }
5670 158142c2 bellard
        shift128ExtraRightJamming(
5671 158142c2 bellard
            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5672 158142c2 bellard
        zExp = bExp;
5673 158142c2 bellard
    }
5674 158142c2 bellard
    else {
5675 158142c2 bellard
        if ( aExp == 0x7FFF ) {
5676 158142c2 bellard
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5677 158142c2 bellard
                return propagateFloat128NaN( a, b STATUS_VAR );
5678 158142c2 bellard
            }
5679 158142c2 bellard
            return a;
5680 158142c2 bellard
        }
5681 158142c2 bellard
        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5682 fe76d976 pbrook
        if ( aExp == 0 ) {
5683 e6afc87f Peter Maydell
            if (STATUS(flush_to_zero)) {
5684 e6afc87f Peter Maydell
                if (zSig0 | zSig1) {
5685 e6afc87f Peter Maydell
                    float_raise(float_flag_output_denormal STATUS_VAR);
5686 e6afc87f Peter Maydell
                }
5687 e6afc87f Peter Maydell
                return packFloat128(zSign, 0, 0, 0);
5688 e6afc87f Peter Maydell
            }
5689 fe76d976 pbrook
            return packFloat128( zSign, 0, zSig0, zSig1 );
5690 fe76d976 pbrook
        }
5691 158142c2 bellard
        zSig2 = 0;
5692 158142c2 bellard
        zSig0 |= LIT64( 0x0002000000000000 );
5693 158142c2 bellard
        zExp = aExp;
5694 158142c2 bellard
        goto shiftRight1;
5695 158142c2 bellard
    }
5696 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5697 158142c2 bellard
    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5698 158142c2 bellard
    --zExp;
5699 158142c2 bellard
    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5700 158142c2 bellard
    ++zExp;
5701 158142c2 bellard
 shiftRight1:
5702 158142c2 bellard
    shift128ExtraRightJamming(
5703 158142c2 bellard
        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5704 158142c2 bellard
 roundAndPack:
5705 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5706 158142c2 bellard
5707 158142c2 bellard
}
5708 158142c2 bellard
5709 158142c2 bellard
/*----------------------------------------------------------------------------
5710 158142c2 bellard
| Returns the result of subtracting the absolute values of the quadruple-
5711 158142c2 bellard
| precision floating-point values `a' and `b'.  If `zSign' is 1, the
5712 158142c2 bellard
| difference is negated before being returned.  `zSign' is ignored if the
5713 158142c2 bellard
| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5714 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5715 158142c2 bellard
*----------------------------------------------------------------------------*/
5716 158142c2 bellard
5717 158142c2 bellard
static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5718 158142c2 bellard
{
5719 158142c2 bellard
    int32 aExp, bExp, zExp;
5720 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
5721 158142c2 bellard
    int32 expDiff;
5722 158142c2 bellard
    float128 z;
5723 158142c2 bellard
5724 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5725 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5726 158142c2 bellard
    aExp = extractFloat128Exp( a );
5727 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5728 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5729 158142c2 bellard
    bExp = extractFloat128Exp( b );
5730 158142c2 bellard
    expDiff = aExp - bExp;
5731 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5732 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5733 158142c2 bellard
    if ( 0 < expDiff ) goto aExpBigger;
5734 158142c2 bellard
    if ( expDiff < 0 ) goto bExpBigger;
5735 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5736 158142c2 bellard
        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5737 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5738 158142c2 bellard
        }
5739 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
5740 158142c2 bellard
        z.low = float128_default_nan_low;
5741 158142c2 bellard
        z.high = float128_default_nan_high;
5742 158142c2 bellard
        return z;
5743 158142c2 bellard
    }
5744 158142c2 bellard
    if ( aExp == 0 ) {
5745 158142c2 bellard
        aExp = 1;
5746 158142c2 bellard
        bExp = 1;
5747 158142c2 bellard
    }
5748 158142c2 bellard
    if ( bSig0 < aSig0 ) goto aBigger;
5749 158142c2 bellard
    if ( aSig0 < bSig0 ) goto bBigger;
5750 158142c2 bellard
    if ( bSig1 < aSig1 ) goto aBigger;
5751 158142c2 bellard
    if ( aSig1 < bSig1 ) goto bBigger;
5752 158142c2 bellard
    return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5753 158142c2 bellard
 bExpBigger:
5754 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5755 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5756 158142c2 bellard
        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5757 158142c2 bellard
    }
5758 158142c2 bellard
    if ( aExp == 0 ) {
5759 158142c2 bellard
        ++expDiff;
5760 158142c2 bellard
    }
5761 158142c2 bellard
    else {
5762 158142c2 bellard
        aSig0 |= LIT64( 0x4000000000000000 );
5763 158142c2 bellard
    }
5764 158142c2 bellard
    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5765 158142c2 bellard
    bSig0 |= LIT64( 0x4000000000000000 );
5766 158142c2 bellard
 bBigger:
5767 158142c2 bellard
    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5768 158142c2 bellard
    zExp = bExp;
5769 158142c2 bellard
    zSign ^= 1;
5770 158142c2 bellard
    goto normalizeRoundAndPack;
5771 158142c2 bellard
 aExpBigger:
5772 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5773 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5774 158142c2 bellard
        return a;
5775 158142c2 bellard
    }
5776 158142c2 bellard
    if ( bExp == 0 ) {
5777 158142c2 bellard
        --expDiff;
5778 158142c2 bellard
    }
5779 158142c2 bellard
    else {
5780 158142c2 bellard
        bSig0 |= LIT64( 0x4000000000000000 );
5781 158142c2 bellard
    }
5782 158142c2 bellard
    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5783 158142c2 bellard
    aSig0 |= LIT64( 0x4000000000000000 );
5784 158142c2 bellard
 aBigger:
5785 158142c2 bellard
    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5786 158142c2 bellard
    zExp = aExp;
5787 158142c2 bellard
 normalizeRoundAndPack:
5788 158142c2 bellard
    --zExp;
5789 158142c2 bellard
    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5790 158142c2 bellard
5791 158142c2 bellard
}
5792 158142c2 bellard
5793 158142c2 bellard
/*----------------------------------------------------------------------------
5794 158142c2 bellard
| Returns the result of adding the quadruple-precision floating-point values
5795 158142c2 bellard
| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
5796 158142c2 bellard
| for Binary Floating-Point Arithmetic.
5797 158142c2 bellard
*----------------------------------------------------------------------------*/
5798 158142c2 bellard
5799 158142c2 bellard
float128 float128_add( float128 a, float128 b STATUS_PARAM )
5800 158142c2 bellard
{
5801 158142c2 bellard
    flag aSign, bSign;
5802 158142c2 bellard
5803 158142c2 bellard
    aSign = extractFloat128Sign( a );
5804 158142c2 bellard
    bSign = extractFloat128Sign( b );
5805 158142c2 bellard
    if ( aSign == bSign ) {
5806 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5807 158142c2 bellard
    }
5808 158142c2 bellard
    else {
5809 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5810 158142c2 bellard
    }
5811 158142c2 bellard
5812 158142c2 bellard
}
5813 158142c2 bellard
5814 158142c2 bellard
/*----------------------------------------------------------------------------
5815 158142c2 bellard
| Returns the result of subtracting the quadruple-precision floating-point
5816 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5817 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5818 158142c2 bellard
*----------------------------------------------------------------------------*/
5819 158142c2 bellard
5820 158142c2 bellard
float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5821 158142c2 bellard
{
5822 158142c2 bellard
    flag aSign, bSign;
5823 158142c2 bellard
5824 158142c2 bellard
    aSign = extractFloat128Sign( a );
5825 158142c2 bellard
    bSign = extractFloat128Sign( b );
5826 158142c2 bellard
    if ( aSign == bSign ) {
5827 158142c2 bellard
        return subFloat128Sigs( a, b, aSign STATUS_VAR );
5828 158142c2 bellard
    }
5829 158142c2 bellard
    else {
5830 158142c2 bellard
        return addFloat128Sigs( a, b, aSign STATUS_VAR );
5831 158142c2 bellard
    }
5832 158142c2 bellard
5833 158142c2 bellard
}
5834 158142c2 bellard
5835 158142c2 bellard
/*----------------------------------------------------------------------------
5836 158142c2 bellard
| Returns the result of multiplying the quadruple-precision floating-point
5837 158142c2 bellard
| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5838 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
5839 158142c2 bellard
*----------------------------------------------------------------------------*/
5840 158142c2 bellard
5841 158142c2 bellard
float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5842 158142c2 bellard
{
5843 158142c2 bellard
    flag aSign, bSign, zSign;
5844 158142c2 bellard
    int32 aExp, bExp, zExp;
5845 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5846 158142c2 bellard
    float128 z;
5847 158142c2 bellard
5848 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5849 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5850 158142c2 bellard
    aExp = extractFloat128Exp( a );
5851 158142c2 bellard
    aSign = extractFloat128Sign( a );
5852 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5853 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5854 158142c2 bellard
    bExp = extractFloat128Exp( b );
5855 158142c2 bellard
    bSign = extractFloat128Sign( b );
5856 158142c2 bellard
    zSign = aSign ^ bSign;
5857 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5858 158142c2 bellard
        if (    ( aSig0 | aSig1 )
5859 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5860 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
5861 158142c2 bellard
        }
5862 158142c2 bellard
        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5863 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5864 158142c2 bellard
    }
5865 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5866 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5867 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5868 158142c2 bellard
 invalid:
5869 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
5870 158142c2 bellard
            z.low = float128_default_nan_low;
5871 158142c2 bellard
            z.high = float128_default_nan_high;
5872 158142c2 bellard
            return z;
5873 158142c2 bellard
        }
5874 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5875 158142c2 bellard
    }
5876 158142c2 bellard
    if ( aExp == 0 ) {
5877 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5878 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5879 158142c2 bellard
    }
5880 158142c2 bellard
    if ( bExp == 0 ) {
5881 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5882 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5883 158142c2 bellard
    }
5884 158142c2 bellard
    zExp = aExp + bExp - 0x4000;
5885 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
5886 158142c2 bellard
    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5887 158142c2 bellard
    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5888 158142c2 bellard
    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5889 158142c2 bellard
    zSig2 |= ( zSig3 != 0 );
5890 158142c2 bellard
    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5891 158142c2 bellard
        shift128ExtraRightJamming(
5892 158142c2 bellard
            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5893 158142c2 bellard
        ++zExp;
5894 158142c2 bellard
    }
5895 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5896 158142c2 bellard
5897 158142c2 bellard
}
5898 158142c2 bellard
5899 158142c2 bellard
/*----------------------------------------------------------------------------
5900 158142c2 bellard
| Returns the result of dividing the quadruple-precision floating-point value
5901 158142c2 bellard
| `a' by the corresponding value `b'.  The operation is performed according to
5902 158142c2 bellard
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5903 158142c2 bellard
*----------------------------------------------------------------------------*/
5904 158142c2 bellard
5905 158142c2 bellard
float128 float128_div( float128 a, float128 b STATUS_PARAM )
5906 158142c2 bellard
{
5907 158142c2 bellard
    flag aSign, bSign, zSign;
5908 158142c2 bellard
    int32 aExp, bExp, zExp;
5909 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5910 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5911 158142c2 bellard
    float128 z;
5912 158142c2 bellard
5913 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5914 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
5915 158142c2 bellard
    aExp = extractFloat128Exp( a );
5916 158142c2 bellard
    aSign = extractFloat128Sign( a );
5917 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
5918 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
5919 158142c2 bellard
    bExp = extractFloat128Exp( b );
5920 158142c2 bellard
    bSign = extractFloat128Sign( b );
5921 158142c2 bellard
    zSign = aSign ^ bSign;
5922 158142c2 bellard
    if ( aExp == 0x7FFF ) {
5923 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5924 158142c2 bellard
        if ( bExp == 0x7FFF ) {
5925 158142c2 bellard
            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5926 158142c2 bellard
            goto invalid;
5927 158142c2 bellard
        }
5928 158142c2 bellard
        return packFloat128( zSign, 0x7FFF, 0, 0 );
5929 158142c2 bellard
    }
5930 158142c2 bellard
    if ( bExp == 0x7FFF ) {
5931 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5932 158142c2 bellard
        return packFloat128( zSign, 0, 0, 0 );
5933 158142c2 bellard
    }
5934 158142c2 bellard
    if ( bExp == 0 ) {
5935 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
5936 158142c2 bellard
            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5937 158142c2 bellard
 invalid:
5938 158142c2 bellard
                float_raise( float_flag_invalid STATUS_VAR);
5939 158142c2 bellard
                z.low = float128_default_nan_low;
5940 158142c2 bellard
                z.high = float128_default_nan_high;
5941 158142c2 bellard
                return z;
5942 158142c2 bellard
            }
5943 158142c2 bellard
            float_raise( float_flag_divbyzero STATUS_VAR);
5944 158142c2 bellard
            return packFloat128( zSign, 0x7FFF, 0, 0 );
5945 158142c2 bellard
        }
5946 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5947 158142c2 bellard
    }
5948 158142c2 bellard
    if ( aExp == 0 ) {
5949 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5950 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5951 158142c2 bellard
    }
5952 158142c2 bellard
    zExp = aExp - bExp + 0x3FFD;
5953 158142c2 bellard
    shortShift128Left(
5954 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5955 158142c2 bellard
    shortShift128Left(
5956 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5957 158142c2 bellard
    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5958 158142c2 bellard
        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5959 158142c2 bellard
        ++zExp;
5960 158142c2 bellard
    }
5961 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5962 158142c2 bellard
    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5963 158142c2 bellard
    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5964 bb98fe42 Andreas Färber
    while ( (int64_t) rem0 < 0 ) {
5965 158142c2 bellard
        --zSig0;
5966 158142c2 bellard
        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5967 158142c2 bellard
    }
5968 158142c2 bellard
    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5969 158142c2 bellard
    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5970 158142c2 bellard
        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5971 158142c2 bellard
        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5972 bb98fe42 Andreas Färber
        while ( (int64_t) rem1 < 0 ) {
5973 158142c2 bellard
            --zSig1;
5974 158142c2 bellard
            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5975 158142c2 bellard
        }
5976 158142c2 bellard
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5977 158142c2 bellard
    }
5978 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5979 158142c2 bellard
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5980 158142c2 bellard
5981 158142c2 bellard
}
5982 158142c2 bellard
5983 158142c2 bellard
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Cases settled before the general reduction:
|   - `a' is a NaN, or `a' is an infinity and `b' is a NaN: the result comes
|     from propagateFloat128NaN().
|   - `a' is any other infinity, or `b' is a zero while `a' is finite: the
|     invalid exception is raised and the default NaN is returned.
|   - `b' is a NaN while `a' is finite: propagateFloat128NaN().
|   - `b' is an infinity, `a' is a zero, or (after subnormals are normalized)
|     the exponent of `a' is more than one below that of `b': `a' is returned
|     unchanged.
*----------------------------------------------------------------------------*/
5988 158142c2 bellard
5989 158142c2 bellard
float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5990 158142c2 bellard
{
5991 ed086f3d Blue Swirl
    flag aSign, zSign;
5992 158142c2 bellard
    int32 aExp, bExp, expDiff;
5993 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5994 bb98fe42 Andreas Färber
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5995 bb98fe42 Andreas Färber
    int64_t sigMean0;
5996 158142c2 bellard
    float128 z;
5997 158142c2 bellard
5998 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
5999 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
6000 158142c2 bellard
    aExp = extractFloat128Exp( a );
6001 158142c2 bellard
    aSign = extractFloat128Sign( a );
6002 158142c2 bellard
    bSig1 = extractFloat128Frac1( b );
6003 158142c2 bellard
    bSig0 = extractFloat128Frac0( b );
6004 158142c2 bellard
    bExp = extractFloat128Exp( b );
6005 158142c2 bellard
    /* `a' is an infinity or a NaN. */
    if ( aExp == 0x7FFF ) {
6006 158142c2 bellard
        if (    ( aSig0 | aSig1 )
6007 158142c2 bellard
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6008 158142c2 bellard
            return propagateFloat128NaN( a, b STATUS_VAR );
6009 158142c2 bellard
        }
6010 158142c2 bellard
        goto invalid;
6011 158142c2 bellard
    }
6012 158142c2 bellard
    /* `b' is an infinity or a NaN while `a' is finite. */
    if ( bExp == 0x7FFF ) {
6013 158142c2 bellard
        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6014 158142c2 bellard
        return a;
6015 158142c2 bellard
    }
6016 158142c2 bellard
    if ( bExp == 0 ) {
6017 158142c2 bellard
        if ( ( bSig0 | bSig1 ) == 0 ) {
6018 158142c2 bellard
            /* Shared exit for the invalid cases; also reached by the goto
               above. */
 invalid:
6019 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6020 158142c2 bellard
            z.low = float128_default_nan_low;
6021 158142c2 bellard
            z.high = float128_default_nan_high;
6022 158142c2 bellard
            return z;
6023 158142c2 bellard
        }
6024 158142c2 bellard
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6025 158142c2 bellard
    }
6026 158142c2 bellard
    if ( aExp == 0 ) {
6027 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
6028 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6029 158142c2 bellard
    }
6030 158142c2 bellard
    expDiff = aExp - bExp;
6031 158142c2 bellard
    if ( expDiff < -1 ) return a;
6032 158142c2 bellard
    /* Set the implicit bit and shift `a' left by 15, or by 14 when its
       exponent is one below that of `b'. */
    shortShift128Left(
6033 158142c2 bellard
        aSig0 | LIT64( 0x0001000000000000 ),
6034 158142c2 bellard
        aSig1,
6035 158142c2 bellard
        15 - ( expDiff < 0 ),
6036 158142c2 bellard
        &aSig0,
6037 158142c2 bellard
        &aSig1
6038 158142c2 bellard
    );
6039 158142c2 bellard
    shortShift128Left(
6040 158142c2 bellard
        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6041 158142c2 bellard
    /* q starts as 1 when the significand of `b' does not exceed that of `a',
       in which case `b' is subtracted once. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
6042 158142c2 bellard
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6043 158142c2 bellard
    expDiff -= 64;
6044 158142c2 bellard
    /* Each pass lowers expDiff by 61.  The quotient estimate is reduced by
       4 (clamped at 0) before use -- presumably so it never exceeds the true
       quotient; confirm against estimateDiv128To64(). */
    while ( 0 < expDiff ) {
6045 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6046 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
6047 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6048 158142c2 bellard
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6049 158142c2 bellard
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6050 158142c2 bellard
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6051 158142c2 bellard
        expDiff -= 61;
6052 158142c2 bellard
    }
6053 158142c2 bellard
    /* expDiff is now <= 0.  When it is above -64, one more estimated
       quotient, shifted right by -expDiff, is applied. */
    if ( -64 < expDiff ) {
6054 158142c2 bellard
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6055 158142c2 bellard
        q = ( 4 < q ) ? q - 4 : 0;
6056 158142c2 bellard
        q >>= - expDiff;
6057 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6058 158142c2 bellard
        expDiff += 52;
6059 158142c2 bellard
        if ( expDiff < 0 ) {
6060 158142c2 bellard
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6061 158142c2 bellard
        }
6062 158142c2 bellard
        else {
6063 158142c2 bellard
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6064 158142c2 bellard
        }
6065 158142c2 bellard
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6066 158142c2 bellard
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6067 158142c2 bellard
    }
6068 158142c2 bellard
    /* Otherwise the loop above did not run and q still holds the le128()
       result; both significands are only shifted right by 12. */
    else {
6069 158142c2 bellard
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6070 158142c2 bellard
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6071 158142c2 bellard
    }
6072 158142c2 bellard
    /* Subtract `b' until the running value turns negative, counting each
       subtraction in q; alternateASig keeps the value from just before the
       most recent subtraction. */
    do {
6073 158142c2 bellard
        alternateASig0 = aSig0;
6074 158142c2 bellard
        alternateASig1 = aSig1;
6075 158142c2 bellard
        ++q;
6076 158142c2 bellard
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6077 bb98fe42 Andreas Färber
    } while ( 0 <= (int64_t) aSig0 );
6078 158142c2 bellard
    /* sigMean is the sum of the two candidates (the negative value and the
       one before it).  The earlier candidate is kept when the sum is
       negative, or when the sum is exactly zero and q is odd. */
    add128(
6079 bb98fe42 Andreas Färber
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6080 158142c2 bellard
    if (    ( sigMean0 < 0 )
6081 158142c2 bellard
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6082 158142c2 bellard
        aSig0 = alternateASig0;
6083 158142c2 bellard
        aSig1 = alternateASig1;
6084 158142c2 bellard
    }
6085 bb98fe42 Andreas Färber
    /* A negative chosen value is negated to a magnitude, and zSign flips
       the sign of the result relative to `a'. */
    zSign = ( (int64_t) aSig0 < 0 );
6086 158142c2 bellard
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6087 158142c2 bellard
    return
6088 158142c2 bellard
        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
6089 158142c2 bellard
6090 158142c2 bellard
}
6091 158142c2 bellard
6092 158142c2 bellard
/*----------------------------------------------------------------------------
| Returns the square root of the quadruple-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
|
| A NaN operand goes through propagateFloat128NaN().  Positive infinity and
| negative zero are returned unchanged, and positive zero yields positive
| zero.  Negative infinity and every other negative operand raise the invalid
| exception and return the default NaN.  Results on the general path are
| packed with a sign of 0.
*----------------------------------------------------------------------------*/
6097 158142c2 bellard
6098 158142c2 bellard
float128 float128_sqrt( float128 a STATUS_PARAM )
6099 158142c2 bellard
{
6100 158142c2 bellard
    flag aSign;
6101 158142c2 bellard
    int32 aExp, zExp;
6102 bb98fe42 Andreas Färber
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6103 bb98fe42 Andreas Färber
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6104 158142c2 bellard
    float128 z;
6105 158142c2 bellard
6106 158142c2 bellard
    aSig1 = extractFloat128Frac1( a );
6107 158142c2 bellard
    aSig0 = extractFloat128Frac0( a );
6108 158142c2 bellard
    aExp = extractFloat128Exp( a );
6109 158142c2 bellard
    aSign = extractFloat128Sign( a );
6110 158142c2 bellard
    /* NaN: propagate.  +infinity: returned as is.  -infinity: invalid. */
    if ( aExp == 0x7FFF ) {
6111 158142c2 bellard
        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
6112 158142c2 bellard
        if ( ! aSign ) return a;
6113 158142c2 bellard
        goto invalid;
6114 158142c2 bellard
    }
6115 158142c2 bellard
    /* Negative operand: -0 is returned unchanged, anything else is invalid. */
    if ( aSign ) {
6116 158142c2 bellard
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6117 158142c2 bellard
 invalid:
6118 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6119 158142c2 bellard
        z.low = float128_default_nan_low;
6120 158142c2 bellard
        z.high = float128_default_nan_high;
6121 158142c2 bellard
        return z;
6122 158142c2 bellard
    }
6123 158142c2 bellard
    /* +0 yields +0; a subnormal is normalized before the general path. */
    if ( aExp == 0 ) {
6124 158142c2 bellard
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6125 158142c2 bellard
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6126 158142c2 bellard
    }
6127 158142c2 bellard
    /* Result exponent: half of the unbiased exponent, rebiased with 0x3FFE. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6128 158142c2 bellard
    aSig0 |= LIT64( 0x0001000000000000 );
6129 158142c2 bellard
    /* Start from the estimateSqrt32() value and refine it to 64 bits with
       one estimated division.  The shift below is 13, or 12 when aExp is
       odd. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6130 158142c2 bellard
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6131 158142c2 bellard
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6132 158142c2 bellard
    doubleZSig0 = zSig0<<1;
6133 158142c2 bellard
    mul64To128( zSig0, zSig0, &term0, &term1 );
6134 158142c2 bellard
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6135 bb98fe42 Andreas Färber
    /* Step zSig0 down while the remainder a - zSig0^2 is negative. */
    while ( (int64_t) rem0 < 0 ) {
6136 158142c2 bellard
        --zSig0;
6137 158142c2 bellard
        doubleZSig0 -= 2;
6138 158142c2 bellard
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6139 158142c2 bellard
    }
6140 158142c2 bellard
    /* Lower 64 result bits.  The exact remainder is only computed when the
       low 13 bits of the estimate are at most 5 -- presumably the only range
       where the estimate's error could affect rounding; confirm against
       estimateDiv128To64(). */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6141 158142c2 bellard
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6142 158142c2 bellard
        if ( zSig1 == 0 ) zSig1 = 1;
6143 158142c2 bellard
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6144 158142c2 bellard
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6145 158142c2 bellard
        mul64To128( zSig1, zSig1, &term2, &term3 );
6146 158142c2 bellard
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6147 bb98fe42 Andreas Färber
        /* Step zSig1 down while the remainder is negative. */
        while ( (int64_t) rem1 < 0 ) {
6148 158142c2 bellard
            --zSig1;
6149 158142c2 bellard
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6150 158142c2 bellard
            term3 |= 1;
6151 158142c2 bellard
            term2 |= doubleZSig0;
6152 158142c2 bellard
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6153 158142c2 bellard
        }
6154 158142c2 bellard
        /* Sticky bit: record any nonzero remainder. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6155 158142c2 bellard
    }
6156 158142c2 bellard
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6157 158142c2 bellard
    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6158 158142c2 bellard
6159 158142c2 bellard
}
6160 158142c2 bellard
6161 158142c2 bellard
/*----------------------------------------------------------------------------
6162 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
6163 b689362d Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
6164 b689362d Aurelien Jarno
| raised if either operand is a NaN.  Otherwise, the comparison is performed
6165 158142c2 bellard
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6166 158142c2 bellard
*----------------------------------------------------------------------------*/
6167 158142c2 bellard
6168 b689362d Aurelien Jarno
int float128_eq( float128 a, float128 b STATUS_PARAM )
6169 158142c2 bellard
{
6170 158142c2 bellard
6171 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6172 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6173 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6174 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6175 158142c2 bellard
       ) {
6176 b689362d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
6177 158142c2 bellard
        return 0;
6178 158142c2 bellard
    }
6179 158142c2 bellard
    return
6180 158142c2 bellard
           ( a.low == b.low )
6181 158142c2 bellard
        && (    ( a.high == b.high )
6182 158142c2 bellard
             || (    ( a.low == 0 )
6183 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6184 158142c2 bellard
           );
6185 158142c2 bellard
6186 158142c2 bellard
}
6187 158142c2 bellard
6188 158142c2 bellard
/*----------------------------------------------------------------------------
6189 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6190 f5a64251 Aurelien Jarno
| or equal to the corresponding value `b', and 0 otherwise.  The invalid
6191 f5a64251 Aurelien Jarno
| exception is raised if either operand is a NaN.  The comparison is performed
6192 f5a64251 Aurelien Jarno
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6193 158142c2 bellard
*----------------------------------------------------------------------------*/
6194 158142c2 bellard
6195 750afe93 bellard
int float128_le( float128 a, float128 b STATUS_PARAM )
6196 158142c2 bellard
{
6197 158142c2 bellard
    flag aSign, bSign;
6198 158142c2 bellard
6199 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6200 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6201 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6202 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6203 158142c2 bellard
       ) {
6204 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6205 158142c2 bellard
        return 0;
6206 158142c2 bellard
    }
6207 158142c2 bellard
    aSign = extractFloat128Sign( a );
6208 158142c2 bellard
    bSign = extractFloat128Sign( b );
6209 158142c2 bellard
    if ( aSign != bSign ) {
6210 158142c2 bellard
        return
6211 158142c2 bellard
               aSign
6212 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6213 158142c2 bellard
                 == 0 );
6214 158142c2 bellard
    }
6215 158142c2 bellard
    return
6216 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
6217 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
6218 158142c2 bellard
6219 158142c2 bellard
}
6220 158142c2 bellard
6221 158142c2 bellard
/*----------------------------------------------------------------------------
6222 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6223 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  The invalid exception is
6224 f5a64251 Aurelien Jarno
| raised if either operand is a NaN.  The comparison is performed according
6225 f5a64251 Aurelien Jarno
| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6226 158142c2 bellard
*----------------------------------------------------------------------------*/
6227 158142c2 bellard
6228 750afe93 bellard
int float128_lt( float128 a, float128 b STATUS_PARAM )
6229 158142c2 bellard
{
6230 158142c2 bellard
    flag aSign, bSign;
6231 158142c2 bellard
6232 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6233 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6234 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6235 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6236 158142c2 bellard
       ) {
6237 158142c2 bellard
        float_raise( float_flag_invalid STATUS_VAR);
6238 158142c2 bellard
        return 0;
6239 158142c2 bellard
    }
6240 158142c2 bellard
    aSign = extractFloat128Sign( a );
6241 158142c2 bellard
    bSign = extractFloat128Sign( b );
6242 158142c2 bellard
    if ( aSign != bSign ) {
6243 158142c2 bellard
        return
6244 158142c2 bellard
               aSign
6245 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6246 158142c2 bellard
                 != 0 );
6247 158142c2 bellard
    }
6248 158142c2 bellard
    return
6249 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
6250 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
6251 158142c2 bellard
6252 158142c2 bellard
}
6253 158142c2 bellard
6254 158142c2 bellard
/*----------------------------------------------------------------------------
6255 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6256 f5a64251 Aurelien Jarno
| be compared, and 0 otherwise.  The invalid exception is raised if either
6257 f5a64251 Aurelien Jarno
| operand is a NaN. The comparison is performed according to the IEC/IEEE
6258 f5a64251 Aurelien Jarno
| Standard for Binary Floating-Point Arithmetic.
6259 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
6260 67b7861d Aurelien Jarno
6261 67b7861d Aurelien Jarno
int float128_unordered( float128 a, float128 b STATUS_PARAM )
6262 67b7861d Aurelien Jarno
{
6263 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6264 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6265 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6266 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6267 67b7861d Aurelien Jarno
       ) {
6268 67b7861d Aurelien Jarno
        float_raise( float_flag_invalid STATUS_VAR);
6269 67b7861d Aurelien Jarno
        return 1;
6270 67b7861d Aurelien Jarno
    }
6271 67b7861d Aurelien Jarno
    return 0;
6272 67b7861d Aurelien Jarno
}
6273 67b7861d Aurelien Jarno
6274 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
6275 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is equal to
6276 f5a64251 Aurelien Jarno
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
6277 f5a64251 Aurelien Jarno
| exception.  The comparison is performed according to the IEC/IEEE Standard
6278 f5a64251 Aurelien Jarno
| for Binary Floating-Point Arithmetic.
6279 158142c2 bellard
*----------------------------------------------------------------------------*/
6280 158142c2 bellard
6281 b689362d Aurelien Jarno
int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
6282 158142c2 bellard
{
6283 158142c2 bellard
6284 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6285 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6286 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6287 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6288 158142c2 bellard
       ) {
6289 b689362d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
6290 b689362d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
6291 b689362d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6292 b689362d Aurelien Jarno
        }
6293 158142c2 bellard
        return 0;
6294 158142c2 bellard
    }
6295 158142c2 bellard
    return
6296 158142c2 bellard
           ( a.low == b.low )
6297 158142c2 bellard
        && (    ( a.high == b.high )
6298 158142c2 bellard
             || (    ( a.low == 0 )
6299 bb98fe42 Andreas Färber
                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6300 158142c2 bellard
           );
6301 158142c2 bellard
6302 158142c2 bellard
}
6303 158142c2 bellard
6304 158142c2 bellard
/*----------------------------------------------------------------------------
6305 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6306 158142c2 bellard
| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
6307 158142c2 bellard
| cause an exception.  Otherwise, the comparison is performed according to the
6308 158142c2 bellard
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6309 158142c2 bellard
*----------------------------------------------------------------------------*/
6310 158142c2 bellard
6311 750afe93 bellard
int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
6312 158142c2 bellard
{
6313 158142c2 bellard
    flag aSign, bSign;
6314 158142c2 bellard
6315 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6316 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6317 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6318 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6319 158142c2 bellard
       ) {
6320 158142c2 bellard
        if (    float128_is_signaling_nan( a )
6321 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
6322 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6323 158142c2 bellard
        }
6324 158142c2 bellard
        return 0;
6325 158142c2 bellard
    }
6326 158142c2 bellard
    aSign = extractFloat128Sign( a );
6327 158142c2 bellard
    bSign = extractFloat128Sign( b );
6328 158142c2 bellard
    if ( aSign != bSign ) {
6329 158142c2 bellard
        return
6330 158142c2 bellard
               aSign
6331 bb98fe42 Andreas Färber
            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6332 158142c2 bellard
                 == 0 );
6333 158142c2 bellard
    }
6334 158142c2 bellard
    return
6335 158142c2 bellard
          aSign ? le128( b.high, b.low, a.high, a.low )
6336 158142c2 bellard
        : le128( a.high, a.low, b.high, b.low );
6337 158142c2 bellard
6338 158142c2 bellard
}
6339 158142c2 bellard
6340 158142c2 bellard
/*----------------------------------------------------------------------------
6341 158142c2 bellard
| Returns 1 if the quadruple-precision floating-point value `a' is less than
6342 158142c2 bellard
| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
6343 158142c2 bellard
| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
6344 158142c2 bellard
| Standard for Binary Floating-Point Arithmetic.
6345 158142c2 bellard
*----------------------------------------------------------------------------*/
6346 158142c2 bellard
6347 750afe93 bellard
int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
6348 158142c2 bellard
{
6349 158142c2 bellard
    flag aSign, bSign;
6350 158142c2 bellard
6351 158142c2 bellard
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6352 158142c2 bellard
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6353 158142c2 bellard
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6354 158142c2 bellard
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6355 158142c2 bellard
       ) {
6356 158142c2 bellard
        if (    float128_is_signaling_nan( a )
6357 158142c2 bellard
             || float128_is_signaling_nan( b ) ) {
6358 158142c2 bellard
            float_raise( float_flag_invalid STATUS_VAR);
6359 158142c2 bellard
        }
6360 158142c2 bellard
        return 0;
6361 158142c2 bellard
    }
6362 158142c2 bellard
    aSign = extractFloat128Sign( a );
6363 158142c2 bellard
    bSign = extractFloat128Sign( b );
6364 158142c2 bellard
    if ( aSign != bSign ) {
6365 158142c2 bellard
        return
6366 158142c2 bellard
               aSign
6367 bb98fe42 Andreas Färber
            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6368 158142c2 bellard
                 != 0 );
6369 158142c2 bellard
    }
6370 158142c2 bellard
    return
6371 158142c2 bellard
          aSign ? lt128( b.high, b.low, a.high, a.low )
6372 158142c2 bellard
        : lt128( a.high, a.low, b.high, b.low );
6373 158142c2 bellard
6374 158142c2 bellard
}
6375 158142c2 bellard
6376 67b7861d Aurelien Jarno
/*----------------------------------------------------------------------------
6377 67b7861d Aurelien Jarno
| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6378 67b7861d Aurelien Jarno
| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
6379 67b7861d Aurelien Jarno
| comparison is performed according to the IEC/IEEE Standard for Binary
6380 67b7861d Aurelien Jarno
| Floating-Point Arithmetic.
6381 67b7861d Aurelien Jarno
*----------------------------------------------------------------------------*/
6382 67b7861d Aurelien Jarno
6383 67b7861d Aurelien Jarno
int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6384 67b7861d Aurelien Jarno
{
6385 67b7861d Aurelien Jarno
    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
6386 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6387 67b7861d Aurelien Jarno
         || (    ( extractFloat128Exp( b ) == 0x7FFF )
6388 67b7861d Aurelien Jarno
              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6389 67b7861d Aurelien Jarno
       ) {
6390 67b7861d Aurelien Jarno
        if (    float128_is_signaling_nan( a )
6391 67b7861d Aurelien Jarno
             || float128_is_signaling_nan( b ) ) {
6392 67b7861d Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6393 67b7861d Aurelien Jarno
        }
6394 67b7861d Aurelien Jarno
        return 1;
6395 67b7861d Aurelien Jarno
    }
6396 67b7861d Aurelien Jarno
    return 0;
6397 67b7861d Aurelien Jarno
}
6398 67b7861d Aurelien Jarno
6399 1d6bda35 bellard
/* misc functions */
6400 9f8d2a09 Andreas Färber
/* Converts the unsigned 32-bit integer `a' to single precision by widening
 * it to a signed 64-bit value and reusing int64_to_float32().
 */
float32 uint32_to_float32( uint32 a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float32(widened STATUS_VAR);
}
6404 1d6bda35 bellard
6405 9f8d2a09 Andreas Färber
/* Converts the unsigned 32-bit integer `a' to double precision by widening
 * it to a signed 64-bit value and reusing int64_to_float64().
 */
float64 uint32_to_float64( uint32 a STATUS_PARAM )
{
    int64_t widened = a;

    return int64_to_float64(widened STATUS_VAR);
}
6409 1d6bda35 bellard
6410 9f8d2a09 Andreas Färber
/* Converts `a' to an unsigned 32-bit integer by way of float32_to_int64().
 * A result outside [0, 0xffffffff] raises the invalid exception and is
 * clamped to the nearer bound.
 */
uint32 float32_to_uint32( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
6427 1d6bda35 bellard
6428 9f8d2a09 Andreas Färber
/* Converts `a' to an unsigned 32-bit integer by way of
 * float32_to_int64_round_to_zero().  A result outside [0, 0xffffffff] raises
 * the invalid exception and is clamped to the nearer bound.
 */
uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
6445 1d6bda35 bellard
6446 38641f8f Andreas Färber
/* Converts `a' to an unsigned 16-bit integer by way of
 * float32_to_int64_round_to_zero().  A result outside [0, 0xffff] raises the
 * invalid exception and is clamped to the nearer bound.
 */
uint16 float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
{
    int64_t wide = float32_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffff;
}
6463 cbcef455 Peter Maydell
6464 9f8d2a09 Andreas Färber
/* Converts `a' to an unsigned 32-bit integer by way of float64_to_int64().
 * A result outside [0, 0xffffffff] raises the invalid exception and is
 * clamped to the nearer bound.
 */
uint32 float64_to_uint32( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
6481 1d6bda35 bellard
6482 9f8d2a09 Andreas Färber
/* Converts `a' to an unsigned 32-bit integer by way of
 * float64_to_int64_round_to_zero().  A result outside [0, 0xffffffff] raises
 * the invalid exception and is clamped to the nearer bound.
 */
uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffffffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffffffff;
}
6499 1d6bda35 bellard
6500 38641f8f Andreas Färber
/* Converts `a' to an unsigned 16-bit integer by way of
 * float64_to_int64_round_to_zero().  A result outside [0, 0xffff] raises the
 * invalid exception and is clamped to the nearer bound.
 */
uint16 float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
{
    int64_t wide = float64_to_int64_round_to_zero(a STATUS_VAR);

    if (wide >= 0 && wide <= 0xffff) {
        return wide;
    }
    float_raise( float_flag_invalid STATUS_VAR);
    return (wide < 0) ? 0 : 0xffff;
}
6517 cbcef455 Peter Maydell
6518 f090c9d4 pbrook
/* FIXME: This looks broken.  */
6519 75d62a58 j_mayer
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6520 75d62a58 j_mayer
{
6521 75d62a58 j_mayer
    int64_t v;
6522 75d62a58 j_mayer
6523 f090c9d4 pbrook
    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6524 f090c9d4 pbrook
    v += float64_val(a);
6525 f090c9d4 pbrook
    v = float64_to_int64(make_float64(v) STATUS_VAR);
6526 75d62a58 j_mayer
6527 75d62a58 j_mayer
    return v - INT64_MIN;
6528 75d62a58 j_mayer
}
6529 75d62a58 j_mayer
6530 75d62a58 j_mayer
/* Converts the double-precision value `a' to an unsigned 64-bit integer,
 * delegating the in-range rounding to float64_to_int64_round_to_zero().
 *
 * The previous implementation added the raw bit pattern of -2^63 to the raw
 * bit pattern of `a' as integers and reinterpreted the sum as a double, which
 * does not correspond to any floating-point operation.  The conversion is now
 * split by range:
 *   - NaN: raises invalid and returns all ones.
 *   - Non-negative values with biased exponent >= 0x43E (i.e. >= 2^63,
 *     including +infinity): values below 2^64 are integers already, so the
 *     53-bit significand is shifted into place exactly; anything larger
 *     raises invalid and returns all ones.
 *   - Everything else goes through float64_to_int64_round_to_zero(); a
 *     negative integer result raises invalid and returns 0.
 *
 * NOTE(review): on the negative path the signed conversion may already have
 * raised other flags (presumably inexact) before invalid is raised here.
 */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
    int64_t v;

    if ( float64_is_any_nan( a ) ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return ~(uint64_t) 0;
    }
    if ( ! extractFloat64Sign( a ) && ( 0x43E <= extractFloat64Exp( a ) ) ) {
        if ( extractFloat64Exp( a ) == 0x43E ) {
            /* 2^63 <= a < 2^64: restore the implicit leading one at bit 52
               and move it up to bit 63; no rounding is involved. */
            return ( extractFloat64Frac( a ) | ( (uint64_t) 1 << 52 ) ) << 11;
        }
        /* a >= 2^64 or +infinity: not representable. */
        float_raise( float_flag_invalid STATUS_VAR);
        return ~(uint64_t) 0;
    }
    /* Here a < 2^63, so the signed conversion cannot overflow upwards. */
    v = float64_to_int64_round_to_zero( a STATUS_VAR);
    if ( v < 0 ) {
        float_raise( float_flag_invalid STATUS_VAR);
        return 0;
    }
    return v;
}
6540 75d62a58 j_mayer
6541 1d6bda35 bellard
#define COMPARE(s, nan_exp)                                                  \
6542 750afe93 bellard
INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
6543 1d6bda35 bellard
                                      int is_quiet STATUS_PARAM )            \
6544 1d6bda35 bellard
{                                                                            \
6545 1d6bda35 bellard
    flag aSign, bSign;                                                       \
6546 bb98fe42 Andreas Färber
    uint ## s ## _t av, bv;                                                  \
6547 37d18660 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
6548 37d18660 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
6549 1d6bda35 bellard
                                                                             \
6550 1d6bda35 bellard
    if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
6551 1d6bda35 bellard
         extractFloat ## s ## Frac( a ) ) ||                                 \
6552 1d6bda35 bellard
        ( ( extractFloat ## s ## Exp( b ) == nan_exp ) &&                    \
6553 1d6bda35 bellard
          extractFloat ## s ## Frac( b ) )) {                                \
6554 1d6bda35 bellard
        if (!is_quiet ||                                                     \
6555 1d6bda35 bellard
            float ## s ## _is_signaling_nan( a ) ||                          \
6556 1d6bda35 bellard
            float ## s ## _is_signaling_nan( b ) ) {                         \
6557 1d6bda35 bellard
            float_raise( float_flag_invalid STATUS_VAR);                     \
6558 1d6bda35 bellard
        }                                                                    \
6559 1d6bda35 bellard
        return float_relation_unordered;                                     \
6560 1d6bda35 bellard
    }                                                                        \
6561 1d6bda35 bellard
    aSign = extractFloat ## s ## Sign( a );                                  \
6562 1d6bda35 bellard
    bSign = extractFloat ## s ## Sign( b );                                  \
6563 f090c9d4 pbrook
    av = float ## s ## _val(a);                                              \
6564 cd8a2533 blueswir1
    bv = float ## s ## _val(b);                                              \
6565 1d6bda35 bellard
    if ( aSign != bSign ) {                                                  \
6566 bb98fe42 Andreas Färber
        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
6567 1d6bda35 bellard
            /* zero case */                                                  \
6568 1d6bda35 bellard
            return float_relation_equal;                                     \
6569 1d6bda35 bellard
        } else {                                                             \
6570 1d6bda35 bellard
            return 1 - (2 * aSign);                                          \
6571 1d6bda35 bellard
        }                                                                    \
6572 1d6bda35 bellard
    } else {                                                                 \
6573 f090c9d4 pbrook
        if (av == bv) {                                                      \
6574 1d6bda35 bellard
            return float_relation_equal;                                     \
6575 1d6bda35 bellard
        } else {                                                             \
6576 f090c9d4 pbrook
            return 1 - 2 * (aSign ^ ( av < bv ));                            \
6577 1d6bda35 bellard
        }                                                                    \
6578 1d6bda35 bellard
    }                                                                        \
6579 1d6bda35 bellard
}                                                                            \
6580 1d6bda35 bellard
                                                                             \
6581 750afe93 bellard
int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM )        \
6582 1d6bda35 bellard
{                                                                            \
6583 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 0 STATUS_VAR);              \
6584 1d6bda35 bellard
}                                                                            \
6585 1d6bda35 bellard
                                                                             \
6586 750afe93 bellard
int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
6587 1d6bda35 bellard
{                                                                            \
6588 1d6bda35 bellard
    return float ## s ## _compare_internal(a, b, 1 STATUS_VAR);              \
6589 1d6bda35 bellard
}
6590 1d6bda35 bellard
6591 1d6bda35 bellard
COMPARE(32, 0xff)
6592 1d6bda35 bellard
COMPARE(64, 0x7ff)
6593 9ee6e8bb pbrook
6594 f6714d36 Aurelien Jarno
INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6595 f6714d36 Aurelien Jarno
                                      int is_quiet STATUS_PARAM )
6596 f6714d36 Aurelien Jarno
{
6597 f6714d36 Aurelien Jarno
    flag aSign, bSign;
6598 f6714d36 Aurelien Jarno
6599 f6714d36 Aurelien Jarno
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6600 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( a )<<1 ) ) ||
6601 f6714d36 Aurelien Jarno
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6602 f6714d36 Aurelien Jarno
          ( extractFloatx80Frac( b )<<1 ) )) {
6603 f6714d36 Aurelien Jarno
        if (!is_quiet ||
6604 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( a ) ||
6605 f6714d36 Aurelien Jarno
            floatx80_is_signaling_nan( b ) ) {
6606 f6714d36 Aurelien Jarno
            float_raise( float_flag_invalid STATUS_VAR);
6607 f6714d36 Aurelien Jarno
        }
6608 f6714d36 Aurelien Jarno
        return float_relation_unordered;
6609 f6714d36 Aurelien Jarno
    }
6610 f6714d36 Aurelien Jarno
    aSign = extractFloatx80Sign( a );
6611 f6714d36 Aurelien Jarno
    bSign = extractFloatx80Sign( b );
6612 f6714d36 Aurelien Jarno
    if ( aSign != bSign ) {
6613 f6714d36 Aurelien Jarno
6614 f6714d36 Aurelien Jarno
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6615 f6714d36 Aurelien Jarno
             ( ( a.low | b.low ) == 0 ) ) {
6616 f6714d36 Aurelien Jarno
            /* zero case */
6617 f6714d36 Aurelien Jarno
            return float_relation_equal;
6618 f6714d36 Aurelien Jarno
        } else {
6619 f6714d36 Aurelien Jarno
            return 1 - (2 * aSign);
6620 f6714d36 Aurelien Jarno
        }
6621 f6714d36 Aurelien Jarno
    } else {
6622 f6714d36 Aurelien Jarno
        if (a.low == b.low && a.high == b.high) {
6623 f6714d36 Aurelien Jarno
            return float_relation_equal;
6624 f6714d36 Aurelien Jarno
        } else {
6625 f6714d36 Aurelien Jarno
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6626 f6714d36 Aurelien Jarno
        }
6627 f6714d36 Aurelien Jarno
    }
6628 f6714d36 Aurelien Jarno
}
6629 f6714d36 Aurelien Jarno
6630 f6714d36 Aurelien Jarno
/* Signaling three-way comparison: calls floatx80_compare_internal() with
 * is_quiet == 0, so any NaN operand raises invalid.
 */
int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation = floatx80_compare_internal(a, b, 0 STATUS_VAR);

    return relation;
}
6634 f6714d36 Aurelien Jarno
6635 f6714d36 Aurelien Jarno
/* Quiet three-way comparison: calls floatx80_compare_internal() with
 * is_quiet == 1, so only signaling NaN operands raise invalid.
 */
int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
{
    int relation = floatx80_compare_internal(a, b, 1 STATUS_VAR);

    return relation;
}
6639 f6714d36 Aurelien Jarno
6640 1f587329 blueswir1
INLINE int float128_compare_internal( float128 a, float128 b,
6641 1f587329 blueswir1
                                      int is_quiet STATUS_PARAM )
6642 1f587329 blueswir1
{
6643 1f587329 blueswir1
    flag aSign, bSign;
6644 1f587329 blueswir1
6645 1f587329 blueswir1
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6646 1f587329 blueswir1
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6647 1f587329 blueswir1
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6648 1f587329 blueswir1
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6649 1f587329 blueswir1
        if (!is_quiet ||
6650 1f587329 blueswir1
            float128_is_signaling_nan( a ) ||
6651 1f587329 blueswir1
            float128_is_signaling_nan( b ) ) {
6652 1f587329 blueswir1
            float_raise( float_flag_invalid STATUS_VAR);
6653 1f587329 blueswir1
        }
6654 1f587329 blueswir1
        return float_relation_unordered;
6655 1f587329 blueswir1
    }
6656 1f587329 blueswir1
    aSign = extractFloat128Sign( a );
6657 1f587329 blueswir1
    bSign = extractFloat128Sign( b );
6658 1f587329 blueswir1
    if ( aSign != bSign ) {
6659 1f587329 blueswir1
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6660 1f587329 blueswir1
            /* zero case */
6661 1f587329 blueswir1
            return float_relation_equal;
6662 1f587329 blueswir1
        } else {
6663 1f587329 blueswir1
            return 1 - (2 * aSign);
6664 1f587329 blueswir1
        }
6665 1f587329 blueswir1
    } else {
6666 1f587329 blueswir1
        if (a.low == b.low && a.high == b.high) {
6667 1f587329 blueswir1
            return float_relation_equal;
6668 1f587329 blueswir1
        } else {
6669 1f587329 blueswir1
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6670 1f587329 blueswir1
        }
6671 1f587329 blueswir1
    }
6672 1f587329 blueswir1
}
6673 1f587329 blueswir1
6674 1f587329 blueswir1
/* Signaling three-way comparison: calls float128_compare_internal() with
 * is_quiet == 0, so any NaN operand raises invalid.
 */
int float128_compare( float128 a, float128 b STATUS_PARAM )
{
    int relation = float128_compare_internal(a, b, 0 STATUS_VAR);

    return relation;
}
6678 1f587329 blueswir1
6679 1f587329 blueswir1
/* Quiet three-way comparison: calls float128_compare_internal() with
 * is_quiet == 1, so only signaling NaN operands raise invalid.
 */
int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
{
    int relation = float128_compare_internal(a, b, 1 STATUS_VAR);

    return relation;
}
6683 1f587329 blueswir1
6684 274f1b04 Peter Maydell
/* min() and max() functions. These can't be implemented as
6685 274f1b04 Peter Maydell
 * 'compare and pick one input' because that would mishandle
6686 274f1b04 Peter Maydell
 * NaNs and +0 vs -0.
6687 274f1b04 Peter Maydell
 */
6688 274f1b04 Peter Maydell
#define MINMAX(s, nan_exp)                                              \
6689 274f1b04 Peter Maydell
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
6690 274f1b04 Peter Maydell
                                        int ismin STATUS_PARAM )        \
6691 274f1b04 Peter Maydell
{                                                                       \
6692 274f1b04 Peter Maydell
    flag aSign, bSign;                                                  \
6693 274f1b04 Peter Maydell
    uint ## s ## _t av, bv;                                             \
6694 274f1b04 Peter Maydell
    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
6695 274f1b04 Peter Maydell
    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
6696 274f1b04 Peter Maydell
    if (float ## s ## _is_any_nan(a) ||                                 \
6697 274f1b04 Peter Maydell
        float ## s ## _is_any_nan(b)) {                                 \
6698 274f1b04 Peter Maydell
        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
6699 274f1b04 Peter Maydell
    }                                                                   \
6700 274f1b04 Peter Maydell
    aSign = extractFloat ## s ## Sign(a);                               \
6701 274f1b04 Peter Maydell
    bSign = extractFloat ## s ## Sign(b);                               \
6702 274f1b04 Peter Maydell
    av = float ## s ## _val(a);                                         \
6703 274f1b04 Peter Maydell
    bv = float ## s ## _val(b);                                         \
6704 274f1b04 Peter Maydell
    if (aSign != bSign) {                                               \
6705 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6706 274f1b04 Peter Maydell
            return aSign ? a : b;                                       \
6707 274f1b04 Peter Maydell
        } else {                                                        \
6708 274f1b04 Peter Maydell
            return aSign ? b : a;                                       \
6709 274f1b04 Peter Maydell
        }                                                               \
6710 274f1b04 Peter Maydell
    } else {                                                            \
6711 274f1b04 Peter Maydell
        if (ismin) {                                                    \
6712 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? a : b;                         \
6713 274f1b04 Peter Maydell
        } else {                                                        \
6714 274f1b04 Peter Maydell
            return (aSign ^ (av < bv)) ? b : a;                         \
6715 274f1b04 Peter Maydell
        }                                                               \
6716 274f1b04 Peter Maydell
    }                                                                   \
6717 274f1b04 Peter Maydell
}                                                                       \
6718 274f1b04 Peter Maydell
                                                                        \
6719 274f1b04 Peter Maydell
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
6720 274f1b04 Peter Maydell
{                                                                       \
6721 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
6722 274f1b04 Peter Maydell
}                                                                       \
6723 274f1b04 Peter Maydell
                                                                        \
6724 274f1b04 Peter Maydell
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
6725 274f1b04 Peter Maydell
{                                                                       \
6726 274f1b04 Peter Maydell
    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
6727 274f1b04 Peter Maydell
}
6728 274f1b04 Peter Maydell
6729 274f1b04 Peter Maydell
/* Instantiate the MINMAX template for single precision; this expands to
 * float32_min() and float32_max() (plus the shared float32_minmax helper
 * they call).  NOTE(review): the second argument looks like the all-ones
 * exponent field for the format -- confirm against the MINMAX definition.
 */
MINMAX(32, 0xff)
6730 274f1b04 Peter Maydell
/* Instantiate the MINMAX template for double precision; this expands to
 * float64_min() and float64_max() (plus the shared float64_minmax helper
 * they call).  NOTE(review): the second argument looks like the all-ones
 * exponent field for the format -- confirm against the MINMAX definition.
 */
MINMAX(64, 0x7ff)
6731 274f1b04 Peter Maydell
6732 274f1b04 Peter Maydell
6733 9ee6e8bb pbrook
/* Multiply A by 2 raised to the power N.  */
6734 9ee6e8bb pbrook
float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6735 9ee6e8bb pbrook
{
6736 9ee6e8bb pbrook
    flag aSign;
6737 326b9e98 Aurelien Jarno
    int16_t aExp;
6738 bb98fe42 Andreas Färber
    uint32_t aSig;
6739 9ee6e8bb pbrook
6740 37d18660 Peter Maydell
    a = float32_squash_input_denormal(a STATUS_VAR);
6741 9ee6e8bb pbrook
    aSig = extractFloat32Frac( a );
6742 9ee6e8bb pbrook
    aExp = extractFloat32Exp( a );
6743 9ee6e8bb pbrook
    aSign = extractFloat32Sign( a );
6744 9ee6e8bb pbrook
6745 9ee6e8bb pbrook
    if ( aExp == 0xFF ) {
6746 326b9e98 Aurelien Jarno
        if ( aSig ) {
6747 326b9e98 Aurelien Jarno
            return propagateFloat32NaN( a, a STATUS_VAR );
6748 326b9e98 Aurelien Jarno
        }
6749 9ee6e8bb pbrook
        return a;
6750 9ee6e8bb pbrook
    }
6751 69397542 pbrook
    if ( aExp != 0 )
6752 69397542 pbrook
        aSig |= 0x00800000;
6753 69397542 pbrook
    else if ( aSig == 0 )
6754 69397542 pbrook
        return a;
6755 69397542 pbrook
6756 326b9e98 Aurelien Jarno
    if (n > 0x200) {
6757 326b9e98 Aurelien Jarno
        n = 0x200;
6758 326b9e98 Aurelien Jarno
    } else if (n < -0x200) {
6759 326b9e98 Aurelien Jarno
        n = -0x200;
6760 326b9e98 Aurelien Jarno
    }
6761 326b9e98 Aurelien Jarno
6762 69397542 pbrook
    aExp += n - 1;
6763 69397542 pbrook
    aSig <<= 7;
6764 69397542 pbrook
    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
6765 9ee6e8bb pbrook
}
6766 9ee6e8bb pbrook
6767 9ee6e8bb pbrook
/*
 * Multiply the double-precision value A by 2 raised to the power N.
 *
 * Special cases:
 *   - NaN inputs are routed through propagateFloat64NaN().
 *   - Infinities and zeros are returned unchanged.
 *   - N is clamped to [-0x1000, 0x1000] before it is added to the
 *     exponent, which keeps the sum inside the int16_t range of aExp.
 *
 * Rounding and overflow/underflow handling are delegated to
 * normalizeRoundAndPackFloat64().
 */
float64 float64_scalbn( float64 a, int n STATUS_PARAM )
{
    flag aSign;
    int16_t aExp;
    uint64_t aSig;

    a = float64_squash_input_denormal(a STATUS_VAR);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0x7FF ) {
        if ( aSig ) {
            return propagateFloat64NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig |= LIT64( 0x0010000000000000 );
    } else if ( aSig == 0 ) {
        /* Zero scales to itself. */
        return a;
    } else {
        /* Denormal: the exponent field is 0 but the value is scaled as if
         * the exponent were 1 (there is no implicit integer bit).  Without
         * this adjustment the result is a factor of two too small.
         */
        aExp++;
    }

    if (n > 0x1000) {
        n = 0x1000;
    } else if (n < -0x1000) {
        n = -0x1000;
    }

    /* The "- 1" compensates for the integer bit, which ends up added into
     * the exponent field when the result is packed.
     */
    aExp += n - 1;
    aSig <<= 10;
    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
}
6799 9ee6e8bb pbrook
6800 9ee6e8bb pbrook
/*
 * Multiply the extended double-precision value A by 2 raised to the
 * power N.
 *
 * Special cases:
 *   - NaN inputs are routed through propagateFloatx80NaN().
 *   - Infinities and zeros are returned unchanged.
 *   - N is clamped to [-0x10000, 0x10000] before it is added to the
 *     exponent.
 *
 * The result is rounded according to the floatx80_rounding_precision
 * field of the status by normalizeRoundAndPackFloatx80().
 */
floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* Shift out the explicit integer bit: only fraction bits decide
         * between NaN and infinity.
         */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN( a, a STATUS_VAR );
        }
        return a;
    }

    if ( aExp == 0 ) {
        if ( aSig == 0 ) {
            /* Zero scales to itself. */
            return a;
        }
        /* Denormal (or pseudo-denormal): an exponent field of 0 is scaled
         * as if it were 1.  Without this adjustment the result is a factor
         * of two too small.
         */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The integer bit is explicit in this format, so no "- 1" exponent
     * compensation is applied here, unlike the other scalbn variants.
     */
    aExp += n;
    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                          aSign, aExp, aSig, 0 STATUS_VAR );
}
6830 9ee6e8bb pbrook
6831 9ee6e8bb pbrook
/*
 * Multiply the quadruple-precision value A by 2 raised to the power N.
 *
 * Special cases:
 *   - NaN inputs are routed through propagateFloat128NaN().
 *   - Infinities and zeros are returned unchanged.
 *   - N is clamped to [-0x10000, 0x10000] before it is added to the
 *     exponent.
 *
 * Rounding and overflow/underflow handling are delegated to
 * normalizeRoundAndPackFloat128().
 */
float128 float128_scalbn( float128 a, int n STATUS_PARAM )
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN( a, a STATUS_VAR );
        }
        return a;
    }
    if ( aExp != 0 ) {
        /* Normal number: make the implicit integer bit explicit. */
        aSig0 |= LIT64( 0x0001000000000000 );
    } else if ( aSig0 == 0 && aSig1 == 0 ) {
        /* Zero scales to itself. */
        return a;
    } else {
        /* Denormal: the exponent field is 0 but the value is scaled as if
         * the exponent were 1 (there is no implicit integer bit).  Without
         * this adjustment the result is a factor of two too small.
         */
        aExp++;
    }

    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* The "- 1" compensates for the integer bit, which ends up added into
     * the exponent field when the result is packed.
     */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          STATUS_VAR );

}